如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

// Prints every whitespace-separated word of a fixed sentence, one per line,
// each prefixed with "Substring: ".
int main() {
    std::string s = "Somewhere down the road";
    std::istringstream iss(s);

    // Loop on the extraction result itself: checking the stream only after
    // printing would emit one extra, empty "Substring: " line once the final
    // read fails at end of input.
    for (std::string subs; iss >> subs; ) {
        std::cout << "Substring: " << subs << std::endl;
    }
}

当前回答

#include <iostream>
#include <string>
#include <deque>

/// Splits `line` into the fields separated by `delimiter`.
///
/// @param line       Text to split.
/// @param delimiter  Single character that separates fields.
/// @param skipEmpty  When true, zero-length fields (produced by leading,
///                   trailing or consecutive delimiters) are dropped.
/// @return The fields in order of appearance. An empty `line` always yields
///         an empty container; otherwise, with `skipEmpty == false`, a line
///         containing N delimiters yields exactly N + 1 fields.
std::deque<std::string> split(
    const std::string& line,
    std::string::value_type delimiter,
    bool skipEmpty = false
) {
    std::deque<std::string> fields;

    // An empty input has no fields at all, not one empty field.
    if (line.empty()) {
        return fields;
    }

    std::string::size_type fieldStart = 0;
    for (;;) {
        const std::string::size_type hit = line.find(delimiter, fieldStart);
        const bool isLastField = (hit == std::string::npos);
        const std::string::size_type fieldEnd = isLastField ? line.size() : hit;
        const std::string::size_type fieldLen = fieldEnd - fieldStart;

        if (fieldLen != 0 || !skipEmpty) {
            fields.emplace_back(line, fieldStart, fieldLen);
        }

        if (isLastField) {
            break;
        }
        fieldStart = hit + 1;  // resume just past the delimiter
    }

    return fields;
}

// Prints `line`, then the result of splitting it on ':' twice: first keeping
// empty fields, then skipping them. Every field is followed by '|', and the
// whole report ends with a blank line.
void test(const std::string& line) {
    std::cout << line << std::endl;

    for (const bool skip : {false, true}) {
        std::cout << (skip ? "skipEmpty=1 |" : "skipEmpty=0 |");
        const std::deque<std::string> pieces = split(line, ':', skip);
        for (const std::string& piece : pieces) {
            std::cout << piece << '|';
        }
        std::cout << std::endl;
    }

    std::cout << std::endl;
}

// Runs test() over a fixed list of inputs covering leading, trailing and
// repeated ':' delimiters as well as the empty string.
int main() {
    const char* const samples[] = {
        "foo:bar:::baz",
        "",
        "foo",
        ":",
        "::",
        ":foo",
        "::foo",
        ":foo:",
        ":foo::",
    };

    for (const char* const sample : samples) {
        test(sample);
    }

    return 0;
}

输出:

foo:bar:::baz
skipEmpty=0 |foo|bar|||baz|
skipEmpty=1 |foo|bar|baz|


skipEmpty=0 |
skipEmpty=1 |

foo
skipEmpty=0 |foo|
skipEmpty=1 |foo|

:
skipEmpty=0 |||
skipEmpty=1 |

::
skipEmpty=0 ||||
skipEmpty=1 |

:foo
skipEmpty=0 ||foo|
skipEmpty=1 |foo|

::foo
skipEmpty=0 |||foo|
skipEmpty=1 |foo|

:foo:
skipEmpty=0 ||foo||
skipEmpty=1 |foo|

:foo::
skipEmpty=0 ||foo|||
skipEmpty=1 |foo|

其他回答

#include <iostream>
#include <regex>

using namespace std;

// Splits a fixed string on runs of whitespace and prints each piece as " [piece]".
int main() {
    const std::string text = "foo bar  baz";
    const std::regex whitespace("\\s+");

    // Submatch index -1 selects the stretches between matches, so the regex
    // acts as the separator.
    const std::sregex_token_iterator last;
    for (std::sregex_token_iterator tok(text.begin(), text.end(), whitespace, -1);
         tok != last; ++tok) {
        std::cout << " [" << *tok << "]";
    }
}

IMO,这是最接近 Python 的 re.split() 的写法。有关 regex_token_iterator 的更多信息,请参阅 cplusplus.com。-1(regex_token_iterator 构造函数的第四个参数)表示选取序列中未被匹配的部分,即把匹配到的内容当作分隔符。

这里有一个拆分函数:

- 是通用的(模板)
- 只使用标准 C++(不依赖 Boost)
- 接受多个分隔符
- 忽略空标记(可以轻松更改)

template<typename T>
vector<T> split(const T& str, const T& delimiters) {
    vector<T> v;
    typename T::size_type start = 0;
    auto pos = str.find_first_of(delimiters, start);
    while (pos != T::npos) {
        if (pos != start) // 忽略空标记
            v.emplace_back(str, start, pos - start);
        start = pos + 1;
        pos = str.find_first_of(delimiters, start);
    }
    if (start < str.length()) // 忽略尾随分隔符
        v.emplace_back(str, start, str.length() - start); // 添加字符串的剩余部分
    return v;
}

示例用法:

    // Narrow-character strings; the second argument lists the delimiter characters.
    vector<string> v = split<string>("Hello, there; World", ";,");
    // Wide-character counterpart, given its own name so that both declarations
    // can live in the same scope without redefining `v`.
    vector<wstring> wv = split<wstring>(L"Hello, there; World", L";,");

获取Boost!:-)

#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
#include <vector>

using namespace std;
using namespace boost;

int main(int argc, char**argv) {
    typedef vector < string > list_type;

    list_type list;
    string line;

    line = "Somewhere down the road";
    split(list, line, is_any_of(" "));

    for(int i = 0; i < list.size(); i++)
    {
        cout << list[i] << endl;
    }

    return 0;
}

此示例的输出如下:

Somewhere
down
the
road

这与 Stack Overflow 上的问题“如何在 C++ 中对字符串进行分词(tokenize)?”类似。需要外部库 Boost。

#include <iostream>
#include <string>
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;

int main(int argc, char** argv)
{
    string text = "token  test\tstring";

    char_separator<char> sep(" \t");
    tokenizer<char_separator<char>> tokens(text, sep);
    for (const string& t : tokens)
    {
        cout << t << "." << endl;
    }
}

如果您需要通过非空格符号解析字符串,则字符串流可能很方便:

// Pulls the values out of "Key:Value;" pairs by alternating the getline
// delimiter between ':' (end of a key) and ';' (end of a value).
string s = "Name:JAck; Spouse:Susan; ...";
string dummy, name, spouse;

istringstream iss(s);
getline(iss, dummy, ':');   // discards the key "Name"
getline(iss, name, ';');    // name == "JAck"
getline(iss, dummy, ':');   // discards " Spouse" (the leading space is part of it)
getline(iss, spouse, ';');  // spouse == "Susan"