Java有一个方便的分割方法:

String str = "The quick brown fox";
String[] results = str.split(" ");

在c++中有简单的方法来做到这一点吗?


当前回答

我为自己编写了一个https://stackoverflow.com/a/50247503/3976739的简化版本(可能有一点效率)。我希望这能有所帮助。

void StrTokenizer(string& source, const char* delimiter, vector<string>& Tokens)
{   
   size_t new_index = 0;
   size_t old_index = 0;

   while (new_index != std::string::npos)   
   {
      new_index = source.find(delimiter, old_index);
      Tokens.emplace_back(source.substr(old_index, new_index-old_index));

      if (new_index != std::string::npos)
          old_index = ++new_index;
   }
}

其他回答

我以前只用标准库做了一个lexer/tokenizer。代码如下:

#include <iostream>
#include <string>
#include <vector>
#include <sstream>

using namespace std;

string seps(string& s) {
    if (!s.size()) return "";
    stringstream ss;
    ss << s[0];
    for (int i = 1; i < s.size(); i++) {
        ss << '|' << s[i];
    }
    return ss.str();
}

void Tokenize(string& str, vector<string>& tokens, const string& delimiters = " ")
{
    seps(str);

    // Skip delimiters at beginning.
    string::size_type lastPos = str.find_first_not_of(delimiters, 0);
    // Find first "non-delimiter".
    string::size_type pos = str.find_first_of(delimiters, lastPos);

    while (string::npos != pos || string::npos != lastPos)
    {
        // Found a token, add it to the vector.
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        // Skip delimiters.  Note the "not_of"
        lastPos = str.find_first_not_of(delimiters, pos);
        // Find next "non-delimiter"
        pos = str.find_first_of(delimiters, lastPos);
    }
}

int main(int argc, char *argv[])
{
    vector<string> t;
    string s = "Tokens for everyone!";

    Tokenize(s, t, "|");

    for (auto c : t)
        cout << c << endl;

    system("pause");

    return 0;
}

使用regex_token_iterators的解决方案:

#include <iostream>
#include <regex>
#include <string>

using namespace std;

int main()
{
    string str("The quick brown fox");

    regex reg("\\s+");

    sregex_token_iterator iter(str.begin(), str.end(), reg, -1);
    sregex_token_iterator end;

    vector<string> vec(iter, end);

    for (auto a : vec)
    {
        cout << a << endl;
    }
}

这是一个非常简单的问题:

#include <vector>
#include <string>
using namespace std;

vector<string> split(const char *str, char c = ' ')
{
    vector<string> result;

    do
    {
        const char *begin = str;

        while(*str != c && *str)
            str++;

        result.push_back(string(begin, str));
    } while (0 != *str++);

    return result;
}

MFC/ATL有一个非常好的标记器。从MSDN:

CAtlString str( "%First Second#Third" );
CAtlString resToken;
int curPos= 0;

resToken= str.Tokenize("% #",curPos);
while (resToken != "")
{
   printf("Resulting token: %s\n", resToken);
   resToken= str.Tokenize("% #",curPos);
};

Output

Resulting Token: First
Resulting Token: Second
Resulting Token: Third

请看这个例子。它可能对你有帮助。

#include <iostream>
#include <sstream>

using namespace std;

int main ()
{
    string tmps;
    istringstream is ("the dellimiter is the space");
    while (is.good ()) {
        is >> tmps;
        cout << tmps << "\n";
    }
    return 0;
}