如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

有一个名为strtok的函数。

#include<string>
using namespace std;

vector<string> split(char* str,const char* delim)
{
    char* saveptr;
    char* token = strtok_r(str,delim,&saveptr);

    vector<string> result;

    while(token != NULL)
    {
        result.push_back(token);
        token = strtok_r(NULL,delim,&saveptr);
    }
    return result;
}

其他回答

使用vector作为基类的快速版本,可完全访问其所有运算符:

    // Split string into parts.
    class Split : public std::vector<std::string>
    {
        public:
            Split(const std::string& str, char* delimList)
            {
               size_t lastPos = 0;
               size_t pos = str.find_first_of(delimList);

               while (pos != std::string::npos)
               {
                    if (pos != lastPos)
                        push_back(str.substr(lastPos, pos-lastPos));
                    lastPos = pos + 1;
                    pos = str.find_first_of(delimList, lastPos);
               }
               if (lastPos < str.length())
                   push_back(str.substr(lastPos, pos-lastPos));
            }
    };

用于填充STL集的示例:

std::set<std::string> words;
Split split("Hello,World", ",");
words.insert(split.begin(), split.end());

如果您需要通过非空格符号解析字符串,则字符串流可能很方便:

string s = "Name:JAck; Spouse:Susan; ...";
string dummy, name, spouse;

istringstream iss(s);
getline(iss, dummy, ':');
getline(iss, name, ';');
getline(iss, dummy, ':');
getline(iss, spouse, ';')

LazyString拆分器:

#include <string>
#include <algorithm>
#include <unordered_set>

using namespace std;

class LazyStringSplitter
{
    string::const_iterator start, finish;
    unordered_set<char> chop;

public:

    // Empty Constructor
    explicit LazyStringSplitter()
    {}

    explicit LazyStringSplitter (const string cstr, const string delims)
        : start(cstr.begin())
        , finish(cstr.end())
        , chop(delims.begin(), delims.end())
    {}

    void operator () (const string cstr, const string delims)
    {
        chop.insert(delims.begin(), delims.end());
        start = cstr.begin();
        finish = cstr.end();
    }

    bool empty() const { return (start >= finish); }

    string next()
    {
        // return empty string
        // if ran out of characters
        if (empty())
            return string("");

        auto runner = find_if(start, finish, [&](char c) {
            return chop.count(c) == 1;
        });

        // construct next string
        string ret(start, runner);
        start = runner + 1;

        // Never return empty string
        // + tail recursion makes this method efficient
        return !ret.empty() ? ret : next();
    }
};

我将此方法称为LazyStringSplitter是因为一个原因——它不会一次性拆分字符串。本质上,它的行为类似于python生成器它公开了一个名为next的方法,该方法返回从原始字符串拆分的下一个字符串我使用了c++11STL中的无序集,因此查找分隔符的速度要快得多下面是它的工作原理

测试程序

#include <iostream>
using namespace std;

int main()
{
    LazyStringSplitter splitter;

    // split at the characters ' ', '!', '.', ','
    splitter("This, is a string. And here is another string! Let's test and see how well this does.", " !.,");

    while (!splitter.empty())
        cout << splitter.next() << endl;
    return 0;
}

输出,输出

This
is
a
string
And
here
is
another
string
Let's
test
and
see
how
well
this
does

改进这一点的下一个计划是实施开始和结束方法,以便可以执行以下操作:

vector<string> split_string(splitter.begin(), splitter.end());

这是我写的一个函数,帮助我做了很多事情。它在为WebSocket做协议时帮助了我。

using namespace std;
#include <iostream>
#include <vector>
#include <sstream>
#include <string>

vector<string> split ( string input , string split_id ) {
  vector<string> result;
  int i = 0;
  bool add;
  string temp;
  stringstream ss;
  size_t found;
  string real;
  int r = 0;
    while ( i != input.length() ) {
        add = false;
        ss << input.at(i);
        temp = ss.str();
        found = temp.find(split_id);
        if ( found != string::npos ) {
            add = true;
            real.append ( temp , 0 , found );
        } else if ( r > 0 &&  ( i+1 ) == input.length() ) {
            add = true;
            real.append ( temp , 0 , found );
        }
        if ( add ) {
            result.push_back(real);
            ss.str(string());
            ss.clear();
            temp.clear();
            real.clear();
            r = 0;
        }
        i++;
        r++;
    }
  return result;
}

int main() {
    string s = "S,o,m,e,w,h,e,r,e, down the road \n In a really big C++ house.  \n  Lives a little old lady.   \n   That no one ever knew.    \n    She comes outside.     \n     In the very hot sun.      \n\n\n\n\n\n\n\n   And throws C++ at us.    \n    The End.  FIN.";
    vector < string > Token;
    Token = split ( s , "," );
    for ( int i = 0 ; i < Token.size(); i++)    cout << Token.at(i) << endl;
    cout << endl << Token.size();
    int a;
    cin >> a;
    return a;
}

这是我的版本

#include <vector>

inline std::vector<std::string> Split(const std::string &str, const std::string &delim = " ")
{
    std::vector<std::string> tokens;
    if (str.size() > 0)
    {
        if (delim.size() > 0)
        {
            std::string::size_type currPos = 0, prevPos = 0;
            while ((currPos = str.find(delim, prevPos)) != std::string::npos)
            {
                std::string item = str.substr(prevPos, currPos - prevPos);
                if (item.size() > 0)
                {
                    tokens.push_back(item);
                }
                prevPos = currPos + 1;
            }
            tokens.push_back(str.substr(prevPos));
        }
        else
        {
            tokens.push_back(str);
        }
    }
    return tokens;
}

它适用于多字符分隔符。它防止空令牌进入结果。它使用单个标头。当您不提供分隔符时,它将字符串作为一个标记返回。如果字符串为空,它还会返回一个空结果。不幸的是,它的效率很低,因为存在巨大的std::vector副本,除非您使用C++11进行编译,否则应该使用移动示意图。在C++11中,这段代码应该很快。