如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

我喜欢将boost/regex方法用于此任务,因为它们为指定拆分条件提供了最大的灵活性。

#include <iostream>
#include <string>
#include <boost/regex.hpp>

int main() {
    std::string line("A:::line::to:split");
    const boost::regex re(":+"); // one or more colons

    // -1 means find inverse matches aka split
    boost::sregex_token_iterator tokens(line.begin(),line.end(),re,-1);
    boost::sregex_token_iterator end;

    for (; tokens != end; ++tokens)
        std::cout << *tokens << std::endl;
}

其他回答

使用vector作为基类的快速版本,可完全访问其所有运算符:

    // Split string into parts.
    class Split : public std::vector<std::string>
    {
        public:
            Split(const std::string& str, char* delimList)
            {
               size_t lastPos = 0;
               size_t pos = str.find_first_of(delimList);

               while (pos != std::string::npos)
               {
                    if (pos != lastPos)
                        push_back(str.substr(lastPos, pos-lastPos));
                    lastPos = pos + 1;
                    pos = str.find_first_of(delimList, lastPos);
               }
               if (lastPos < str.length())
                   push_back(str.substr(lastPos, pos-lastPos));
            }
    };

用于填充STL集的示例:

std::set<std::string> words;
Split split("Hello,World", ",");
words.insert(split.begin(), split.end());

我知道很晚才来参加聚会,但我正在考虑最优雅的方法,如果给你一系列分隔符而不是空格,并且只使用标准库。

以下是我的想法:

要通过分隔符序列将单词拆分为字符串向量,请执行以下操作:

template<class Container>
std::vector<std::string> split_by_delimiters(const std::string& input, const Container& delimiters)
{
    std::vector<std::string> result;

    for (auto current = begin(input) ; current != end(input) ; )
    {
        auto first = find_if(current, end(input), not_in(delimiters));
        if (first == end(input)) break;
        auto last = find_if(first, end(input), is_in(delimiters));
        result.emplace_back(first, last);
        current = last;
    }
    return result;
}

通过提供一系列有效字符,以另一种方式进行拆分:

template<class Container>
std::vector<std::string> split_by_valid_chars(const std::string& input, const Container& valid_chars)
{
    std::vector<std::string> result;

    for (auto current = begin(input) ; current != end(input) ; )
    {
        auto first = find_if(current, end(input), is_in(valid_chars));
        if (first == end(input)) break;
        auto last = find_if(first, end(input), not_in(valid_chars));
        result.emplace_back(first, last);
        current = last;
    }
    return result;
}

is_in和not_in的定义如下:

namespace detail {
    template<class Container>
    struct is_in {
        is_in(const Container& charset)
        : _charset(charset)
        {}

        bool operator()(char c) const
        {
            return find(begin(_charset), end(_charset), c) != end(_charset);
        }

        const Container& _charset;
    };

    template<class Container>
    struct not_in {
        not_in(const Container& charset)
        : _charset(charset)
        {}

        bool operator()(char c) const
        {
            return find(begin(_charset), end(_charset), c) == end(_charset);
        }

        const Container& _charset;
    };

}

template<class Container>
detail::not_in<Container> not_in(const Container& c)
{
    return detail::not_in<Container>(c);
}

template<class Container>
detail::is_in<Container> is_in(const Container& c)
{
    return detail::is_in<Container>(c);
}

这类似于堆栈溢出问题:如何在C++中标记字符串?。需要Boost外部库

#include <iostream>
#include <string>
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;

int main(int argc, char** argv)
{
    string text = "token  test\tstring";

    char_separator<char> sep(" \t");
    tokenizer<char_separator<char>> tokens(text, sep);
    for (const string& t : tokens)
    {
        cout << t << "." << endl;
    }
}

下面的代码使用strtok()将字符串拆分为标记,并将标记存储在向量中。

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>

using namespace std;


char one_line_string[] = "hello hi how are you nice weather we are having ok then bye";
char seps[]   = " ,\t\n";
char *token;



int main()
{
   vector<string> vec_String_Lines;
   token = strtok( one_line_string, seps );

   cout << "Extracting and storing data in a vector..\n\n\n";

   while( token != NULL )
   {
      vec_String_Lines.push_back(token);
      token = strtok( NULL, seps );
   }
     cout << "Displaying end result in vector line storage..\n\n";

    for ( int i = 0; i < vec_String_Lines.size(); ++i)
    cout << vec_String_Lines[i] << "\n";
    cout << "\n\n\n";


return 0;
}

我相信还没有人发布这个解决方案。与其直接使用分隔符,它基本上与boost::split()相同,即它允许您传递一个谓词,如果字符是分隔符,则返回true,否则返回false。我认为这给了程序员更多的控制,最棒的是你不需要提升。

template <class Container, class String, class Predicate>
void split(Container& output, const String& input,
           const Predicate& pred, bool trimEmpty = false) {
    auto it = begin(input);
    auto itLast = it;
    while (it = find_if(it, end(input), pred), it != end(input)) {
        if (not (trimEmpty and it == itLast)) {
            output.emplace_back(itLast, it);
        }
        ++it;
        itLast = it;
    }
}

然后可以这样使用:

struct Delim {
    bool operator()(char c) {
        return not isalpha(c);
    }
};    

int main() {
    string s("#include<iostream>\n"
             "int main() { std::cout << \"Hello world!\" << std::endl; }");

    vector<string> v;

    split(v, s, Delim(), true);
    /* Which is also the same as */
    split(v, s, [](char c) { return not isalpha(c); }, true);

    for (const auto& i : v) {
        cout << i << endl;
    }
}