如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

没有任何内存分配的C++17版本(std::函数除外)

void iter_words(const std::string_view& input, const std::function<void(std::string_view)>& process_word) {

    auto itr = input.begin();

    auto consume_whitespace = [&]() {
        for(; itr != input.end(); ++itr) {
            if(!isspace(*itr))
                return;
        }
    };

    auto consume_letters = [&]() {
        for(; itr != input.end(); ++itr) {
            if(isspace(*itr))
                return;
        }
    };

    while(true) {
        consume_whitespace();
        if(itr == input.end())
            return;
        auto word_start = itr - input.begin();
        consume_letters();
        auto word_end = itr - input.begin();
        process_word(input.substr(word_start, word_end - word_start));
    }
}

int main() {
    iter_words("foo bar", [](std::string_view sv) {
        std::cout << "Got word: " <<  sv << '\n';
    });
    return 0;
}

其他回答

这是另一个解决方案。它结构紧凑,效率合理:

std::vector<std::string> split(const std::string &text, char sep) {
  std::vector<std::string> tokens;
  std::size_t start = 0, end = 0;
  while ((end = text.find(sep, start)) != std::string::npos) {
    tokens.push_back(text.substr(start, end - start));
    start = end + 1;
  }
  tokens.push_back(text.substr(start));
  return tokens;
}

它可以很容易地模板化以处理字符串分隔符、宽字符串等。

注意,拆分“”会产生一个空字符串,拆分“,”(即sep)会产生两个空字符串。

它还可以轻松扩展以跳过空令牌:

std::vector<std::string> split(const std::string &text, char sep) {
    std::vector<std::string> tokens;
    std::size_t start = 0, end = 0;
    while ((end = text.find(sep, start)) != std::string::npos) {
        if (end != start) {
          tokens.push_back(text.substr(start, end - start));
        }
        start = end + 1;
    }
    if (end != start) {
       tokens.push_back(text.substr(start));
    }
    return tokens;
}

如果需要在多个分隔符处拆分字符串,同时跳过空标记,则可以使用此版本:

std::vector<std::string> split(const std::string& text, const std::string& delims)
{
    std::vector<std::string> tokens;
    std::size_t start = text.find_first_not_of(delims), end = 0;

    while((end = text.find_first_of(delims, start)) != std::string::npos)
    {
        tokens.push_back(text.substr(start, end - start));
        start = text.find_first_not_of(delims, end);
    }
    if(start != std::string::npos)
        tokens.push_back(text.substr(start));

    return tokens;
}

我对string和u32string~的一般实现,使用boost::algorithm::split签名。

template<typename CharT, typename UnaryPredicate>
void split(std::vector<std::basic_string<CharT>>& split_result,
           const std::basic_string<CharT>& s,
           UnaryPredicate predicate)
{
    using ST = std::basic_string<CharT>;
    using std::swap;
    std::vector<ST> tmp_result;
    auto iter = s.cbegin(),
         end_iter = s.cend();
    while (true)
    {
        /**
         * edge case: empty str -> push an empty str and exit.
         */
        auto find_iter = find_if(iter, end_iter, predicate);
        tmp_result.emplace_back(iter, find_iter);
        if (find_iter == end_iter) { break; }
        iter = ++find_iter; 
    }
    swap(tmp_result, split_result);
}


template<typename CharT>
void split(std::vector<std::basic_string<CharT>>& split_result,
           const std::basic_string<CharT>& s,
           const std::basic_string<CharT>& char_candidate)
{
    std::unordered_set<CharT> candidate_set(char_candidate.cbegin(),
                                            char_candidate.cend());
    auto predicate = [&candidate_set](const CharT& c) {
        return candidate_set.count(c) > 0U;
    };
    return split(split_result, s, predicate);
}

template<typename CharT>
void split(std::vector<std::basic_string<CharT>>& split_result,
           const std::basic_string<CharT>& s,
           const CharT* literals)
{
    return split(split_result, s, std::basic_string<CharT>(literals));
}

值得一提的是,这里有另一种从输入字符串中提取令牌的方法,仅依赖于标准库设施。这是STL设计背后力量和优雅的一个例子。

#include <iostream>
#include <string>
#include <sstream>
#include <algorithm>
#include <iterator>

int main() {
    using namespace std;
    string sentence = "And I feel fine...";
    istringstream iss(sentence);
    copy(istream_iterator<string>(iss),
         istream_iterator<string>(),
         ostream_iterator<string>(cout, "\n"));
}

可以使用相同的通用复制算法将提取的令牌插入到容器中,而不是将其复制到输出流中。

vector<string> tokens;
copy(istream_iterator<string>(iss),
     istream_iterator<string>(),
     back_inserter(tokens));

…或直接创建矢量:

vector<string> tokens{istream_iterator<string>{iss},
                      istream_iterator<string>{}};

使用std::string_view和Eric Niebler的range-v3库:

https://wandbox.org/permlink/kW5lwRCL1pxjp2pW

#include <iostream>
#include <string>
#include <string_view>
#include "range/v3/view.hpp"
#include "range/v3/algorithm.hpp"

int main() {
    std::string s = "Somewhere down the range v3 library";
    ranges::for_each(s  
        |   ranges::view::split(' ')
        |   ranges::view::transform([](auto &&sub) {
                return std::string_view(&*sub.begin(), ranges::distance(sub));
            }),
        [](auto s) {std::cout << "Substring: " << s << "\n";}
    );
}

通过使用循环的范围而不是范围::for_each算法:

#include <iostream>
#include <string>
#include <string_view>
#include "range/v3/view.hpp"

int main()
{
    std::string str = "Somewhere down the range v3 library";
    for (auto s : str | ranges::view::split(' ')
                      | ranges::view::transform([](auto&& sub) { return std::string_view(&*sub.begin(), ranges::distance(sub)); }
                      ))
    {
        std::cout << "Substring: " << s << "\n";
    }
}

我相信还没有人发布这个解决方案。与其直接使用分隔符,它基本上与boost::split()相同,即它允许您传递一个谓词,如果字符是分隔符,则返回true,否则返回false。我认为这给了程序员更多的控制,最棒的是你不需要提升。

template <class Container, class String, class Predicate>
void split(Container& output, const String& input,
           const Predicate& pred, bool trimEmpty = false) {
    auto it = begin(input);
    auto itLast = it;
    while (it = find_if(it, end(input), pred), it != end(input)) {
        if (not (trimEmpty and it == itLast)) {
            output.emplace_back(itLast, it);
        }
        ++it;
        itLast = it;
    }
}

然后可以这样使用:

struct Delim {
    bool operator()(char c) {
        return not isalpha(c);
    }
};    

int main() {
    string s("#include<iostream>\n"
             "int main() { std::cout << \"Hello world!\" << std::endl; }");

    vector<string> v;

    split(v, s, Delim(), true);
    /* Which is also the same as */
    split(v, s, [](char c) { return not isalpha(c); }, true);

    for (const auto& i : v) {
        cout << i << endl;
    }
}