如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

使用std::string_view和Eric Niebler的range-v3库:

https://wandbox.org/permlink/kW5lwRCL1pxjp2pW

#include <iostream>
#include <string>
#include <string_view>
#include "range/v3/view.hpp"
#include "range/v3/algorithm.hpp"

int main() {
    std::string s = "Somewhere down the range v3 library";
    ranges::for_each(s  
        |   ranges::view::split(' ')
        |   ranges::view::transform([](auto &&sub) {
                return std::string_view(&*sub.begin(), ranges::distance(sub));
            }),
        [](auto s) {std::cout << "Substring: " << s << "\n";}
    );
}

通过使用循环的范围而不是范围::for_each算法:

#include <iostream>
#include <string>
#include <string_view>
#include "range/v3/view.hpp"

int main()
{
    std::string str = "Somewhere down the range v3 library";
    for (auto s : str | ranges::view::split(' ')
                      | ranges::view::transform([](auto&& sub) { return std::string_view(&*sub.begin(), ranges::distance(sub)); }
                      ))
    {
        std::cout << "Substring: " << s << "\n";
    }
}

其他回答

这里有一个只使用标准正则表达式库的简单解决方案

#include <regex>
#include <string>
#include <vector>

std::vector<string> Tokenize( const string str, const std::regex regex )
{
    using namespace std;

    std::vector<string> result;

    sregex_token_iterator it( str.begin(), str.end(), regex, -1 );
    sregex_token_iterator reg_end;

    for ( ; it != reg_end; ++it ) {
        if ( !it->str().empty() ) //token could be empty:check
            result.emplace_back( it->str() );
    }

    return result;
}

正则表达式参数允许检查多个参数(空格、逗号等)

我通常只选中空格和逗号分隔,所以我也有这个默认函数:

std::vector<string> TokenizeDefault( const string str )
{
    using namespace std;

    regex re( "[\\s,]+" );

    return Tokenize( str, re );
}

“[\\s,]+”检查空格(\\s)和逗号(,)。

注意,如果要拆分wstring而不是string,

将所有std::regex更改为std::wregex将所有sregex_token_iterator更改为wsregex_token_idterator

注意,根据编译器的不同,您可能还希望引用字符串参数。

最近我不得不将一个骆驼大小写的单词拆分成子单词。没有分隔符,只有大写字符。

#include <string>
#include <list>
#include <locale> // std::isupper

template<class String>
const std::list<String> split_camel_case_string(const String &s)
{
    std::list<String> R;
    String w;

    for (String::const_iterator i = s.begin(); i < s.end(); ++i) {  {
        if (std::isupper(*i)) {
            if (w.length()) {
                R.push_back(w);
                w.clear();
            }
        }
        w += *i;
    }

    if (w.length())
        R.push_back(w);
    return R;
}

例如,这将“AQueryTrades”拆分为“A”、“Query”和“Trades”。该函数适用于窄字符串和宽字符串。因为它尊重当前的语言环境,所以将“RaumfahrtÜberwachungsVerordnung”分为“Raumfahrt”、“Überwachungs”和“Verordnug”。

注意std::upper应该真正作为函数模板参数传递。然后,此函数的更广义的from也可以在分隔符(如“、”、“;”或“”)处拆分。

这是我写的一个函数,帮助我做了很多事情。它在为WebSocket做协议时帮助了我。

using namespace std;
#include <iostream>
#include <vector>
#include <sstream>
#include <string>

vector<string> split ( string input , string split_id ) {
  vector<string> result;
  int i = 0;
  bool add;
  string temp;
  stringstream ss;
  size_t found;
  string real;
  int r = 0;
    while ( i != input.length() ) {
        add = false;
        ss << input.at(i);
        temp = ss.str();
        found = temp.find(split_id);
        if ( found != string::npos ) {
            add = true;
            real.append ( temp , 0 , found );
        } else if ( r > 0 &&  ( i+1 ) == input.length() ) {
            add = true;
            real.append ( temp , 0 , found );
        }
        if ( add ) {
            result.push_back(real);
            ss.str(string());
            ss.clear();
            temp.clear();
            real.clear();
            r = 0;
        }
        i++;
        r++;
    }
  return result;
}

int main() {
    string s = "S,o,m,e,w,h,e,r,e, down the road \n In a really big C++ house.  \n  Lives a little old lady.   \n   That no one ever knew.    \n    She comes outside.     \n     In the very hot sun.      \n\n\n\n\n\n\n\n   And throws C++ at us.    \n    The End.  FIN.";
    vector < string > Token;
    Token = split ( s , "," );
    for ( int i = 0 ; i < Token.size(); i++)    cout << Token.at(i) << endl;
    cout << endl << Token.size();
    int a;
    cin >> a;
    return a;
}

我使用以下方法

void split(string in, vector<string>& parts, char separator) {
    string::iterator  ts, curr;
    ts = curr = in.begin();
    for(; curr <= in.end(); curr++ ) {
        if( (curr == in.end() || *curr == separator) && curr > ts )
               parts.push_back( string( ts, curr ));
        if( curr == in.end() )
               break;
        if( *curr == separator ) ts = curr + 1; 
    }
}

PlasmaHH,我忘记包含删除带有空格的标记的额外检查(curr>ts)。

我使用这个simpleton是因为我们得到了字符串类“特殊”(即非标准):

void splitString(const String &s, const String &delim, std::vector<String> &result) {
    const int l = delim.length();
    int f = 0;
    int i = s.indexOf(delim,f);
    while (i>=0) {
        String token( i-f > 0 ? s.substring(f,i-f) : "");
        result.push_back(token);
        f=i+l;
        i = s.indexOf(delim,f);
    }
    String token = s.substring(f);
    result.push_back(token);
}