如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

这是我写的一个函数,帮助我做了很多事情。它在为WebSocket做协议时帮助了我。

using namespace std;
#include <iostream>
#include <vector>
#include <sstream>
#include <string>

vector<string> split ( string input , string split_id ) {
  vector<string> result;
  int i = 0;
  bool add;
  string temp;
  stringstream ss;
  size_t found;
  string real;
  int r = 0;
    while ( i != input.length() ) {
        add = false;
        ss << input.at(i);
        temp = ss.str();
        found = temp.find(split_id);
        if ( found != string::npos ) {
            add = true;
            real.append ( temp , 0 , found );
        } else if ( r > 0 &&  ( i+1 ) == input.length() ) {
            add = true;
            real.append ( temp , 0 , found );
        }
        if ( add ) {
            result.push_back(real);
            ss.str(string());
            ss.clear();
            temp.clear();
            real.clear();
            r = 0;
        }
        i++;
        r++;
    }
  return result;
}

int main() {
    string s = "S,o,m,e,w,h,e,r,e, down the road \n In a really big C++ house.  \n  Lives a little old lady.   \n   That no one ever knew.    \n    She comes outside.     \n     In the very hot sun.      \n\n\n\n\n\n\n\n   And throws C++ at us.    \n    The End.  FIN.";
    vector < string > Token;
    Token = split ( s , "," );
    for ( int i = 0 ; i < Token.size(); i++)    cout << Token.at(i) << endl;
    cout << endl << Token.size();
    int a;
    cin >> a;
    return a;
}

其他回答

对于那个些需要使用字符串分隔符拆分字符串的人,也许可以尝试我的以下解决方案。

std::vector<size_t> str_pos(const std::string &search, const std::string &target)
{
    std::vector<size_t> founds;

    if(!search.empty())
    {
        size_t start_pos = 0;

        while (true)
        {
            size_t found_pos = target.find(search, start_pos);

            if(found_pos != std::string::npos)
            {
                size_t found = found_pos;

                founds.push_back(found);

                start_pos = (found_pos + 1);
            }
            else
            {
                break;
            }
        }
    }

    return founds;
}

std::string str_sub_index(size_t begin_index, size_t end_index, const std::string &target)
{
    std::string sub;

    size_t size = target.length();

    const char* copy = target.c_str();

    for(size_t i = begin_index; i <= end_index; i++)
    {
        if(i >= size)
        {
            break;
        }
        else
        {
            char c = copy[i];

            sub += c;
        }
    }

    return sub;
}

std::vector<std::string> str_split(const std::string &delimiter, const std::string &target)
{
    std::vector<std::string> splits;

    if(!delimiter.empty())
    {
        std::vector<size_t> founds = str_pos(delimiter, target);

        size_t founds_size = founds.size();

        if(founds_size > 0)
        {
            size_t search_len = delimiter.length();

            size_t begin_index = 0;

            for(int i = 0; i <= founds_size; i++)
            {
                std::string sub;

                if(i != founds_size)
                {
                    size_t pos  = founds.at(i);

                    sub = str_sub_index(begin_index, pos - 1, target);

                    begin_index = (pos + search_len);
                }
                else
                {
                    sub = str_sub_index(begin_index, (target.length() - 1), target);
                }

                splits.push_back(sub);
            }
        }
    }

    return splits;
}

这些片段由3个函数组成。坏消息是使用str_split函数,您将需要另外两个函数。是的,这是一大块代码。但好消息是,这两个附加功能可以独立工作,有时也很有用

测试main()块中的函数如下:

int main()
{
    std::string s = "Hello, world! We need to make the world a better place. Because your world is also my world, and our children's world.";

    std::vector<std::string> split = str_split("world", s);

    for(int i = 0; i < split.size(); i++)
    {
        std::cout << split[i] << std::endl;
    }
}

它将产生:

Hello, 
! We need to make the 
 a better place. Because your 
 is also my 
, and our children's 
.

我认为这不是最有效的代码,但至少它可以工作。希望有帮助。

有一种更简单的方法可以做到这一点!!

#include <vector>
#include <string>
std::vector<std::string> splitby(std::string string, char splitter) {
    int splits = 0;
    std::vector<std::string> result = {};
    std::string locresult = "";
    for (unsigned int i = 0; i < string.size(); i++) {
        if ((char)string.at(i) != splitter) {
            locresult += string.at(i);
        }
        else {
            result.push_back(locresult);
            locresult = "";
        }
    }
    if (splits == 0) {
        result.push_back(locresult);
    }
    return result;
}

void printvector(std::vector<std::string> v) {
    std::cout << '{';
    for (unsigned int i = 0; i < v.size(); i++) {
        if (i < v.size() - 1) {
            std::cout << '"' << v.at(i) << "\",";
        }
        else {
            std::cout << '"' << v.at(i) << "\"";
        }
    }
    std::cout << "}\n";
}

我已经使用strtok滚动了自己的代码,并使用boost拆分了一个字符串。我找到的最好的方法是C++字符串工具包库。它非常灵活和快速。

#include <iostream>
#include <vector>
#include <string>
#include <strtk.hpp>

const char *whitespace  = " \t\r\n\f";
const char *whitespace_and_punctuation  = " \t\r\n\f;,=";

int main()
{
    {   // normal parsing of a string into a vector of strings
        std::string s("Somewhere down the road");
        std::vector<std::string> result;
        if( strtk::parse( s, whitespace, result ) )
        {
            for(size_t i = 0; i < result.size(); ++i )
                std::cout << result[i] << std::endl;
        }
    }

    {  // parsing a string into a vector of floats with other separators
        // besides spaces

        std::string s("3.0, 3.14; 4.0");
        std::vector<float> values;
        if( strtk::parse( s, whitespace_and_punctuation, values ) )
        {
            for(size_t i = 0; i < values.size(); ++i )
                std::cout << values[i] << std::endl;
        }
    }

    {  // parsing a string into specific variables

        std::string s("angle = 45; radius = 9.9");
        std::string w1, w2;
        float v1, v2;
        if( strtk::parse( s, whitespace_and_punctuation, w1, v1, w2, v2) )
        {
            std::cout << "word " << w1 << ", value " << v1 << std::endl;
            std::cout << "word " << w2 << ", value " << v2 << std::endl;
        }
    }

    return 0;
}

该工具包比这个简单示例显示的灵活性要高得多,但它在将字符串解析为有用元素方面的实用性令人难以置信。

LazyString拆分器:

#include <string>
#include <algorithm>
#include <unordered_set>

using namespace std;

class LazyStringSplitter
{
    string::const_iterator start, finish;
    unordered_set<char> chop;

public:

    // Empty Constructor
    explicit LazyStringSplitter()
    {}

    explicit LazyStringSplitter (const string cstr, const string delims)
        : start(cstr.begin())
        , finish(cstr.end())
        , chop(delims.begin(), delims.end())
    {}

    void operator () (const string cstr, const string delims)
    {
        chop.insert(delims.begin(), delims.end());
        start = cstr.begin();
        finish = cstr.end();
    }

    bool empty() const { return (start >= finish); }

    string next()
    {
        // return empty string
        // if ran out of characters
        if (empty())
            return string("");

        auto runner = find_if(start, finish, [&](char c) {
            return chop.count(c) == 1;
        });

        // construct next string
        string ret(start, runner);
        start = runner + 1;

        // Never return empty string
        // + tail recursion makes this method efficient
        return !ret.empty() ? ret : next();
    }
};

我将此方法称为LazyStringSplitter是因为一个原因——它不会一次性拆分字符串。本质上,它的行为类似于python生成器它公开了一个名为next的方法,该方法返回从原始字符串拆分的下一个字符串我使用了c++11STL中的无序集,因此查找分隔符的速度要快得多下面是它的工作原理

测试程序

#include <iostream>
using namespace std;

int main()
{
    LazyStringSplitter splitter;

    // split at the characters ' ', '!', '.', ','
    splitter("This, is a string. And here is another string! Let's test and see how well this does.", " !.,");

    while (!splitter.empty())
        cout << splitter.next() << endl;
    return 0;
}

输出,输出

This
is
a
string
And
here
is
another
string
Let's
test
and
see
how
well
this
does

改进这一点的下一个计划是实施开始和结束方法,以便可以执行以下操作:

vector<string> split_string(splitter.begin(), splitter.end());

这是我的方法,切割和分割:

string cut (string& str, const string& del)
{
    string f = str;

    if (in.find_first_of(del) != string::npos)
    {
        f = str.substr(0,str.find_first_of(del));
        str = str.substr(str.find_first_of(del)+del.length());
    }

    return f;
}

vector<string> split (const string& in, const string& del=" ")
{
    vector<string> out();
    string t = in;

    while (t.length() > del.length())
        out.push_back(cut(t,del));

    return out;
}

顺便说一下,如果我能做些什么来优化这个。。