如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

这个呢

#include <string>
#include <vector>

using namespace std;

vector<string> split(string str, const char delim) {
    vector<string> v;
    string tmp;

    for(string::const_iterator i; i = str.begin(); i <= str.end(); ++i) {
        if(*i != delim && i != str.end()) {
            tmp += *i; 
        } else {
            v.push_back(tmp);
            tmp = ""; 
        }   
    }   

    return v;
}

其他回答

谢谢@Jairo Abdiel Toribio Cisneros。它对我有效,但您的函数返回一些空元素。因此,对于没有空的返回,我编辑了以下内容:

std::vector<std::string> split(std::string str, const char* delim) {
    std::vector<std::string> v;
    std::string tmp;

    for(std::string::const_iterator i = str.begin(); i <= str.end(); ++i) {
        if(*i != *delim && i != str.end()) {
            tmp += *i;
        } else {
            if (tmp.length() > 0) {
                v.push_back(tmp);
            }
            tmp = "";
        }
    }

    return v;
}

使用:

std::string s = "one:two::three";
std::string delim = ":";
std::vector<std::string> vv = split(s, delim.c_str());

LazyString拆分器:

#include <string>
#include <algorithm>
#include <unordered_set>

using namespace std;

class LazyStringSplitter
{
    string::const_iterator start, finish;
    unordered_set<char> chop;

public:

    // Empty Constructor
    explicit LazyStringSplitter()
    {}

    explicit LazyStringSplitter (const string cstr, const string delims)
        : start(cstr.begin())
        , finish(cstr.end())
        , chop(delims.begin(), delims.end())
    {}

    void operator () (const string cstr, const string delims)
    {
        chop.insert(delims.begin(), delims.end());
        start = cstr.begin();
        finish = cstr.end();
    }

    bool empty() const { return (start >= finish); }

    string next()
    {
        // return empty string
        // if ran out of characters
        if (empty())
            return string("");

        auto runner = find_if(start, finish, [&](char c) {
            return chop.count(c) == 1;
        });

        // construct next string
        string ret(start, runner);
        start = runner + 1;

        // Never return empty string
        // + tail recursion makes this method efficient
        return !ret.empty() ? ret : next();
    }
};

我将此方法称为LazyStringSplitter是因为一个原因——它不会一次性拆分字符串。本质上,它的行为类似于python生成器它公开了一个名为next的方法,该方法返回从原始字符串拆分的下一个字符串我使用了c++11STL中的无序集,因此查找分隔符的速度要快得多下面是它的工作原理

测试程序

#include <iostream>
using namespace std;

int main()
{
    LazyStringSplitter splitter;

    // split at the characters ' ', '!', '.', ','
    splitter("This, is a string. And here is another string! Let's test and see how well this does.", " !.,");

    while (!splitter.empty())
        cout << splitter.next() << endl;
    return 0;
}

输出,输出

This
is
a
string
And
here
is
another
string
Let's
test
and
see
how
well
this
does

改进这一点的下一个计划是实施开始和结束方法,以便可以执行以下操作:

vector<string> split_string(splitter.begin(), splitter.end());

在处理空格作为分隔符时,使用std::istream_iterator<T>的明显答案已经给出,并得到了很多支持。当然,元素可能不是用空格分隔,而是用一些分隔符分隔。我没有找到任何答案,只是重新定义了空格的含义,将其称为分隔符,然后使用常规方法。

要改变流对空白的看法,只需使用(std::istream::imbue())将流的std::locale更改为std::ctype<char>方面,并定义空白的含义(也可以对std:::ctype<wchar_t>进行更改,但实际上略有不同,因为std::actype<char>是表驱动的,而std::type<wchar_t>是由虚拟函数驱动的)。

#include <iostream>
#include <algorithm>
#include <iterator>
#include <sstream>
#include <locale>

struct whitespace_mask {
    std::ctype_base::mask mask_table[std::ctype<char>::table_size];
    whitespace_mask(std::string const& spaces) {
        std::ctype_base::mask* table = this->mask_table;
        std::ctype_base::mask const* tab
            = std::use_facet<std::ctype<char>>(std::locale()).table();
        for (std::size_t i(0); i != std::ctype<char>::table_size; ++i) {
            table[i] = tab[i] & ~std::ctype_base::space;
        }
        std::for_each(spaces.begin(), spaces.end(), [=](unsigned char c) {
            table[c] |= std::ctype_base::space;
        });
    }
};
class whitespace_facet
    : private whitespace_mask
    , public std::ctype<char> {
public:
    whitespace_facet(std::string const& spaces)
        : whitespace_mask(spaces)
        , std::ctype<char>(this->mask_table) {
    }
};

struct whitespace {
    std::string spaces;
    whitespace(std::string const& spaces): spaces(spaces) {}
};
std::istream& operator>>(std::istream& in, whitespace const& ws) {
    std::locale loc(in.getloc(), new whitespace_facet(ws.spaces));
    in.imbue(loc);
    return in;
}
// everything above would probably go into a utility library...

int main() {
    std::istringstream in("a, b, c, d, e");
    std::copy(std::istream_iterator<std::string>(in >> whitespace(", ")),
              std::istream_iterator<std::string>(),
              std::ostream_iterator<std::string>(std::cout, "\n"));

    std::istringstream pipes("a b c|  d |e     e");
    std::copy(std::istream_iterator<std::string>(pipes >> whitespace("|")),
              std::istream_iterator<std::string>(),
              std::ostream_iterator<std::string>(std::cout, "\n"));   
}

大部分代码用于打包提供软分隔符的通用工具:合并一行中的多个分隔符。无法生成空序列。当流中需要不同的分隔符时,您可能会使用共享流缓冲区使用不同的设置流:

void f(std::istream& in) {
    std::istream pipes(in.rdbuf());
    pipes >> whitespace("|");
    std::istream comma(in.rdbuf());
    comma >> whitespace(",");

    std::string s0, s1;
    if (pipes >> s0 >> std::ws   // read up to first pipe and ignore sequence of pipes
        && comma >> s1 >> std::ws) { // read up to first comma and ignore commas
        // ...
    }
}

下面的代码使用strtok()将字符串拆分为标记,并将标记存储在向量中。

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>

using namespace std;


char one_line_string[] = "hello hi how are you nice weather we are having ok then bye";
char seps[]   = " ,\t\n";
char *token;



int main()
{
   vector<string> vec_String_Lines;
   token = strtok( one_line_string, seps );

   cout << "Extracting and storing data in a vector..\n\n\n";

   while( token != NULL )
   {
      vec_String_Lines.push_back(token);
      token = strtok( NULL, seps );
   }
     cout << "Displaying end result in vector line storage..\n\n";

    for ( int i = 0; i < vec_String_Lines.size(); ++i)
    cout << vec_String_Lines[i] << "\n";
    cout << "\n\n\n";


return 0;
}
#include <iostream>
#include <string>
#include <deque>

std::deque<std::string> split(
    const std::string& line, 
    std::string::value_type delimiter,
    bool skipEmpty = false
) {
    std::deque<std::string> parts{};

    if (!skipEmpty && !line.empty() && delimiter == line.at(0)) {
        parts.push_back({});
    }

    for (const std::string::value_type& c : line) {
        if (
            (
                c == delimiter 
                &&
                (skipEmpty ? (!parts.empty() && !parts.back().empty()) : true)
            )
            ||
            (c != delimiter && parts.empty())
        ) {
            parts.push_back({});
        }

        if (c != delimiter) {
            parts.back().push_back(c);
        }
    }

    if (skipEmpty && !parts.empty() && parts.back().empty()) {
        parts.pop_back();
    }

    return parts;
}

void test(const std::string& line) {
    std::cout << line << std::endl;

    std::cout << "skipEmpty=0 |";
    for (const std::string& part : split(line, ':')) {
        std::cout << part << '|';
    }
    std::cout << std::endl;

    std::cout << "skipEmpty=1 |";
    for (const std::string& part : split(line, ':', true)) {
        std::cout << part << '|';
    }
    std::cout << std::endl;

    std::cout << std::endl;
}

int main() {
    test("foo:bar:::baz");
    test("");
    test("foo");
    test(":");
    test("::");
    test(":foo");
    test("::foo");
    test(":foo:");
    test(":foo::");

    return 0;
}

输出:

foo:bar:::baz
skipEmpty=0 |foo|bar|||baz|
skipEmpty=1 |foo|bar|baz|


skipEmpty=0 |
skipEmpty=1 |

foo
skipEmpty=0 |foo|
skipEmpty=1 |foo|

:
skipEmpty=0 |||
skipEmpty=1 |

::
skipEmpty=0 ||||
skipEmpty=1 |

:foo
skipEmpty=0 ||foo|
skipEmpty=1 |foo|

::foo
skipEmpty=0 |||foo|
skipEmpty=1 |foo|

:foo:
skipEmpty=0 ||foo||
skipEmpty=1 |foo|

:foo::
skipEmpty=0 ||foo|||
skipEmpty=1 |foo|