如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

使用vector作为基类的快速版本,可完全访问其所有运算符:

    // Split string into parts.
    class Split : public std::vector<std::string>
    {
        public:
            Split(const std::string& str, char* delimList)
            {
               size_t lastPos = 0;
               size_t pos = str.find_first_of(delimList);

               while (pos != std::string::npos)
               {
                    if (pos != lastPos)
                        push_back(str.substr(lastPos, pos-lastPos));
                    lastPos = pos + 1;
                    pos = str.find_first_of(delimList, lastPos);
               }
               if (lastPos < str.length())
                   push_back(str.substr(lastPos, pos-lastPos));
            }
    };

用于填充STL集的示例:

std::set<std::string> words;
Split split("Hello,World", ",");
words.insert(split.begin(), split.end());

其他回答

我的实施可以是另一种解决方案:

std::vector<std::wstring> SplitString(const std::wstring & String, const std::wstring & Seperator)
{
    std::vector<std::wstring> Lines;
    size_t stSearchPos = 0;
    size_t stFoundPos;
    while (stSearchPos < String.size() - 1)
    {
        stFoundPos = String.find(Seperator, stSearchPos);
        stFoundPos = (stFoundPos == std::string::npos) ? String.size() : stFoundPos;
        Lines.push_back(String.substr(stSearchPos, stFoundPos - stSearchPos));
        stSearchPos = stFoundPos + Seperator.size();
    }
    return Lines;
}

测试代码:

std::wstring MyString(L"Part 1SEPsecond partSEPlast partSEPend");
std::vector<std::wstring> Parts = IniFile::SplitString(MyString, L"SEP");
std::wcout << L"The string: " << MyString << std::endl;
for (std::vector<std::wstring>::const_iterator it=Parts.begin(); it<Parts.end(); ++it)
{
    std::wcout << *it << L"<---" << std::endl;
}
std::wcout << std::endl;
MyString = L"this,time,a,comma separated,string";
std::wcout << L"The string: " << MyString << std::endl;
Parts = IniFile::SplitString(MyString, L",");
for (std::vector<std::wstring>::const_iterator it=Parts.begin(); it<Parts.end(); ++it)
{
    std::wcout << *it << L"<---" << std::endl;
}

测试代码的输出:

The string: Part 1SEPsecond partSEPlast partSEPend
Part 1<---
second part<---
last part<---
end<---

The string: this,time,a,comma separated,string
this<---
time<---
a<---
comma separated<---
string<---

如果您需要通过非空格符号解析字符串,则字符串流可能很方便:

string s = "Name:JAck; Spouse:Susan; ...";
string dummy, name, spouse;

istringstream iss(s);
getline(iss, dummy, ':');
getline(iss, name, ';');
getline(iss, dummy, ':');
getline(iss, spouse, ';')

我使用这个simpleton是因为我们得到了字符串类“特殊”(即非标准):

void splitString(const String &s, const String &delim, std::vector<String> &result) {
    const int l = delim.length();
    int f = 0;
    int i = s.indexOf(delim,f);
    while (i>=0) {
        String token( i-f > 0 ? s.substring(f,i-f) : "");
        result.push_back(token);
        f=i+l;
        i = s.indexOf(delim,f);
    }
    String token = s.substring(f);
    result.push_back(token);
}

另一种灵活快速的方式

template<typename Operator>
void tokenize(Operator& op, const char* input, const char* delimiters) {
  const char* s = input;
  const char* e = s;
  while (*e != 0) {
    e = s;
    while (*e != 0 && strchr(delimiters, *e) == 0) ++e;
    if (e - s > 0) {
      op(s, e - s);
    }
    s = e + 1;
  }
}

要将其与字符串向量一起使用(编辑:由于有人指出不继承STL类…hrmf;):

template<class ContainerType>
class Appender {
public:
  Appender(ContainerType& container) : container_(container) {;}
  void operator() (const char* s, unsigned length) { 
    container_.push_back(std::string(s,length));
  }
private:
  ContainerType& container_;
};

std::vector<std::string> strVector;
Appender v(strVector);
tokenize(v, "A number of words to be tokenized", " \t");

就是这样!这只是使用tokenizer的一种方式,比如如何计数单词:

class WordCounter {
public:
  WordCounter() : noOfWords(0) {}
  void operator() (const char*, unsigned) {
    ++noOfWords;
  }
  unsigned noOfWords;
};

WordCounter wc;
tokenize(wc, "A number of words to be counted", " \t"); 
ASSERT( wc.noOfWords == 7 );

受限于想象力;)

我相信还没有人发布这个解决方案。与其直接使用分隔符,它基本上与boost::split()相同,即它允许您传递一个谓词,如果字符是分隔符,则返回true,否则返回false。我认为这给了程序员更多的控制,最棒的是你不需要提升。

template <class Container, class String, class Predicate>
void split(Container& output, const String& input,
           const Predicate& pred, bool trimEmpty = false) {
    auto it = begin(input);
    auto itLast = it;
    while (it = find_if(it, end(input), pred), it != end(input)) {
        if (not (trimEmpty and it == itLast)) {
            output.emplace_back(itLast, it);
        }
        ++it;
        itLast = it;
    }
}

然后可以这样使用:

struct Delim {
    bool operator()(char c) {
        return not isalpha(c);
    }
};    

int main() {
    string s("#include<iostream>\n"
             "int main() { std::cout << \"Hello world!\" << std::endl; }");

    vector<string> v;

    split(v, s, Delim(), true);
    /* Which is also the same as */
    split(v, s, [](char c) { return not isalpha(c); }, true);

    for (const auto& i : v) {
        cout << i << endl;
    }
}