如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

到目前为止,我在Boost中使用了这个,但我需要一些不依赖它的东西,所以我得出了这个结论:

static void Split(std::vector<std::string>& lst, const std::string& input, const std::string& separators, bool remove_empty = true)
{
    std::ostringstream word;
    for (size_t n = 0; n < input.size(); ++n)
    {
        if (std::string::npos == separators.find(input[n]))
            word << input[n];
        else
        {
            if (!word.str().empty() || !remove_empty)
                lst.push_back(word.str());
            word.str("");
        }
    }
    if (!word.str().empty() || !remove_empty)
        lst.push_back(word.str());
}

好的一点是,在分隔符中可以传递多个字符。

其他回答

另一种灵活快速的方式

template<typename Operator>
void tokenize(Operator& op, const char* input, const char* delimiters) {
  const char* s = input;
  const char* e = s;
  while (*e != 0) {
    e = s;
    while (*e != 0 && strchr(delimiters, *e) == 0) ++e;
    if (e - s > 0) {
      op(s, e - s);
    }
    s = e + 1;
  }
}

要将其与字符串向量一起使用(编辑:由于有人指出不继承STL类…hrmf;):

template<class ContainerType>
class Appender {
public:
  Appender(ContainerType& container) : container_(container) {;}
  void operator() (const char* s, unsigned length) { 
    container_.push_back(std::string(s,length));
  }
private:
  ContainerType& container_;
};

std::vector<std::string> strVector;
Appender v(strVector);
tokenize(v, "A number of words to be tokenized", " \t");

就是这样!这只是使用tokenizer的一种方式,比如如何计数单词:

class WordCounter {
public:
  WordCounter() : noOfWords(0) {}
  void operator() (const char*, unsigned) {
    ++noOfWords;
  }
  unsigned noOfWords;
};

WordCounter wc;
tokenize(wc, "A number of words to be counted", " \t"); 
ASSERT( wc.noOfWords == 7 );

受限于想象力;)

有一种更简单的方法可以做到这一点!!

#include <vector>
#include <string>
std::vector<std::string> splitby(std::string string, char splitter) {
    int splits = 0;
    std::vector<std::string> result = {};
    std::string locresult = "";
    for (unsigned int i = 0; i < string.size(); i++) {
        if ((char)string.at(i) != splitter) {
            locresult += string.at(i);
        }
        else {
            result.push_back(locresult);
            locresult = "";
        }
    }
    if (splits == 0) {
        result.push_back(locresult);
    }
    return result;
}

void printvector(std::vector<std::string> v) {
    std::cout << '{';
    for (unsigned int i = 0; i < v.size(); i++) {
        if (i < v.size() - 1) {
            std::cout << '"' << v.at(i) << "\",";
        }
        else {
            std::cout << '"' << v.at(i) << "\"";
        }
    }
    std::cout << "}\n";
}

这是我的版本获取了Kev的来源:

#include <string>
#include <vector>
void split(vector<string> &result, string str, char delim ) {
  string tmp;
  string::iterator i;
  result.clear();

  for(i = str.begin(); i <= str.end(); ++i) {
    if((const char)*i != delim  && i != str.end()) {
      tmp += *i;
    } else {
      result.push_back(tmp);
      tmp = "";
    }
  }
}

之后,调用函数并执行以下操作:

vector<string> hosts;
split(hosts, "192.168.1.2,192.168.1.3", ',');
for( size_t i = 0; i < hosts.size(); i++){
  cout <<  "Connecting host : " << hosts.at(i) << "..." << endl;
}

我的实施可以是另一种解决方案:

std::vector<std::wstring> SplitString(const std::wstring & String, const std::wstring & Seperator)
{
    std::vector<std::wstring> Lines;
    size_t stSearchPos = 0;
    size_t stFoundPos;
    while (stSearchPos < String.size() - 1)
    {
        stFoundPos = String.find(Seperator, stSearchPos);
        stFoundPos = (stFoundPos == std::string::npos) ? String.size() : stFoundPos;
        Lines.push_back(String.substr(stSearchPos, stFoundPos - stSearchPos));
        stSearchPos = stFoundPos + Seperator.size();
    }
    return Lines;
}

测试代码:

std::wstring MyString(L"Part 1SEPsecond partSEPlast partSEPend");
std::vector<std::wstring> Parts = IniFile::SplitString(MyString, L"SEP");
std::wcout << L"The string: " << MyString << std::endl;
for (std::vector<std::wstring>::const_iterator it=Parts.begin(); it<Parts.end(); ++it)
{
    std::wcout << *it << L"<---" << std::endl;
}
std::wcout << std::endl;
MyString = L"this,time,a,comma separated,string";
std::wcout << L"The string: " << MyString << std::endl;
Parts = IniFile::SplitString(MyString, L",");
for (std::vector<std::wstring>::const_iterator it=Parts.begin(); it<Parts.end(); ++it)
{
    std::wcout << *it << L"<---" << std::endl;
}

测试代码的输出:

The string: Part 1SEPsecond partSEPlast partSEPend
Part 1<---
second part<---
last part<---
end<---

The string: this,time,a,comma separated,string
this<---
time<---
a<---
comma separated<---
string<---

这类似于堆栈溢出问题:如何在C++中标记字符串?。需要Boost外部库

#include <iostream>
#include <string>
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;

int main(int argc, char** argv)
{
    string text = "token  test\tstring";

    char_separator<char> sep(" \t");
    tokenizer<char_separator<char>> tokens(text, sep);
    for (const string& t : tokens)
    {
        cout << t << "." << endl;
    }
}