如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

在处理空格作为分隔符时,使用std::istream_iterator<T>的明显答案已经给出,并得到了很多支持。当然,元素可能不是用空格分隔,而是用一些分隔符分隔。我没有找到任何答案,只是重新定义了空格的含义,将其称为分隔符,然后使用常规方法。

要改变流对空白的看法,只需使用(std::istream::imbue())将流的std::locale更改为std::ctype<char>方面,并定义空白的含义(也可以对std:::ctype<wchar_t>进行更改,但实际上略有不同,因为std::actype<char>是表驱动的,而std::type<wchar_t>是由虚拟函数驱动的)。

#include <iostream>
#include <algorithm>
#include <iterator>
#include <sstream>
#include <locale>

struct whitespace_mask {
    std::ctype_base::mask mask_table[std::ctype<char>::table_size];
    whitespace_mask(std::string const& spaces) {
        std::ctype_base::mask* table = this->mask_table;
        std::ctype_base::mask const* tab
            = std::use_facet<std::ctype<char>>(std::locale()).table();
        for (std::size_t i(0); i != std::ctype<char>::table_size; ++i) {
            table[i] = tab[i] & ~std::ctype_base::space;
        }
        std::for_each(spaces.begin(), spaces.end(), [=](unsigned char c) {
            table[c] |= std::ctype_base::space;
        });
    }
};
class whitespace_facet
    : private whitespace_mask
    , public std::ctype<char> {
public:
    whitespace_facet(std::string const& spaces)
        : whitespace_mask(spaces)
        , std::ctype<char>(this->mask_table) {
    }
};

struct whitespace {
    std::string spaces;
    whitespace(std::string const& spaces): spaces(spaces) {}
};
std::istream& operator>>(std::istream& in, whitespace const& ws) {
    std::locale loc(in.getloc(), new whitespace_facet(ws.spaces));
    in.imbue(loc);
    return in;
}
// everything above would probably go into a utility library...

int main() {
    std::istringstream in("a, b, c, d, e");
    std::copy(std::istream_iterator<std::string>(in >> whitespace(", ")),
              std::istream_iterator<std::string>(),
              std::ostream_iterator<std::string>(std::cout, "\n"));

    std::istringstream pipes("a b c|  d |e     e");
    std::copy(std::istream_iterator<std::string>(pipes >> whitespace("|")),
              std::istream_iterator<std::string>(),
              std::ostream_iterator<std::string>(std::cout, "\n"));   
}

大部分代码用于打包提供软分隔符的通用工具:合并一行中的多个分隔符。无法生成空序列。当流中需要不同的分隔符时,您可能会使用共享流缓冲区使用不同的设置流:

void f(std::istream& in) {
    std::istream pipes(in.rdbuf());
    pipes >> whitespace("|");
    std::istream comma(in.rdbuf());
    comma >> whitespace(",");

    std::string s0, s1;
    if (pipes >> s0 >> std::ws   // read up to first pipe and ignore sequence of pipes
        && comma >> s1 >> std::ws) { // read up to first comma and ignore commas
        // ...
    }
}

其他回答

我这样做是因为我需要一种简单的方法来分割字符串和基于c的字符串。。。希望其他人也能发现它很有用。此外,它不依赖令牌,您可以使用字段作为分隔符,这是我需要的另一个键。

我相信可以做一些改进,以进一步提高其优雅度,请尽一切努力

StringSplitter.hpp:

#include <vector>
#include <iostream>
#include <string.h>

using namespace std;

class StringSplit
{
private:
    void copy_fragment(char*, char*, char*);
    void copy_fragment(char*, char*, char);
    bool match_fragment(char*, char*, int);
    int untilnextdelim(char*, char);
    int untilnextdelim(char*, char*);
    void assimilate(char*, char);
    void assimilate(char*, char*);
    bool string_contains(char*, char*);
    long calc_string_size(char*);
    void copy_string(char*, char*);

public:
    vector<char*> split_cstr(char);
    vector<char*> split_cstr(char*);
    vector<string> split_string(char);
    vector<string> split_string(char*);
    char* String;
    bool do_string;
    bool keep_empty;
    vector<char*> Container;
    vector<string> ContainerS;

    StringSplit(char * in)
    {
        String = in;
    }

    StringSplit(string in)
    {
        size_t len = calc_string_size((char*)in.c_str());
        String = new char[len + 1];
        memset(String, 0, len + 1);
        copy_string(String, (char*)in.c_str());
        do_string = true;
    }

    ~StringSplit()
    {
        for (int i = 0; i < Container.size(); i++)
        {
            if (Container[i] != NULL)
            {
                delete[] Container[i];
            }
        }
        if (do_string)
        {
            delete[] String;
        }
    }
};

StringSplitter.cpp:

#include <string.h>
#include <iostream>
#include <vector>
#include "StringSplit.hpp"

using namespace std;

void StringSplit::assimilate(char*src, char delim)
{
    int until = untilnextdelim(src, delim);
    if (until > 0)
    {
        char * temp = new char[until + 1];
        memset(temp, 0, until + 1);
        copy_fragment(temp, src, delim);
        if (keep_empty || *temp != 0)
        {
            if (!do_string)
            {
                Container.push_back(temp);
            }
            else
            {
                string x = temp;
                ContainerS.push_back(x);
            }

        }
        else
        {
            delete[] temp;
        }
    }
}

void StringSplit::assimilate(char*src, char* delim)
{
    int until = untilnextdelim(src, delim);
    if (until > 0)
    {
        char * temp = new char[until + 1];
        memset(temp, 0, until + 1);
        copy_fragment(temp, src, delim);
        if (keep_empty || *temp != 0)
        {
            if (!do_string)
            {
                Container.push_back(temp);
            }
            else
            {
                string x = temp;
                ContainerS.push_back(x);
            }
        }
        else
        {
            delete[] temp;
        }
    }
}

long StringSplit::calc_string_size(char* _in)
{
    long i = 0;
    while (*_in++)
    {
        i++;
    }
    return i;
}

bool StringSplit::string_contains(char* haystack, char* needle)
{
    size_t len = calc_string_size(needle);
    size_t lenh = calc_string_size(haystack);
    while (lenh--)
    {
        if (match_fragment(haystack + lenh, needle, len))
        {
            return true;
        }
    }
    return false;
}

bool StringSplit::match_fragment(char* _src, char* cmp, int len)
{
    while (len--)
    {
        if (*(_src + len) != *(cmp + len))
        {
            return false;
        }
    }
    return true;
}

int StringSplit::untilnextdelim(char* _in, char delim)
{
    size_t len = calc_string_size(_in);
    if (*_in == delim)
    {
        _in += 1;
        return len - 1;
    }

    int c = 0;
    while (*(_in + c) != delim && c < len)
    {
        c++;
    }

    return c;
}

int StringSplit::untilnextdelim(char* _in, char* delim)
{
    int s = calc_string_size(delim);
    int c = 1 + s;

    if (!string_contains(_in, delim))
    {
        return calc_string_size(_in);
    }
    else if (match_fragment(_in, delim, s))
    {
        _in += s;
        return calc_string_size(_in);
    }

    while (!match_fragment(_in + c, delim, s))
    {
        c++;
    }

    return c;
}

void StringSplit::copy_fragment(char* dest, char* src, char delim)
{
    if (*src == delim)
    {
        src++;
    }

    int c = 0;
    while (*(src + c) != delim && *(src + c))
    {
        *(dest + c) = *(src + c);
        c++;
    }
    *(dest + c) = 0;
}

void StringSplit::copy_string(char* dest, char* src)
{
    int i = 0;
    while (*(src + i))
    {
        *(dest + i) = *(src + i);
        i++;
    }
}

void StringSplit::copy_fragment(char* dest, char* src, char* delim)
{
    size_t len = calc_string_size(delim);
    size_t lens = calc_string_size(src);

    if (match_fragment(src, delim, len))
    {
        src += len;
        lens -= len;
    }

    int c = 0;
    while (!match_fragment(src + c, delim, len) && (c < lens))
    {
        *(dest + c) = *(src + c);
        c++;
    }
    *(dest + c) = 0;
}

vector<char*> StringSplit::split_cstr(char Delimiter)
{
    int i = 0;
    while (*String)
    {
        if (*String != Delimiter && i == 0)
        {
            assimilate(String, Delimiter);
        }
        if (*String == Delimiter)
        {
            assimilate(String, Delimiter);
        }
        i++;
        String++;
    }

    String -= i;
    delete[] String;

    return Container;
}

vector<string> StringSplit::split_string(char Delimiter)
{
    do_string = true;

    int i = 0;
    while (*String)
    {
        if (*String != Delimiter && i == 0)
        {
            assimilate(String, Delimiter);
        }
        if (*String == Delimiter)
        {
            assimilate(String, Delimiter);
        }
        i++;
        String++;
    }

    String -= i;
    delete[] String;

    return ContainerS;
}

vector<char*> StringSplit::split_cstr(char* Delimiter)
{
    int i = 0;
    size_t LenDelim = calc_string_size(Delimiter);

    while(*String)
    {
        if (!match_fragment(String, Delimiter, LenDelim) && i == 0)
        {
            assimilate(String, Delimiter);
        }
        if (match_fragment(String, Delimiter, LenDelim))
        {
            assimilate(String,Delimiter);
        }
        i++;
        String++;
    }

    String -= i;
    delete[] String;

    return Container;
}

vector<string> StringSplit::split_string(char* Delimiter)
{
    do_string = true;
    int i = 0;
    size_t LenDelim = calc_string_size(Delimiter);

    while (*String)
    {
        if (!match_fragment(String, Delimiter, LenDelim) && i == 0)
        {
            assimilate(String, Delimiter);
        }
        if (match_fragment(String, Delimiter, LenDelim))
        {
            assimilate(String, Delimiter);
        }
        i++;
        String++;
    }

    String -= i;
    delete[] String;

    return ContainerS;
}

示例:

int main(int argc, char*argv[])
{
    StringSplit ss = "This:CUT:is:CUT:an:CUT:example:CUT:cstring";
    vector<char*> Split = ss.split_cstr(":CUT:");

    for (int i = 0; i < Split.size(); i++)
    {
        cout << Split[i] << endl;
    }

    return 0;
}

将输出:

这是一实例cst环

int main(int argc, char*argv[])
{
    StringSplit ss = "This:is:an:example:cstring";
    vector<char*> Split = ss.split_cstr(':');

    for (int i = 0; i < Split.size(); i++)
    {
        cout << Split[i] << endl;
    }

    return 0;
}

int main(int argc, char*argv[])
{
    string mystring = "This[SPLIT]is[SPLIT]an[SPLIT]example[SPLIT]string";
    StringSplit ss = mystring;
    vector<string> Split = ss.split_string("[SPLIT]");

    for (int i = 0; i < Split.size(); i++)
    {
        cout << Split[i] << endl;
    }

    return 0;
}

int main(int argc, char*argv[])
{
    string mystring = "This|is|an|example|string";
    StringSplit ss = mystring;
    vector<string> Split = ss.split_string('|');

    for (int i = 0; i < Split.size(); i++)
    {
        cout << Split[i] << endl;
    }

    return 0;
}

要保留空条目(默认情况下将排除空条目):

StringSplit ss = mystring;
ss.keep_empty = true;
vector<string> Split = ss.split_string(":DELIM:");

目标是使其类似于C#的Split()方法,其中拆分字符串非常简单:

String[] Split = 
    "Hey:cut:what's:cut:your:cut:name?".Split(new[]{":cut:"}, StringSplitOptions.None);

foreach(String X in Split)
{
    Console.Write(X);
}

我希望其他人能像我一样觉得这很有用。

#include <iostream>
#include <string>
#include <deque>

std::deque<std::string> split(
    const std::string& line, 
    std::string::value_type delimiter,
    bool skipEmpty = false
) {
    std::deque<std::string> parts{};

    if (!skipEmpty && !line.empty() && delimiter == line.at(0)) {
        parts.push_back({});
    }

    for (const std::string::value_type& c : line) {
        if (
            (
                c == delimiter 
                &&
                (skipEmpty ? (!parts.empty() && !parts.back().empty()) : true)
            )
            ||
            (c != delimiter && parts.empty())
        ) {
            parts.push_back({});
        }

        if (c != delimiter) {
            parts.back().push_back(c);
        }
    }

    if (skipEmpty && !parts.empty() && parts.back().empty()) {
        parts.pop_back();
    }

    return parts;
}

void test(const std::string& line) {
    std::cout << line << std::endl;

    std::cout << "skipEmpty=0 |";
    for (const std::string& part : split(line, ':')) {
        std::cout << part << '|';
    }
    std::cout << std::endl;

    std::cout << "skipEmpty=1 |";
    for (const std::string& part : split(line, ':', true)) {
        std::cout << part << '|';
    }
    std::cout << std::endl;

    std::cout << std::endl;
}

int main() {
    test("foo:bar:::baz");
    test("");
    test("foo");
    test(":");
    test("::");
    test(":foo");
    test("::foo");
    test(":foo:");
    test(":foo::");

    return 0;
}

输出:

foo:bar:::baz
skipEmpty=0 |foo|bar|||baz|
skipEmpty=1 |foo|bar|baz|


skipEmpty=0 |
skipEmpty=1 |

foo
skipEmpty=0 |foo|
skipEmpty=1 |foo|

:
skipEmpty=0 |||
skipEmpty=1 |

::
skipEmpty=0 ||||
skipEmpty=1 |

:foo
skipEmpty=0 ||foo|
skipEmpty=1 |foo|

::foo
skipEmpty=0 |||foo|
skipEmpty=1 |foo|

:foo:
skipEmpty=0 ||foo||
skipEmpty=1 |foo|

:foo::
skipEmpty=0 ||foo|||
skipEmpty=1 |foo|

我编写了以下代码。您可以指定分隔符,它可以是字符串。结果类似于Java的String.split,结果中包含空字符串。

例如,如果我们调用split(“ABCPICKABCANYABCTWO:ABC”,“ABC”),结果如下:

0  <len:0>
1 PICK <len:4>
2 ANY <len:3>
3 TWO: <len:4>
4  <len:0>

代码:

vector <string> split(const string& str, const string& delimiter = " ") {
    vector <string> tokens;

    string::size_type lastPos = 0;
    string::size_type pos = str.find(delimiter, lastPos);

    while (string::npos != pos) {
        // Found a token, add it to the vector.
        cout << str.substr(lastPos, pos - lastPos) << endl;
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        lastPos = pos + delimiter.size();
        pos = str.find(delimiter, lastPos);
    }

    tokens.push_back(str.substr(lastPos, str.size() - lastPos));
    return tokens;
}

没有Boost,没有字符串流,只有标准的C库与std::string和std::list:C库函数配合使用,便于分析,C++数据类型便于内存管理。

空白被认为是换行符、制表符和空格的任意组合。空白字符集由wschars变量建立。

#include <string>
#include <list>
#include <iostream>
#include <cstring>

using namespace std;

const char *wschars = "\t\n ";

list<string> split(const string &str)
{
  const char *cstr = str.c_str();
  list<string> out;

  while (*cstr) {                     // while remaining string not empty
    size_t toklen;
    cstr += strspn(cstr, wschars);    // skip leading whitespace
    toklen = strcspn(cstr, wschars);  // figure out token length
    if (toklen)                       // if we have a token, add to list
      out.push_back(string(cstr, toklen));
    cstr += toklen;                   // skip over token
  }

  // ran out of string; return list

  return out;
}

int main(int argc, char **argv)
{
  list<string> li = split(argv[1]);
  for (list<string>::iterator i = li.begin(); i != li.end(); i++)
    cout << "{" << *i << "}" << endl;
  return 0;
}

Run:

$ ./split ""
$ ./split "a"
{a}
$ ./split " a "
{a}
$ ./split " a b"
{a}
{b}
$ ./split " a b c"
{a}
{b}
{c}
$ ./split " a b c d  "
{a}
{b}
{c}
{d}

split的尾部递归版本(本身分裂为两个函数)。除了将字符串推入列表之外,所有对变量的破坏性操作都消失了!

void split_rec(const char *cstr, list<string> &li)
{
  if (*cstr) {
    const size_t leadsp = strspn(cstr, wschars);
    const size_t toklen = strcspn(cstr + leadsp, wschars);

    if (toklen)
      li.push_back(string(cstr + leadsp, toklen));

    split_rec(cstr + leadsp + toklen, li);
  }
}

list<string> split(const string &str)
{
  list<string> out;
  split_rec(str.c_str(), out);
  return out;
}

对于一个大得离谱而且可能是冗余的版本,可以尝试很多For循环。

string stringlist[10];
int count = 0;

for (int i = 0; i < sequence.length(); i++)
{
    if (sequence[i] == ' ')
    {
        stringlist[count] = sequence.substr(0, i);
        sequence.erase(0, i+1);
        i = 0;
        count++;
    }
    else if (i == sequence.length()-1)  // Last word
    {
        stringlist[count] = sequence.substr(0, i+1);
    }
}

它并不漂亮,但总的来说(除了标点符号和一系列其他错误)它是有效的!