
// Java-style reference snippet: split a sentence on single spaces and keep
// the resulting pieces in an array.
String str = "The quick brown fox";
String[] results = str.split(" ");



下面是我的 Swiss® 军刀式字符串标记器:它按空格拆分字符串,能处理由单引号或双引号包裹的字符串,并从结果中去掉这些引号字符。我使用 RegexBuddy 4.x 生成了大部分代码片段,但自行添加了剥离引号以及其他一些细节的处理。

#include <locale>
#include <regex>
#include <string>
#include <vector>

/// Splits a wide string into tokens separated by spaces.
///
/// A run of characters wrapped in matching single or double quotes is treated
/// as one token even if it contains spaces, and the surrounding quotes are
/// removed from the stored token. Everything else is split on the space
/// character.
///
/// @param string_to_tokenize  Text to split. Taken by value so the regex
///                            iterators always refer to storage owned by this
///                            call.
/// @return The tokens in order of appearance; empty if no token is found.
///
/// @note An empty quoted token (`""` or `''`) is stored verbatim, quotes
///       included, because only tokens longer than two characters are
///       unwrapped.
std::vector<std::wstring> tokenize_string(std::wstring string_to_tokenize) {
    std::vector<std::wstring> tokens;

    // Alternatives: "double quoted" | 'single quoted' | bare run without
    // quotes or spaces.
    const std::wregex re(LR"(("[^"]*"|'[^']*'|[^"' ]+))", std::regex_constants::collate);

    // The iterator needs the full [begin, end) range and the regex itself;
    // match_not_null prevents empty matches from being reported.
    std::wsregex_iterator next( string_to_tokenize.begin(),
                                string_to_tokenize.end(),
                                re,
                                std::regex_constants::match_not_null );
    const std::wsregex_iterator end;

    const wchar_t single_quote = L'\'';
    const wchar_t double_quote = L'\"';

    // Advance in the loop header so every path moves on to the next match.
    for ( ; next != end; ++next ) {
        const std::wstring token = next->str( 0 );

        if (token.length() > 2 && (token.front() == double_quote || token.front() == single_quote)) {
            // Quoted token: drop the opening and closing quote characters.
            tokens.emplace_back( token.begin() + 1, token.end() - 1 );
        } else {
            // Bare token (or an empty quoted pair): keep as matched.
            tokens.emplace_back( token );
        }
    }
    return tokens;
}



// Tokenize a C string on spaces with strtok() and print each token.
// The text is held in a writable array because strtok() overwrites each
// delimiter it finds with '\0'; it also keeps its scan position in hidden
// static state, so the continuation calls pass NULL instead of the buffer.
char myString[] = "The quick brown fox";
char *p = strtok(myString, " ");
while (p) {
    printf ("Token: %s\n", p);
    p = strtok(NULL, " ");   // NULL: continue scanning the same buffer
}




/// Splits `source` on every occurrence of `delimiter` and appends the pieces
/// to `Tokens`.
///
/// The delimiter is matched as a whole substring, so "::" splits "a::b" into
/// "a" and "b". Adjacent delimiters produce empty tokens, and a source with no
/// delimiter (including an empty source) yields exactly one token.
///
/// @param source     Text to split; it is only read.
/// @param delimiter  NUL-terminated separator. A null or empty delimiter means
///                   "do not split": the whole source is appended as one token.
/// @param Tokens     Output vector; existing elements are kept and new tokens
///                   are appended after them.
void StrTokenizer(std::string& source, const char* delimiter, std::vector<std::string>& Tokens)
{
   typedef std::string::size_type size_type;

   const size_type delim_len =
      delimiter ? std::char_traits<char>::length(delimiter) : 0;

   // Without a usable delimiter there is nothing to search for; searching for
   // "" would match at every position and eventually run past the end.
   if (delim_len == 0)
   {
      Tokens.emplace_back(source);
      return;
   }

   size_type old_index = 0;
   for (;;)
   {
      const size_type new_index = source.find(delimiter, old_index, delim_len);

      // When new_index is npos the count is clipped to the end of the string.
      Tokens.emplace_back(source, old_index, new_index - old_index);

      if (new_index == std::string::npos)
         break;

      // Skip the whole delimiter, not just its first character.
      old_index = new_index + delim_len;
   }
}


无动态内存分配,不使用 Boost,不使用正则表达式,符合 C++17 标准。


#include <iomanip>
#include <iostream>
#include <iterator>
#include <string_view>
#include <utility>

/// Range adaptor that walks the space-separated words of a string view
/// without creating any container: each word is handed out as a
/// `std::string_view` into the original text.
///
/// Leading, trailing and repeated spaces are skipped, so no empty words are
/// produced. The viewed character data must outlive the range and every
/// iterator or word obtained from it.
struct split_by_spaces
{
    std::string_view      text;
    static constexpr char delim = ' ';

    /// Forward-only cursor over the words of `text`.
    /// [cur_pos, end_pos) is the current word; the past-the-end iterator has
    /// both positions equal to `text.size()`.
    struct iterator
    {
        // Held by value (a string_view is just pointer + length) so the
        // iterator stays valid even if the split_by_spaces object goes away.
        std::string_view text;
        std::size_t      cur_pos;
        std::size_t      end_pos;

        /// Returns the current word (empty for the past-the-end iterator).
        std::string_view operator*() const
        {
            // substr avoids indexing text[size()] at the end position.
            return text.substr(cur_pos, end_pos - cur_pos);
        }

        bool operator==(const iterator& other) const
        {
            return cur_pos == other.cur_pos && end_pos == other.end_pos;
        }

        bool operator!=(const iterator& other) const
        {
            return !(*this == other);
        }

        /// Moves to the next word, or to the end position if none is left.
        iterator& operator++()
        {
            cur_pos = text.find_first_not_of(delim, end_pos);

            if (cur_pos == std::string_view::npos)
            {
                // Only spaces (or nothing) remain: become the end iterator.
                cur_pos = text.size();
                end_pos = cur_pos;
                return *this;
            }

            end_pos = text.find(delim, cur_pos);

            // The last word may run to the end of the text with no trailing
            // space; clamp instead of keeping npos as the word end.
            if (end_pos == std::string_view::npos)
            {
                end_pos = text.size();
            }

            return *this;
        }
    };

    /// Iterator to the first word, or `end()` if the text has no word.
    [[nodiscard]] iterator begin() const
    {
        const auto start = text.find_first_not_of(delim);
        if (start == std::string_view::npos)
        {
            return iterator{ text, text.size(), text.size() };
        }
        auto end_word = text.find(delim, start);
        if (end_word == std::string_view::npos)
        {
            end_word = text.size();
        }
        return iterator{ text, start, end_word };
    }

    /// Past-the-end iterator.
    [[nodiscard]] iterator end() const
    {
        return iterator{ text, text.size(), text.size() };
    }
};

int main(int argc, char** argv)
    using namespace std::literals;
    auto str = " there should be no memory allocation during parsing"
               "  into words this line and you   should'n create any"
               "  contaner                  for intermediate words  "sv;

    auto comma = "";
    for (std::string_view word : split_by_spaces{ str })
        std::cout << std::exchange(comma, ",") << std::quoted(word);

    auto only_spaces = "                   "sv;
    for (std::string_view word : split_by_spaces{ only_spaces })
        std::cout << "you will not see this line in output" << std::endl;

简单的c++代码(标准c++ 98),接受多个分隔符(在std::string中指定),只使用向量、字符串和迭代器。

#include <iostream>
#include <vector>
#include <string>
#include <stdexcept> 

// Splits `str` into tokens, treating every character of `delim` as a
// separator (C++98-compatible).
//
// Runs of consecutive separators count as one, and separators at the start or
// end of the string are ignored, so no empty tokens are returned. With an
// empty `delim` the whole string is returned as a single token.
//
// Parameters:
//   str   - text to split; must not be empty.
//   delim - set of separator characters.
// Returns the tokens in order of appearance.
// Throws std::runtime_error if `str` is empty.
std::vector<std::string> split(const std::string& str, const std::string& delim)
{
    if (str.empty())
        throw std::runtime_error("Can not tokenize an empty string!");

    std::vector<std::string> result;

    // Work with positions rather than iterators so the end of the string is
    // never dereferenced.
    std::string::size_type begin = str.find_first_not_of(delim);
    while (begin != std::string::npos) {
        const std::string::size_type end = str.find_first_of(delim, begin);

        // When end is npos the count is clipped to the end of the string.
        result.push_back(str.substr(begin, end - begin));

        if (end == std::string::npos)
            break; // the last token ran to the end of the string

        // Skip the additional consecutive delimiters.
        begin = str.find_first_not_of(delim, end);
    }
    return result;
}

// Demo for split(): tokenizes a sample string on the characters ';', ' ',
// '.' and '/' and prints one token per line.
int main() {
    std::string test_string = ".this is.a.../.simple;;test;;;END";
    std::string delim = "; ./"; // string containing the delimiters
    std::vector<std::string> tokens = split(test_string, delim);
    for (std::vector<std::string>::const_iterator it = tokens.begin();
         it != tokens.end(); ++it) {
        std::cout << *it << std::endl;
    }
    return 0;
}


#include <iostream>
#include <string>
#include <boost/foreach.hpp>
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;

int main(int, char**)
    string text = "token, test   string";

    char_separator<char> sep(", ");
    tokenizer< char_separator<char> > tokens(text, sep);
    BOOST_FOREACH (const string& t, tokens) {
        cout << t << "." << endl;

针对c++ 11更新:

#include <iostream>
#include <string>
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;

int main(int, char**)
    string text = "token, test   string";

    char_separator<char> sep(", ");
    tokenizer<char_separator<char>> tokens(text, sep);
    for (const auto& t : tokens) {
        cout << t << "." << endl;