我在c++中使用以下方法解析字符串:

using namespace std;

string parsed,input="text to be parsed";
stringstream input_stringstream(input);

if (getline(input_stringstream,parsed,' '))
{
     // do some processing.
}

使用单个字符分隔符进行解析是可以的。但是如果我想使用字符串作为分隔符呢?

例子:我想拆分:

scott>=tiger

用>=作为分隔符,这样我就可以得到斯科特和老虎。


当前回答

作为奖励,这里有一个分割函数和宏的代码示例,很容易使用,你可以在其中选择容器类型:

#include <iostream>
#include <vector>
#include <string>

#define split(str, delim, type) (split_fn<type<std::string>>(str, delim))
 
template <typename Container>
Container split_fn(const std::string& str, char delim = ' ') {
    Container cont{};
    std::size_t current, previous = 0;
    current = str.find(delim);
    while (current != std::string::npos) {
        cont.push_back(str.substr(previous, current - previous));
        previous = current + 1;
        current = str.find(delim, previous);
    }
    cont.push_back(str.substr(previous, current - previous));
    
    return cont;
}

int main() {
    
    auto test = std::string{"This is a great test"};
    auto res = split(test, ' ', std::vector);
    
    for(auto &i : res) {
        std::cout << i << ", "; // "this", "is", "a", "great", "test"
    }
    
    
    return 0;
}

其他回答

这是一个简洁的分裂函数。我决定让背靠背分隔符返回为空字符串,但您可以很容易地检查子字符串是否为空,如果是,则不将其添加到向量。

#include <vector>
#include <string>
using namespace std;



vector<string> split(string to_split, string delimiter) {
    size_t pos = 0;
    vector<string> matches{};
    do {
        pos = to_split.find(delimiter);
        int change_end;
        if (pos == string::npos) {
            pos = to_split.length() - 1;
            change_end = 1;
        }
        else {
            change_end = 0;
        }
        matches.push_back(to_split.substr(0, pos+change_end));
        
        to_split.erase(0, pos+1);

    }
    while (!to_split.empty());
    return matches;

}

答案已经在那里,但选择答案使用擦除功能,这是非常昂贵的,想想一些非常大的字符串(以mb为单位)。因此我使用下面的函数。

vector<string> split(const string& i_str, const string& i_delim)
{
    vector<string> result;
    size_t startIndex = 0;

    for (size_t found = i_str.find(i_delim); found != string::npos; found = i_str.find(i_delim, startIndex))
    {
        result.emplace_back(i_str.begin()+startIndex, i_str.begin()+found);
        startIndex = found + i_delim.size();
    }
    if (startIndex != i_str.size())
        result.emplace_back(i_str.begin()+startIndex, i_str.end());
    return result;      
}

一个更简单的解决方案是-

可以使用strtok在多字符分隔符的基础上进行分隔。 记住使用strdup,这样原始字符串就不会发生变化。

#include <stdio.h>
#include <string.h>
const char* str = "scott>=tiger";
char *token = strtok(strdup(str), ">=");
while (token != NULL)
    {
        printf("%s\n", token);
        token = strtok(NULL, ">=");
    }

下面是一个使用Boost string Algorithms库和Boost Range库将一个字符串与另一个字符串分割的示例。这个解决方案的灵感来自StringAlgo库文档,请参阅Split部分。

下面是split_with_string函数的完整程序,以及全面的测试-用godbolt试试:

#include <iostream>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/range/iterator_range.hpp>

std::vector<std::string> split_with_string(std::string_view s, std::string_view search) 
{
    if (search.empty()) return {std::string{s}};

    std::vector<boost::iterator_range<std::string_view::iterator>> found;
    boost::algorithm::ifind_all(found, s, search);
    if (found.empty()) return {};

    std::vector<std::string> parts;
    parts.reserve(found.size() + 2); // a bit more

    std::string_view::iterator part_begin = s.cbegin(), part_end;
    for (auto& split_found : found)
    {
        // do not skip empty extracts
        part_end = split_found.begin();
        parts.emplace_back(part_begin, part_end);
        part_begin = split_found.end();
    }
    if (part_end != s.end())
        parts.emplace_back(part_begin, s.end());

    return parts;
}

#define TEST(expr) std::cout << ((!(expr)) ? "FAIL" : "PASS") << ": " #expr "\t" << std::endl

int main()
{
    auto s0 = split_with_string("adsf-+qwret-+nvfkbdsj", "");
    TEST(s0.size() == 1);
    TEST(s0.front() == "adsf-+qwret-+nvfkbdsj");
    auto s1 = split_with_string("adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s1.size() == 3);
    TEST(s1.front() == "adsf");
    TEST(s1.back() == "nvfkbdsj");
    auto s2 = split_with_string("-+adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s2.size() == 5);
    TEST(s2.front() == "");
    TEST(s2.back() == "");
    auto s3 = split_with_string("-+adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s3.size() == 4);
    TEST(s3.front() == "");
    TEST(s3.back() == "nvfkbdsj");
    auto s4 = split_with_string("adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s4.size() == 4);
    TEST(s4.front() == "adsf");
    TEST(s4.back() == "");
    auto s5 = split_with_string("dbo.abc", "dbo.");
    TEST(s5.size() == 2);
    TEST(s5.front() == "");
    TEST(s5.back() == "abc");
    auto s6 = split_with_string("dbo.abc", ".");
    TEST(s6.size() == 2);
    TEST(s6.front() == "dbo");
    TEST(s6.back() == "abc");
}

测试输出:

PASS: s0.size() == 1    
PASS: s0.front() == "adsf-+qwret-+nvfkbdsj" 
PASS: s1.size() == 3    
PASS: s1.front() == "adsf"  
PASS: s1.back() == "nvfkbdsj"   
PASS: s2.size() == 5    
PASS: s2.front() == ""  
PASS: s2.back() == ""   
PASS: s3.size() == 4    
PASS: s3.front() == ""  
PASS: s3.back() == "nvfkbdsj"   
PASS: s4.size() == 4    
PASS: s4.front() == "adsf"  
PASS: s4.back() == ""   
PASS: s5.size() == 2    
PASS: s5.front() == ""  
PASS: s5.back() == "abc"    
PASS: s6.size() == 2    
PASS: s6.front() == "dbo"   
PASS: s6.back() == "abc"    

一种非常简单/幼稚的方法:

vector<string> words_seperate(string s){
    vector<string> ans;
    string w="";
    for(auto i:s){
        if(i==' '){
           ans.push_back(w);
           w="";
        }
        else{
           w+=i;
        }
    }
    ans.push_back(w);
    return ans;
}

或者你可以使用boost库拆分函数:

vector<string> result; 
boost::split(result, input, boost::is_any_of("\t"));

或者你可以尝试TOKEN或strtok:

char str[] = "DELIMIT-ME-C++"; 
char *token = strtok(str, "-"); 
while (token) 
{ 
    cout<<token; 
    token = strtok(NULL, "-"); 
} 

或者你可以这样做:

char split_with=' ';
vector<string> words;
string token; 
stringstream ss(our_string);
while(getline(ss , token , split_with)) words.push_back(token);