我在c++中使用以下方法解析字符串:

using namespace std;

string parsed,input="text to be parsed";
stringstream input_stringstream(input);

if (getline(input_stringstream,parsed,' '))
{
     // do some processing.
}

使用单个字符分隔符进行解析是可以的。但是如果我想使用字符串作为分隔符呢?

例子:我想拆分:

scott>=tiger

用>=作为分隔符,这样我就可以得到斯科特和老虎。


当前回答

我得到这个解。这很简单,所有的打印/值都在循环中(循环后不需要检查)。

#include <iostream>
#include <string>

using std::cout;
using std::string;

int main() {
    string s = "it-+is-+working!";
    string d = "-+";

    int firstFindI = 0;
    int secendFindI = 0;
    while (secendFindI != string::npos)
    {
        secendFindI = s.find(d, firstFindI);
        cout << s.substr(firstFindI, secendFindI - firstFindI) << "\n"; // print sliced part
        firstFindI = secendFindI + d.size(); // add to the search index
    }
}

感谢@SteveWard改进了这个答案。

其他回答

您可以使用std::string::find()函数来查找字符串分隔符的位置,然后使用std::string::substr()来获取一个令牌。

例子:

std::string s = "scott>=tiger";
std::string delimiter = ">=";
std::string token = s.substr(0, s.find(delimiter)); // token is "scott"

find(const string& str, size_t pos = 0)函数的作用是:返回字符串中str第一次出现的位置,如果没有找到则返回npos。 substr(size_t pos = 0, size_t n = npos)函数的作用是:返回对象的子字符串,从位置pos开始,长度npos。


如果你有多个分隔符,在你提取了一个标记之后,你可以删除它(包括分隔符)以继续后续的提取(如果你想保留原始字符串,只需使用s = s.s substr(pos + delimiter.length());):

s.erase(0, s.find(delimiter) + delimiter.length());

这样就可以轻松地循环获取每个令牌。

完整的示例

std::string s = "scott>=tiger>=mushroom";
std::string delimiter = ">=";

size_t pos = 0;
std::string token;
while ((pos = s.find(delimiter)) != std::string::npos) {
    token = s.substr(0, pos);
    std::cout << token << std::endl;
    s.erase(0, pos + delimiter.length());
}
std::cout << s << std::endl;

输出:

scott
tiger
mushroom

这是一个完整的方法,它在任何分隔符上分割字符串,并返回分割后的字符串的向量。

这是改编自ryanbwork的答案。然而,他的检查:if(token != mystring)给出错误的结果,如果你的字符串中有重复的元素。这是我对那个问题的解决方案。

vector<string> Split(string mystring, string delimiter)
{
    vector<string> subStringList;
    string token;
    while (true)
    {
        size_t findfirst = mystring.find_first_of(delimiter);
        if (findfirst == string::npos) //find_first_of returns npos if it couldn't find the delimiter anymore
        {
            subStringList.push_back(mystring); //push back the final piece of mystring
            return subStringList;
        }
        token = mystring.substr(0, mystring.find_first_of(delimiter));
        mystring = mystring.substr(mystring.find_first_of(delimiter) + 1);
        subStringList.push_back(token);
    }
    return subStringList;
}

从c++ 11开始,它可以这样做:

std::vector<std::string> splitString(const std::string& str,
                                     const std::regex& regex)
{
  return {std::sregex_token_iterator{str.begin(), str.end(), regex, -1}, 
          std::sregex_token_iterator() };
} 

// usually we have a predefined set of regular expressions: then
// let's build those only once and re-use them multiple times
static const std::regex regex1(R"some-reg-exp1", std::regex::optimize);
static const std::regex regex2(R"some-reg-exp2", std::regex::optimize);
static const std::regex regex3(R"some-reg-exp3", std::regex::optimize);

string str = "some string to split";
std::vector<std::string> tokens( splitString(str, regex1) ); 

注:

这是对这个答案的一个小小的改进 参见std::regex_constants::optimize使用的优化技术

我会使用boost::tokenizer。下面的文档解释了如何创建适当的标记器函数:http://www.boost.org/doc/libs/1_52_0/libs/tokenizer/tokenizerfunction.htm

这里有一个对你的案子有用。

struct my_tokenizer_func
{
    template<typename It>
    bool operator()(It& next, It end, std::string & tok)
    {
        if (next == end)
            return false;
        char const * del = ">=";
        auto pos = std::search(next, end, del, del + 2);
        tok.assign(next, pos);
        next = pos;
        if (next != end)
            std::advance(next, 2);
        return true;
    }

    void reset() {}
};

int main()
{
    std::string to_be_parsed = "1) one>=2) two>=3) three>=4) four";
    for (auto i : boost::tokenizer<my_tokenizer_func>(to_be_parsed))
        std::cout << i << '\n';
}

下面是一个使用Boost string Algorithms库和Boost Range库将一个字符串与另一个字符串分割的示例。这个解决方案的灵感来自StringAlgo库文档,请参阅Split部分。

下面是split_with_string函数的完整程序,以及全面的测试-用godbolt试试:

#include <iostream>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/range/iterator_range.hpp>

std::vector<std::string> split_with_string(std::string_view s, std::string_view search) 
{
    if (search.empty()) return {std::string{s}};

    std::vector<boost::iterator_range<std::string_view::iterator>> found;
    boost::algorithm::ifind_all(found, s, search);
    if (found.empty()) return {};

    std::vector<std::string> parts;
    parts.reserve(found.size() + 2); // a bit more

    std::string_view::iterator part_begin = s.cbegin(), part_end;
    for (auto& split_found : found)
    {
        // do not skip empty extracts
        part_end = split_found.begin();
        parts.emplace_back(part_begin, part_end);
        part_begin = split_found.end();
    }
    if (part_end != s.end())
        parts.emplace_back(part_begin, s.end());

    return parts;
}

#define TEST(expr) std::cout << ((!(expr)) ? "FAIL" : "PASS") << ": " #expr "\t" << std::endl

int main()
{
    auto s0 = split_with_string("adsf-+qwret-+nvfkbdsj", "");
    TEST(s0.size() == 1);
    TEST(s0.front() == "adsf-+qwret-+nvfkbdsj");
    auto s1 = split_with_string("adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s1.size() == 3);
    TEST(s1.front() == "adsf");
    TEST(s1.back() == "nvfkbdsj");
    auto s2 = split_with_string("-+adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s2.size() == 5);
    TEST(s2.front() == "");
    TEST(s2.back() == "");
    auto s3 = split_with_string("-+adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s3.size() == 4);
    TEST(s3.front() == "");
    TEST(s3.back() == "nvfkbdsj");
    auto s4 = split_with_string("adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s4.size() == 4);
    TEST(s4.front() == "adsf");
    TEST(s4.back() == "");
    auto s5 = split_with_string("dbo.abc", "dbo.");
    TEST(s5.size() == 2);
    TEST(s5.front() == "");
    TEST(s5.back() == "abc");
    auto s6 = split_with_string("dbo.abc", ".");
    TEST(s6.size() == 2);
    TEST(s6.front() == "dbo");
    TEST(s6.back() == "abc");
}

测试输出:

PASS: s0.size() == 1    
PASS: s0.front() == "adsf-+qwret-+nvfkbdsj" 
PASS: s1.size() == 3    
PASS: s1.front() == "adsf"  
PASS: s1.back() == "nvfkbdsj"   
PASS: s2.size() == 5    
PASS: s2.front() == ""  
PASS: s2.back() == ""   
PASS: s3.size() == 4    
PASS: s3.front() == ""  
PASS: s3.back() == "nvfkbdsj"   
PASS: s4.size() == 4    
PASS: s4.front() == "adsf"  
PASS: s4.back() == ""   
PASS: s5.size() == 2    
PASS: s5.front() == ""  
PASS: s5.back() == "abc"    
PASS: s6.size() == 2    
PASS: s6.front() == "dbo"   
PASS: s6.back() == "abc"