我在c++中使用以下方法解析字符串:

using namespace std;

string parsed,input="text to be parsed";
stringstream input_stringstream(input);

if (getline(input_stringstream,parsed,' '))
{
     // do some processing.
}

使用单个字符分隔符进行解析是可以的。但是如果我想使用字符串作为分隔符呢?

例子:我想拆分:

scott>=tiger

用>=作为分隔符,这样我就可以得到斯科特和老虎。


当前回答

以下是我的看法。它处理边缘情况,并接受一个可选参数从结果中删除空条目。

bool endsWith(const std::string& s, const std::string& suffix)
{
    return s.size() >= suffix.size() &&
           s.substr(s.size() - suffix.size()) == suffix;
}

std::vector<std::string> split(const std::string& s, const std::string& delimiter, const bool removeEmptyEntries = false)
{
    std::vector<std::string> tokens;

    for (size_t start = 0, end; start < s.length(); start = end + delimiter.length())
    {
         size_t position = s.find(delimiter, start);
         end = position != std::string::npos ? position : s.length();

         std::string token = s.substr(start, end - start);
         if (!removeEmptyEntries || !token.empty())
         {
             tokens.push_back(token);
         }
    }

    if (!removeEmptyEntries &&
        (s.empty() || endsWith(s, delimiter)))
    {
        tokens.push_back("");
    }

    return tokens;
}

例子

split("a-b-c", "-"); // [3]("a","b","c")

split("a--c", "-"); // [3]("a","","c")

split("-b-", "-"); // [3]("","b","")

split("--c--", "-"); // [5]("","","c","","")

split("--c--", "-", true); // [1]("c")

split("a", "-"); // [1]("a")

split("", "-"); // [1]("")

split("", "-", true); // [0]()

其他回答

作为奖励,这里有一个分割函数和宏的代码示例,很容易使用,你可以在其中选择容器类型:

#include <iostream>
#include <vector>
#include <string>

#define split(str, delim, type) (split_fn<type<std::string>>(str, delim))
 
template <typename Container>
Container split_fn(const std::string& str, char delim = ' ') {
    Container cont{};
    std::size_t current, previous = 0;
    current = str.find(delim);
    while (current != std::string::npos) {
        cont.push_back(str.substr(previous, current - previous));
        previous = current + 1;
        current = str.find(delim, previous);
    }
    cont.push_back(str.substr(previous, current - previous));
    
    return cont;
}

int main() {
    
    auto test = std::string{"This is a great test"};
    auto res = split(test, ' ', std::vector);
    
    for(auto &i : res) {
        std::cout << i << ", "; // "this", "is", "a", "great", "test"
    }
    
    
    return 0;
}
#include<iostream>
#include<algorithm>
using namespace std;

int split_count(string str,char delimit){
return count(str.begin(),str.end(),delimit);
}

void split(string str,char delimit,string res[]){
int a=0,i=0;
while(a<str.size()){
res[i]=str.substr(a,str.find(delimit));
a+=res[i].size()+1;
i++;
}
}

int main(){

string a="abc.xyz.mno.def";
int x=split_count(a,'.')+1;
string res[x];
split(a,'.',res);

for(int i=0;i<x;i++)
cout<<res[i]<<endl;
  return 0;
}

注:仅当分割后的字符串长度相等时才有效

一个更简单的解决方案是-

可以使用strtok在多字符分隔符的基础上进行分隔。 记住使用strdup,这样原始字符串就不会发生变化。

#include <stdio.h>
#include <string.h>
const char* str = "scott>=tiger";
char *token = strtok(strdup(str), ">=");
while (token != NULL)
    {
        printf("%s\n", token);
        token = strtok(NULL, ">=");
    }

功能:

std::vector<std::string> WSJCppCore::split(const std::string& sWhat, const std::string& sDelim) {
    std::vector<std::string> vRet;
    size_t nPos = 0;
    size_t nLen = sWhat.length();
    size_t nDelimLen = sDelim.length();
    while (nPos < nLen) {
        std::size_t nFoundPos = sWhat.find(sDelim, nPos);
        if (nFoundPos != std::string::npos) {
            std::string sToken = sWhat.substr(nPos, nFoundPos - nPos);
            vRet.push_back(sToken);
            nPos = nFoundPos + nDelimLen;
            if (nFoundPos + nDelimLen == nLen) { // last delimiter
                vRet.push_back("");
            }
        } else {
            std::string sToken = sWhat.substr(nPos, nLen - nPos);
            vRet.push_back(sToken);
            break;
        }
    }
    return vRet;
}

单元测试:

bool UnitTestSplit::run() {
bool bTestSuccess = true;

    struct LTest {
        LTest(
            const std::string &sStr,
            const std::string &sDelim,
            const std::vector<std::string> &vExpectedVector
        ) {
            this->sStr = sStr;
            this->sDelim = sDelim;
            this->vExpectedVector = vExpectedVector;
        };
        std::string sStr;
        std::string sDelim;
        std::vector<std::string> vExpectedVector;
    };
    std::vector<LTest> tests;
    tests.push_back(LTest("1 2 3 4 5", " ", {"1", "2", "3", "4", "5"}));
    tests.push_back(LTest("|1f|2п|3%^|44354|5kdasjfdre|2", "|", {"", "1f", "2п", "3%^", "44354", "5kdasjfdre", "2"}));
    tests.push_back(LTest("|1f|2п|3%^|44354|5kdasjfdre|", "|", {"", "1f", "2п", "3%^", "44354", "5kdasjfdre", ""}));
    tests.push_back(LTest("some1 => some2 => some3", "=>", {"some1 ", " some2 ", " some3"}));
    tests.push_back(LTest("some1 => some2 => some3 =>", "=>", {"some1 ", " some2 ", " some3 ", ""}));

    for (int i = 0; i < tests.size(); i++) {
        LTest test = tests[i];
        std::string sPrefix = "test" + std::to_string(i) + "(\"" + test.sStr + "\")";
        std::vector<std::string> vSplitted = WSJCppCore::split(test.sStr, test.sDelim);
        compareN(bTestSuccess, sPrefix + ": size", vSplitted.size(), test.vExpectedVector.size());
        int nMin = std::min(vSplitted.size(), test.vExpectedVector.size());
        for (int n = 0; n < nMin; n++) {
            compareS(bTestSuccess, sPrefix + ", element: " + std::to_string(n), vSplitted[n], test.vExpectedVector[n]);
        }
    }

    return bTestSuccess;
}

这是一个简洁的分裂函数。我决定让背靠背分隔符返回为空字符串,但您可以很容易地检查子字符串是否为空,如果是,则不将其添加到向量。

#include <vector>
#include <string>
using namespace std;



vector<string> split(string to_split, string delimiter) {
    size_t pos = 0;
    vector<string> matches{};
    do {
        pos = to_split.find(delimiter);
        int change_end;
        if (pos == string::npos) {
            pos = to_split.length() - 1;
            change_end = 1;
        }
        else {
            change_end = 0;
        }
        matches.push_back(to_split.substr(0, pos+change_end));
        
        to_split.erase(0, pos+1);

    }
    while (!to_split.empty());
    return matches;

}