我在c++中使用以下方法解析字符串:
using namespace std;

// Text to tokenize, wrapped in a stream so getline can read from it.
string input = "text to be parsed";
stringstream input_stringstream(input);

// Receives the characters read up to (not including) the first ' '.
string parsed;
if (getline(input_stringstream, parsed, ' '))
{
    // do some processing.
}
使用单个字符分隔符进行解析是可以的。但是如果我想使用字符串作为分隔符呢?
例子:我想拆分:
scott>=tiger
用>=作为分隔符,这样我就可以得到 scott 和 tiger。
下面是一个使用 Boost String Algorithms 库和 Boost Range 库、以另一个字符串作为分隔符来分割字符串的示例。这个解决方案的灵感来自 StringAlgo 库文档,请参阅其中的 Split 部分。
下面是包含 split_with_string 函数的完整程序以及全面的测试,可以在 godbolt 上试试:
#include <iostream>
#include <string>
#include <string_view>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/range/iterator_range.hpp>
/// Splits `s` at every exact (case-sensitive) occurrence of `search`.
///
/// Empty pieces are kept: a separator at the very start or end of `s`, or two
/// separators in a row, each contribute an empty string to the result.
///
/// @param s       Text to split.
/// @param search  Separator string.
/// @return The pieces in their original order. When `search` is empty or does
///         not occur in `s`, the result is the single element `s`.
std::vector<std::string> split_with_string(std::string_view s, std::string_view search)
{
    // An empty separator cannot split anything: hand the input back as one piece.
    if (search.empty()) return {std::string{s}};

    // find_all is the case-sensitive finder; the ifind_all variant ignores case
    // and would, for example, split "aXb" on "x".
    std::vector<boost::iterator_range<std::string_view::iterator>> found;
    boost::algorithm::find_all(found, s, search);

    std::vector<std::string> parts;
    parts.reserve(found.size() + 1); // N separators produce N + 1 pieces

    std::string_view::iterator part_begin = s.cbegin();
    for (const auto& split_found : found)
    {
        // do not skip empty extracts
        parts.emplace_back(part_begin, split_found.begin());
        part_begin = split_found.end();
    }

    // Everything after the last separator. With no separator at all this is
    // the whole input, so nothing is silently dropped.
    parts.emplace_back(part_begin, s.end());
    return parts;
}
// Prints "PASS" or "FAIL", then ": ", the source text of the checked
// expression, a tab, and a flushed newline.
#define TEST(expr) std::cout << ((expr) ? "PASS" : "FAIL") << ": " #expr "\t" << std::endl

// Runs split_with_string over a fixed set of inputs and reports every check
// on standard output.
int main()
{
    // Empty separator.
    const auto s0 = split_with_string("adsf-+qwret-+nvfkbdsj", "");
    TEST(s0.size() == 1);
    TEST(s0.front() == "adsf-+qwret-+nvfkbdsj");

    // Separator only between the pieces.
    const auto s1 = split_with_string("adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s1.size() == 3);
    TEST(s1.front() == "adsf");
    TEST(s1.back() == "nvfkbdsj");

    // Separator at both ends.
    const auto s2 = split_with_string("-+adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s2.size() == 5);
    TEST(s2.front() == "");
    TEST(s2.back() == "");

    // Separator at the start only.
    const auto s3 = split_with_string("-+adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s3.size() == 4);
    TEST(s3.front() == "");
    TEST(s3.back() == "nvfkbdsj");

    // Separator at the end only.
    const auto s4 = split_with_string("adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s4.size() == 4);
    TEST(s4.front() == "adsf");
    TEST(s4.back() == "");

    // Multi-character separator that is a prefix of the input.
    const auto s5 = split_with_string("dbo.abc", "dbo.");
    TEST(s5.size() == 2);
    TEST(s5.front() == "");
    TEST(s5.back() == "abc");

    // Single-character separator.
    const auto s6 = split_with_string("dbo.abc", ".");
    TEST(s6.size() == 2);
    TEST(s6.front() == "dbo");
    TEST(s6.back() == "abc");
}
测试输出:
PASS: s0.size() == 1
PASS: s0.front() == "adsf-+qwret-+nvfkbdsj"
PASS: s1.size() == 3
PASS: s1.front() == "adsf"
PASS: s1.back() == "nvfkbdsj"
PASS: s2.size() == 5
PASS: s2.front() == ""
PASS: s2.back() == ""
PASS: s3.size() == 4
PASS: s3.front() == ""
PASS: s3.back() == "nvfkbdsj"
PASS: s4.size() == 4
PASS: s4.front() == "adsf"
PASS: s4.back() == ""
PASS: s5.size() == 2
PASS: s5.front() == ""
PASS: s5.back() == "abc"
PASS: s6.size() == 2
PASS: s6.front() == "dbo"
PASS: s6.back() == "abc"
这段代码把文本按行拆分,并将每一行添加到一个向量中。
// Splits the NUL-terminated text `phrase` at every occurrence of `delimiter`
// and returns the pieces in order.
//
// Empty pieces are kept, so leading, trailing or consecutive delimiters each
// produce an empty string. The result always has at least one element.
//
// phrase:    text to split; a null pointer is treated as empty text.
// delimiter: separator string; when empty, the whole text is returned as a
//            single element.
std::vector<std::string> split(char *phrase, std::string delimiter){
    // Constructing std::string from a null pointer is undefined behaviour.
    const std::string text = (phrase != nullptr) ? phrase : "";

    std::vector<std::string> list;

    // find("") matches at position 0 without consuming anything, so an empty
    // delimiter would never let the scan advance.
    if (delimiter.empty()) {
        list.push_back(text);
        return list;
    }

    // Scan forward by position instead of erasing the consumed prefix, which
    // keeps the work linear in the length of the text.
    std::string::size_type start = 0;
    std::string::size_type pos = 0;
    while ((pos = text.find(delimiter, start)) != std::string::npos) {
        list.push_back(text.substr(start, pos - start));
        start = pos + delimiter.length();
    }

    // Whatever follows the last delimiter (possibly empty).
    list.push_back(text.substr(start));
    return list;
}
调用:
// Split the contents of buffer at every "\n", giving one element per line.
// NOTE(review): buffer is declared outside this snippet; split() takes a
// char*, so it is presumably a NUL-terminated character buffer — confirm.
vector<string> listFilesMax = split(buffer, "\n");
strtok 允许您传入多个字符作为分隔符。我敢打赌,如果您传入“>=”,您的示例字符串会被正确分割(尽管 > 和 = 会被当作两个独立的分隔符)。
编辑:如果您不想使用 c_str() 将 string 转换为 char*,可以使用 substr 和 find_first_of 进行分词。
// Tokenize on any of the characters '>' or '=' (strtok-style): runs of
// delimiter characters are skipped, so no empty tokens are printed.
std::string token, mystring("scott>=tiger");
const std::string delimiters(">=");

// Walk the string by position. Comparing the token with the remaining text
// as the stop condition would end early whenever a token equals the rest
// (e.g. "a>a" would print only one "a").
std::string::size_type start = mystring.find_first_not_of(delimiters);
while(start != std::string::npos){
    const std::string::size_type end = mystring.find_first_of(delimiters, start);
    // end == npos means the token runs to the end; substr clamps the count.
    token = mystring.substr(start, end - start);
    printf("%s ",token.c_str());
    // With end == npos this returns npos and the loop finishes.
    start = mystring.find_first_not_of(delimiters, end);
}