我在c++中使用以下方法解析字符串:
using namespace std;

// Text to tokenize and the buffer that receives the extracted token.
string input = "text to be parsed";
string parsed;

// Wrap the text in a stream so that getline() can read from it.
stringstream input_stringstream(input);

// getline() copies characters into `parsed` until it meets the single-character
// delimiter ' ' (which is consumed but not stored) or the input ends.
if (getline(input_stringstream, parsed, ' '))
{
    // do some processing.
}
使用单个字符分隔符进行解析是可以的。但是如果我想使用字符串作为分隔符呢?
例子:我想拆分:
scott>=tiger
用 >= 作为分隔符,这样我就可以得到 scott 和 tiger。
我会使用boost::tokenizer。下面的文档解释了如何创建适当的标记器函数:http://www.boost.org/doc/libs/1_52_0/libs/tokenizer/tokenizerfunction.htm
下面是一个适用于你这种情况的标记器函数实现。
/// Tokenizer function object for boost::tokenizer that splits a sequence on a
/// multi-character delimiter string.
///
/// A default-constructed instance splits on ">=". A different delimiter can be
/// supplied through the one-argument constructor.
struct my_tokenizer_func
{
    /// Splits on the default delimiter ">=".
    my_tokenizer_func() = default;

    /// Splits on `delimiter`. With an empty delimiter nothing is ever split:
    /// the whole remaining input is returned as a single token.
    explicit my_tokenizer_func(const std::string & delimiter)
        : delimiter_(delimiter)
    {
    }

    /// Extracts the next token from [next, end).
    ///
    /// @param next  in/out: start of the unparsed input; on success it is moved
    ///              past the extracted token and the delimiter that follows it.
    /// @param end   end of the input.
    /// @param tok   out: receives the extracted token (may be empty when two
    ///              delimiters are adjacent or the input starts with one).
    /// @return false when the input is exhausted, true when `tok` was filled.
    ///
    /// A delimiter at the very end of the input does not produce a trailing
    /// empty token, because `next` then equals `end` on the following call.
    template<typename It>
    bool operator()(It& next, It end, std::string & tok)
    {
        if (next == end)
            return false;

        // std::search with an empty pattern returns `next`, which would never
        // advance; treat an empty delimiter as "no delimiter present" instead.
        auto pos = delimiter_.empty()
            ? end
            : std::search(next, end, delimiter_.begin(), delimiter_.end());

        tok.assign(next, pos);
        next = pos;

        // Step over the delimiter itself, unless the token ran to the end.
        if (next != end)
            std::advance(next,
                static_cast<typename std::iterator_traits<It>::difference_type>(delimiter_.size()));
        return true;
    }

    /// The function object keeps no parsing state between calls, so there is
    /// nothing to reset.
    void reset() {}

private:
    std::string delimiter_ = ">=";
};
int main()
{
std::string to_be_parsed = "1) one>=2) two>=3) three>=4) four";
for (auto i : boost::tokenizer<my_tokenizer_func>(to_be_parsed))
std::cout << i << '\n';
}
下面是一个使用 Boost String Algorithms 库和 Boost Range 库、以另一个字符串作为分隔符来分割字符串的示例。这个解决方案的灵感来自 StringAlgo 库文档,请参阅其中的 Split 部分。
下面是包含split_with_string函数和一组完整测试的完整程序(可以在godbolt上试运行):
#include <iostream>
#include <string>
#include <string_view>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/range/iterator_range.hpp>
/// Splits `s` at every occurrence of the separator string `search`.
///
/// Matching is case-sensitive. Empty pieces are kept, so a separator at the
/// start or end of `s`, or two adjacent separators, yields an empty string in
/// the result.
///
/// @param s       text to split.
/// @param search  separator string.
/// @return the pieces of `s` between separators, in order. If `search` is
///         empty or does not occur in `s`, the result is a single element
///         holding all of `s`.
std::vector<std::string> split_with_string(std::string_view s, std::string_view search)
{
    // An empty separator cannot delimit anything: return the input unsplit.
    if (search.empty()) return {std::string{s}};

    // Collect every occurrence of `search` in `s`. find_all compares characters
    // exactly; ifind_all would also split "aXb" on "x".
    std::vector<boost::iterator_range<std::string_view::iterator>> found;
    boost::algorithm::find_all(found, s, search);

    std::vector<std::string> parts;
    parts.reserve(found.size() + 1); // N separators delimit N + 1 parts

    auto part_begin = s.begin();
    for (const auto& split_found : found)
    {
        // do not skip empty extracts
        parts.emplace_back(part_begin, split_found.begin());
        part_begin = split_found.end();
    }

    // Whatever follows the last separator (the whole input when there was no
    // separator at all; an empty string when `s` ends with a separator).
    parts.emplace_back(part_begin, s.end());
    return parts;
}
// Evaluates `expr` and prints "PASS" or "FAIL", followed by ": ", the source
// text of the expression (stringified with #expr), a tab and a newline.
#define TEST(expr) std::cout << ((!(expr)) ? "FAIL" : "PASS") << ": " #expr "\t" << std::endl
// Test driver for split_with_string: each group below splits one input and
// checks the number of parts plus the first and last part.
int main()
{
// Empty separator: the whole input is expected back as a single part.
auto s0 = split_with_string("adsf-+qwret-+nvfkbdsj", "");
TEST(s0.size() == 1);
TEST(s0.front() == "adsf-+qwret-+nvfkbdsj");
// Separator only between parts.
auto s1 = split_with_string("adsf-+qwret-+nvfkbdsj", "-+");
TEST(s1.size() == 3);
TEST(s1.front() == "adsf");
TEST(s1.back() == "nvfkbdsj");
// Separator at both ends: an empty part is expected first and last.
auto s2 = split_with_string("-+adsf-+qwret-+nvfkbdsj-+", "-+");
TEST(s2.size() == 5);
TEST(s2.front() == "");
TEST(s2.back() == "");
// Leading separator only: an empty first part is expected.
auto s3 = split_with_string("-+adsf-+qwret-+nvfkbdsj", "-+");
TEST(s3.size() == 4);
TEST(s3.front() == "");
TEST(s3.back() == "nvfkbdsj");
// Trailing separator only: an empty last part is expected.
auto s4 = split_with_string("adsf-+qwret-+nvfkbdsj-+", "-+");
TEST(s4.size() == 4);
TEST(s4.front() == "adsf");
TEST(s4.back() == "");
// Separator is a prefix of the input and contains letters.
auto s5 = split_with_string("dbo.abc", "dbo.");
TEST(s5.size() == 2);
TEST(s5.front() == "");
TEST(s5.back() == "abc");
// Single-character separator.
auto s6 = split_with_string("dbo.abc", ".");
TEST(s6.size() == 2);
TEST(s6.front() == "dbo");
TEST(s6.back() == "abc");
}
测试输出:
PASS: s0.size() == 1
PASS: s0.front() == "adsf-+qwret-+nvfkbdsj"
PASS: s1.size() == 3
PASS: s1.front() == "adsf"
PASS: s1.back() == "nvfkbdsj"
PASS: s2.size() == 5
PASS: s2.front() == ""
PASS: s2.back() == ""
PASS: s3.size() == 4
PASS: s3.front() == ""
PASS: s3.back() == "nvfkbdsj"
PASS: s4.size() == 4
PASS: s4.front() == "adsf"
PASS: s4.back() == ""
PASS: s5.size() == 2
PASS: s5.front() == ""
PASS: s5.back() == "abc"
PASS: s6.size() == 2
PASS: s6.front() == "dbo"
PASS: s6.back() == "abc"
对于字符串(或单个字符)分隔符,这种方法应该可以很好地工作。不要忘记加上#include <sstream>。
std::string input = "Alfa=,+Bravo=,+Charlie=,+Delta";
std::string delimiter = "=,+";

std::istringstream ss(input);
std::string token;
std::string::iterator it;

// Outer loop: read one token, ending at the FIRST character of the delimiter.
// Note that getline() only knows about that single character, so a lone '='
// in the input also ends a token even when ",+" does not follow it.
while (std::getline(ss, token, delimiter.front())) {
    std::cout << token << std::endl; // Token is extracted using '='

    // Inner loop: consume the remaining delimiter characters (",+") for as
    // long as the stream keeps matching them, one character at a time.
    it = delimiter.begin() + 1;
    while (it != delimiter.end() && ss.peek() == *it) {
        ss.get();
        ++it;
    }
}
第一个while循环使用字符串分隔符的第一个字符提取一个标记。第二个while循环跳过分隔符的其余部分,停在下一个标记的开头。
这段代码把文本按行拆分,并将每一行添加到一个vector中。
/// Splits a NUL-terminated C string into the pieces separated by `delimiter`.
///
/// Empty pieces are kept: adjacent delimiters, or a delimiter at the start or
/// end of the text, produce empty strings in the result.
///
/// @param phrase     text to split; it is only read, never modified. A null
///                   pointer yields an empty result.
/// @param delimiter  separator string. If it is empty, or does not occur in
///                   the text, the result is one element holding all the text.
/// @return the pieces of `phrase`, in order.
std::vector<std::string> split(char *phrase, std::string delimiter)
{
    std::vector<std::string> list;
    if (phrase == nullptr)
        return list;

    const std::string s(phrase);

    // find("") matches at every position without making progress, so an empty
    // delimiter must be handled up front to avoid looping forever.
    if (delimiter.empty()) {
        list.push_back(s);
        return list;
    }

    // Walk through the text with a moving start offset instead of erasing the
    // consumed prefix, which would shift the remaining characters each time.
    std::string::size_type start = 0;
    std::string::size_type pos = 0;
    while ((pos = s.find(delimiter, start)) != std::string::npos) {
        list.push_back(s.substr(start, pos - start));
        start = pos + delimiter.length();
    }

    // The remainder after the last delimiter (possibly empty).
    list.push_back(s.substr(start));
    return list;
}
调用:
// Example call: break the text in `buffer` into pieces at every "\n".
// NOTE(review): `buffer` is not declared in this snippet; since split() takes a
// char*, it is presumably a NUL-terminated character buffer — confirm.
vector<string> listFilesMax = split(buffer, "\n");