我在c++中使用以下方法解析字符串:
using namespace std;
string parsed,input="text to be parsed";
stringstream input_stringstream(input);
if (getline(input_stringstream,parsed,' '))
{
// do some processing.
}
使用单个字符分隔符进行解析是可以的。但是如果我想使用字符串作为分隔符呢?
例子:我想拆分:
scott>=tiger
用>=作为分隔符,这样我就可以得到斯科特和老虎。
这与其他答案相似,但它使用了string_view。这些是原始字符串的视图。类似于c++20的例子。虽然这将是一个c++17的例子。(编辑以跳过空匹配)
#include <algorithm>
#include <iostream>
#include <string_view>
#include <vector>
std::vector<std::string_view> split(std::string_view buffer,
const std::string_view delimeter = " ") {
std::vector<std::string_view> ret{};
std::decay_t<decltype(std::string_view::npos)> pos{};
while ((pos = buffer.find(delimeter)) != std::string_view::npos) {
const auto match = buffer.substr(0, pos);
if (!match.empty()) ret.push_back(match);
buffer = buffer.substr(pos + delimeter.size());
}
if (!buffer.empty()) ret.push_back(buffer);
return ret;
}
int main() {
const auto split_values = split("1 2 3 4 5 6 7 8 9 10 ");
std::for_each(split_values.begin(), split_values.end(),
[](const auto& str) { std::cout << str << '\n'; });
return split_values.size();
}
下面是一个使用Boost string Algorithms库和Boost Range库将一个字符串与另一个字符串分割的示例。这个解决方案的灵感来自StringAlgo库文档,请参阅Split部分。
下面是split_with_string函数的完整程序,以及全面的测试-用godbolt试试:
#include <iostream>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/range/iterator_range.hpp>
std::vector<std::string> split_with_string(std::string_view s, std::string_view search)
{
if (search.empty()) return {std::string{s}};
std::vector<boost::iterator_range<std::string_view::iterator>> found;
boost::algorithm::ifind_all(found, s, search);
if (found.empty()) return {};
std::vector<std::string> parts;
parts.reserve(found.size() + 2); // a bit more
std::string_view::iterator part_begin = s.cbegin(), part_end;
for (auto& split_found : found)
{
// do not skip empty extracts
part_end = split_found.begin();
parts.emplace_back(part_begin, part_end);
part_begin = split_found.end();
}
if (part_end != s.end())
parts.emplace_back(part_begin, s.end());
return parts;
}
#define TEST(expr) std::cout << ((!(expr)) ? "FAIL" : "PASS") << ": " #expr "\t" << std::endl
int main()
{
auto s0 = split_with_string("adsf-+qwret-+nvfkbdsj", "");
TEST(s0.size() == 1);
TEST(s0.front() == "adsf-+qwret-+nvfkbdsj");
auto s1 = split_with_string("adsf-+qwret-+nvfkbdsj", "-+");
TEST(s1.size() == 3);
TEST(s1.front() == "adsf");
TEST(s1.back() == "nvfkbdsj");
auto s2 = split_with_string("-+adsf-+qwret-+nvfkbdsj-+", "-+");
TEST(s2.size() == 5);
TEST(s2.front() == "");
TEST(s2.back() == "");
auto s3 = split_with_string("-+adsf-+qwret-+nvfkbdsj", "-+");
TEST(s3.size() == 4);
TEST(s3.front() == "");
TEST(s3.back() == "nvfkbdsj");
auto s4 = split_with_string("adsf-+qwret-+nvfkbdsj-+", "-+");
TEST(s4.size() == 4);
TEST(s4.front() == "adsf");
TEST(s4.back() == "");
auto s5 = split_with_string("dbo.abc", "dbo.");
TEST(s5.size() == 2);
TEST(s5.front() == "");
TEST(s5.back() == "abc");
auto s6 = split_with_string("dbo.abc", ".");
TEST(s6.size() == 2);
TEST(s6.front() == "dbo");
TEST(s6.back() == "abc");
}
测试输出:
PASS: s0.size() == 1
PASS: s0.front() == "adsf-+qwret-+nvfkbdsj"
PASS: s1.size() == 3
PASS: s1.front() == "adsf"
PASS: s1.back() == "nvfkbdsj"
PASS: s2.size() == 5
PASS: s2.front() == ""
PASS: s2.back() == ""
PASS: s3.size() == 4
PASS: s3.front() == ""
PASS: s3.back() == "nvfkbdsj"
PASS: s4.size() == 4
PASS: s4.front() == "adsf"
PASS: s4.back() == ""
PASS: s5.size() == 2
PASS: s5.front() == ""
PASS: s5.back() == "abc"
PASS: s6.size() == 2
PASS: s6.front() == "dbo"
PASS: s6.back() == "abc"
我会使用boost::tokenizer。下面的文档解释了如何创建适当的标记器函数:http://www.boost.org/doc/libs/1_52_0/libs/tokenizer/tokenizerfunction.htm
这里有一个对你的案子有用。
struct my_tokenizer_func
{
template<typename It>
bool operator()(It& next, It end, std::string & tok)
{
if (next == end)
return false;
char const * del = ">=";
auto pos = std::search(next, end, del, del + 2);
tok.assign(next, pos);
next = pos;
if (next != end)
std::advance(next, 2);
return true;
}
void reset() {}
};
int main()
{
std::string to_be_parsed = "1) one>=2) two>=3) three>=4) four";
for (auto i : boost::tokenizer<my_tokenizer_func>(to_be_parsed))
std::cout << i << '\n';
}