Java有一个方便的分割方法:
String str = "The quick brown fox";
String[] results = str.split(" ");
在c++中有简单的方法来做到这一点吗?
Java有一个方便的分割方法:
String str = "The quick brown fox";
String[] results = str.split(" ");
在c++中有简单的方法来做到这一点吗?
当前回答
对于简单的事情,我只使用以下方法:
unsigned TokenizeString(const std::string& i_source,
const std::string& i_seperators,
bool i_discard_empty_tokens,
std::vector<std::string>& o_tokens)
{
unsigned prev_pos = 0;
unsigned pos = 0;
unsigned number_of_tokens = 0;
o_tokens.clear();
pos = i_source.find_first_of(i_seperators, pos);
while (pos != std::string::npos)
{
std::string token = i_source.substr(prev_pos, pos - prev_pos);
if (!i_discard_empty_tokens || token != "")
{
o_tokens.push_back(i_source.substr(prev_pos, pos - prev_pos));
number_of_tokens++;
}
pos++;
prev_pos = pos;
pos = i_source.find_first_of(i_seperators, pos);
}
if (prev_pos < i_source.length())
{
o_tokens.push_back(i_source.substr(prev_pos));
number_of_tokens++;
}
return number_of_tokens;
}
懦弱的免责声明:我编写实时数据处理软件,其中数据通过二进制文件、套接字或一些API调用(I/O卡、摄像头)传入。除了在启动时读取外部配置文件以外,我从未将这个函数用于更复杂或时间要求更严格的事情。
其他回答
下面是一个示例标记器类,它可以实现您想要的功能
//Header file
class Tokenizer
{
public:
static const std::string DELIMITERS;
Tokenizer(const std::string& str);
Tokenizer(const std::string& str, const std::string& delimiters);
bool NextToken();
bool NextToken(const std::string& delimiters);
const std::string GetToken() const;
void Reset();
protected:
size_t m_offset;
const std::string m_string;
std::string m_token;
std::string m_delimiters;
};
//CPP file
const std::string Tokenizer::DELIMITERS(" \t\n\r");
Tokenizer::Tokenizer(const std::string& s) :
m_string(s),
m_offset(0),
m_delimiters(DELIMITERS) {}
Tokenizer::Tokenizer(const std::string& s, const std::string& delimiters) :
m_string(s),
m_offset(0),
m_delimiters(delimiters) {}
bool Tokenizer::NextToken()
{
return NextToken(m_delimiters);
}
bool Tokenizer::NextToken(const std::string& delimiters)
{
size_t i = m_string.find_first_not_of(delimiters, m_offset);
if (std::string::npos == i)
{
m_offset = m_string.length();
return false;
}
size_t j = m_string.find_first_of(delimiters, i);
if (std::string::npos == j)
{
m_token = m_string.substr(i);
m_offset = m_string.length();
return true;
}
m_token = m_string.substr(i, j - i);
m_offset = j;
return true;
}
例子:
std::vector <std::string> v;
Tokenizer s("split this string", " ");
while (s.NextToken())
{
v.push_back(s.GetToken());
}
我以前只用标准库做了一个lexer/tokenizer。代码如下:
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
using namespace std;
string seps(string& s) {
if (!s.size()) return "";
stringstream ss;
ss << s[0];
for (int i = 1; i < s.size(); i++) {
ss << '|' << s[i];
}
return ss.str();
}
void Tokenize(string& str, vector<string>& tokens, const string& delimiters = " ")
{
seps(str);
// Skip delimiters at beginning.
string::size_type lastPos = str.find_first_not_of(delimiters, 0);
// Find first "non-delimiter".
string::size_type pos = str.find_first_of(delimiters, lastPos);
while (string::npos != pos || string::npos != lastPos)
{
// Found a token, add it to the vector.
tokens.push_back(str.substr(lastPos, pos - lastPos));
// Skip delimiters. Note the "not_of"
lastPos = str.find_first_not_of(delimiters, pos);
// Find next "non-delimiter"
pos = str.find_first_of(delimiters, lastPos);
}
}
int main(int argc, char *argv[])
{
vector<string> t;
string s = "Tokens for everyone!";
Tokenize(s, t, "|");
for (auto c : t)
cout << c << endl;
system("pause");
return 0;
}
下面是我的Swiss®军刀字符串标记器,用于用空格分隔字符串,处理单引号和双引号包装的字符串,以及从结果中剥离这些字符。我使用RegexBuddy 4。x生成大部分代码片段,但我添加了用于剥离引号和其他一些东西的自定义处理。
#include <string>
#include <locale>
#include <regex>
std::vector<std::wstring> tokenize_string(std::wstring string_to_tokenize) {
std::vector<std::wstring> tokens;
std::wregex re(LR"(("[^"]*"|'[^']*'|[^"' ]+))", std::regex_constants::collate);
std::wsregex_iterator next( string_to_tokenize.begin(),
string_to_tokenize.end(),
re,
std::regex_constants::match_not_null );
std::wsregex_iterator end;
const wchar_t single_quote = L'\'';
const wchar_t double_quote = L'\"';
while ( next != end ) {
std::wsmatch match = *next;
const std::wstring token = match.str( 0 );
next++;
if (token.length() > 2 && (token.front() == double_quote || token.front() == single_quote))
tokens.emplace_back( std::wstring(token.begin()+1, token.begin()+token.length()-1) );
else
tokens.emplace_back(token);
}
return tokens;
}
请看这个例子。它可能对你有帮助。
#include <iostream>
#include <sstream>
using namespace std;
int main ()
{
string tmps;
istringstream is ("the dellimiter is the space");
while (is.good ()) {
is >> tmps;
cout << tmps << "\n";
}
return 0;
}
我知道这个问题已经有了答案,但我想有所贡献。也许我的解决方案有点简单,但这就是我想到的:
vector<string> get_words(string const& text, string const& separator)
{
vector<string> result;
string tmp = text;
size_t first_pos = 0;
size_t second_pos = tmp.find(separator);
while (second_pos != string::npos)
{
if (first_pos != second_pos)
{
string word = tmp.substr(first_pos, second_pos - first_pos);
result.push_back(word);
}
tmp = tmp.substr(second_pos + separator.length());
second_pos = tmp.find(separator);
}
result.push_back(tmp);
return result;
}
如果在我的代码中有更好的方法,或者有什么错误,请评论。
更新:添加通用分隔符