Java有一个方便的分割方法:
String str = "The quick brown fox";
String[] results = str.split(" ");
在c++中有简单的方法来做到这一点吗?
Java有一个方便的分割方法:
String str = "The quick brown fox";
String[] results = str.split(" ");
在c++中有简单的方法来做到这一点吗?
当前回答
下面是一种方法,允许您控制是否包含空标记(如strsep)或排除空标记(如strtok)。
#include <string.h> // for strchr and strlen
/*
* want_empty_tokens==true : include empty tokens, like strsep()
* want_empty_tokens==false : exclude empty tokens, like strtok()
*/
std::vector<std::string> tokenize(const char* src,
char delim,
bool want_empty_tokens)
{
std::vector<std::string> tokens;
if (src and *src != '\0') // defensive
while( true ) {
const char* d = strchr(src, delim);
size_t len = (d)? d-src : strlen(src);
if (len or want_empty_tokens)
tokens.push_back( std::string(src, len) ); // capture token
if (d) src += len+1; else break;
}
return tokens;
}
其他回答
这是一个简单的stl解决方案(~5行!)使用std::find和std::find_first_not_of来处理重复的分隔符(例如空格或句号),以及开头和结尾的分隔符:
#include <string>
#include <vector>
void tokenize(std::string str, std::vector<string> &token_v){
size_t start = str.find_first_not_of(DELIMITER), end=start;
while (start != std::string::npos){
// Find next occurence of delimiter
end = str.find(DELIMITER, start);
// Push back the token found into vector
token_v.push_back(str.substr(start, end-start));
// Skip all occurences of the delimiter to find new start
start = str.find_first_not_of(DELIMITER, end);
}
}
现场试试吧!
下面是一个示例标记器类,它可以实现您想要的功能
//Header file
class Tokenizer
{
public:
static const std::string DELIMITERS;
Tokenizer(const std::string& str);
Tokenizer(const std::string& str, const std::string& delimiters);
bool NextToken();
bool NextToken(const std::string& delimiters);
const std::string GetToken() const;
void Reset();
protected:
size_t m_offset;
const std::string m_string;
std::string m_token;
std::string m_delimiters;
};
//CPP file
const std::string Tokenizer::DELIMITERS(" \t\n\r");
Tokenizer::Tokenizer(const std::string& s) :
m_string(s),
m_offset(0),
m_delimiters(DELIMITERS) {}
Tokenizer::Tokenizer(const std::string& s, const std::string& delimiters) :
m_string(s),
m_offset(0),
m_delimiters(delimiters) {}
bool Tokenizer::NextToken()
{
return NextToken(m_delimiters);
}
bool Tokenizer::NextToken(const std::string& delimiters)
{
size_t i = m_string.find_first_not_of(delimiters, m_offset);
if (std::string::npos == i)
{
m_offset = m_string.length();
return false;
}
size_t j = m_string.find_first_of(delimiters, i);
if (std::string::npos == j)
{
m_token = m_string.substr(i);
m_offset = m_string.length();
return true;
}
m_token = m_string.substr(i, j - i);
m_offset = j;
return true;
}
例子:
std::vector <std::string> v;
Tokenizer s("split this string", " ");
while (s.NextToken())
{
v.push_back(s.GetToken());
}
Boost标记器类可以使这类事情变得非常简单:
#include <iostream>
#include <string>
#include <boost/foreach.hpp>
#include <boost/tokenizer.hpp>
using namespace std;
using namespace boost;
int main(int, char**)
{
string text = "token, test string";
char_separator<char> sep(", ");
tokenizer< char_separator<char> > tokens(text, sep);
BOOST_FOREACH (const string& t, tokens) {
cout << t << "." << endl;
}
}
针对c++ 11更新:
#include <iostream>
#include <string>
#include <boost/tokenizer.hpp>
using namespace std;
using namespace boost;
int main(int, char**)
{
string text = "token, test string";
char_separator<char> sep(", ");
tokenizer<char_separator<char>> tokens(text, sep);
for (const auto& t : tokens) {
cout << t << "." << endl;
}
}
/// split a string into multiple sub strings, based on a separator string
/// for example, if separator="::",
///
/// s = "abc" -> "abc"
///
/// s = "abc::def xy::st:" -> "abc", "def xy" and "st:",
///
/// s = "::abc::" -> "abc"
///
/// s = "::" -> NO sub strings found
///
/// s = "" -> NO sub strings found
///
/// then append the sub-strings to the end of the vector v.
///
/// the idea comes from the findUrls() function of "Accelerated C++", chapt7,
/// findurls.cpp
///
void split(const string& s, const string& sep, vector<string>& v)
{
typedef string::const_iterator iter;
iter b = s.begin(), e = s.end(), i;
iter sep_b = sep.begin(), sep_e = sep.end();
// search through s
while (b != e){
i = search(b, e, sep_b, sep_e);
// no more separator found
if (i == e){
// it's not an empty string
if (b != e)
v.push_back(string(b, e));
break;
}
else if (i == b){
// the separator is found and right at the beginning
// in this case, we need to move on and search for the
// next separator
b = i + sep.length();
}
else{
// found the separator
v.push_back(string(b, i));
b = i;
}
}
}
boost库很好,但并不总是可用的。手工做这些事情也是很好的脑力锻炼。这里我们只使用STL中的std::search()算法,参见上面的代码。
这里有许多过于复杂的建议。试试这个简单的std::string解决方案:
using namespace std;
string someText = ...
string::size_type tokenOff = 0, sepOff = tokenOff;
while (sepOff != string::npos)
{
sepOff = someText.find(' ', sepOff);
string::size_type tokenLen = (sepOff == string::npos) ? sepOff : sepOff++ - tokenOff;
string token = someText.substr(tokenOff, tokenLen);
if (!token.empty())
/* do something with token */;
tokenOff = sepOff;
}