我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
当前回答
如果您所需要的只是加载一个双精度数据文件(没有整数,没有文本),那么这里有一个随时可用的函数。
#include <sstream>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
/**
* Parse a CSV data file and fill the 2d STL vector "data".
* Limits: only "pure datas" of doubles, not encapsulated by " and without \n inside.
* Further no formatting in the data (e.g. scientific notation)
* It however handles both dots and commas as decimal separators and removes thousand separator.
*
* returnCodes[0]: file access 0-> ok 1-> not able to read; 2-> decimal separator equal to comma separator
* returnCodes[1]: number of records
* returnCodes[2]: number of fields. -1 If rows have different field size
*
*/
vector<int>
readCsvData (vector <vector <double>>& data, const string& filename, const string& delimiter, const string& decseparator){
int vv[3] = { 0,0,0 };
vector<int> returnCodes(&vv[0], &vv[0]+3);
string rowstring, stringtoken;
double doubletoken;
int rowcount=0;
int fieldcount=0;
data.clear();
ifstream iFile(filename, ios_base::in);
if (!iFile.is_open()){
returnCodes[0] = 1;
return returnCodes;
}
while (getline(iFile, rowstring)) {
if (rowstring=="") continue; // empty line
rowcount ++; //let's start with 1
if(delimiter == decseparator){
returnCodes[0] = 2;
return returnCodes;
}
if(decseparator != "."){
// remove dots (used as thousand separators)
string::iterator end_pos = remove(rowstring.begin(), rowstring.end(), '.');
rowstring.erase(end_pos, rowstring.end());
// replace decimal separator with dots.
replace(rowstring.begin(), rowstring.end(),decseparator.c_str()[0], '.');
} else {
// remove commas (used as thousand separators)
string::iterator end_pos = remove(rowstring.begin(), rowstring.end(), ',');
rowstring.erase(end_pos, rowstring.end());
}
// tokenize..
vector<double> tokens;
// Skip delimiters at beginning.
string::size_type lastPos = rowstring.find_first_not_of(delimiter, 0);
// Find first "non-delimiter".
string::size_type pos = rowstring.find_first_of(delimiter, lastPos);
while (string::npos != pos || string::npos != lastPos){
// Found a token, convert it to double add it to the vector.
stringtoken = rowstring.substr(lastPos, pos - lastPos);
if (stringtoken == "") {
tokens.push_back(0.0);
} else {
istringstream totalSString(stringtoken);
totalSString >> doubletoken;
tokens.push_back(doubletoken);
}
// Skip delimiters. Note the "not_of"
lastPos = rowstring.find_first_not_of(delimiter, pos);
// Find next "non-delimiter"
pos = rowstring.find_first_of(delimiter, lastPos);
}
if(rowcount == 1){
fieldcount = tokens.size();
returnCodes[2] = tokens.size();
} else {
if ( tokens.size() != fieldcount){
returnCodes[2] = -1;
}
}
data.push_back(tokens);
}
iFile.close();
returnCodes[1] = rowcount;
return returnCodes;
}
其他回答
不好意思,但是为了隐藏几行代码,这似乎是非常复杂的语法。
为什么不这样呢:
/**
Read line from a CSV file
@param[in] fp file pointer to open file
@param[in] vls reference to vector of strings to hold next line
*/
void readCSV( FILE *fp, std::vector<std::string>& vls )
{
vls.clear();
if( ! fp )
return;
char buf[10000];
if( ! fgets( buf,999,fp) )
return;
std::string s = buf;
int p,q;
q = -1;
// loop over columns
while( 1 ) {
p = q;
q = s.find_first_of(",\n",p+1);
if( q == -1 )
break;
vls.push_back( s.substr(p+1,q-p-1) );
}
}
int _tmain(int argc, _TCHAR* argv[])
{
std::vector<std::string> vls;
FILE * fp = fopen( argv[1], "r" );
if( ! fp )
return 1;
readCSV( fp, vls );
readCSV( fp, vls );
readCSV( fp, vls );
std::cout << "row 3, col 4 is " << vls[3].c_str() << "\n";
return 0;
}
你可能想看看我的自由/开源软件项目CSVfix(更新链接),这是一个用c++编写的CSV流编辑器。CSV解析器不是什么好东西,但它完成了工作,整个包可以在不编写任何代码的情况下满足您的需要。
CSV解析器请参见alib/src/a_csv.cpp,使用示例请参见csvlib/src/csved_ioman.cpp (IOManager::ReadCSV)。
c++ String工具箱库(StrTk)有一个令牌网格类,它允许你从文本文件、字符串或字符缓冲区加载数据,并以行-列的方式解析/处理它们。
您可以指定行分隔符和列分隔符,或者只使用默认值。
void foo()
{
std::string data = "1,2,3,4,5\n"
"0,2,4,6,8\n"
"1,3,5,7,9\n";
strtk::token_grid grid(data,data.size(),",");
for(std::size_t i = 0; i < grid.row_count(); ++i)
{
strtk::token_grid::row_type r = grid.row(i);
for(std::size_t j = 0; j < r.size(); ++j)
{
std::cout << r.get<int>(j) << "\t";
}
std::cout << std::endl;
}
std::cout << std::endl;
}
更多的例子可以在这里找到
如果你不关心转义逗号和换行符, 并且你不能在引号中嵌入逗号和换行符(如果你不能转义那么…) 那么它只有大约三行代码(好的14 ->,但它只有15读取整个文件)。
std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
{
std::vector<std::string> result;
std::string line;
std::getline(str,line);
std::stringstream lineStream(line);
std::string cell;
while(std::getline(lineStream,cell, ','))
{
result.push_back(cell);
}
// This checks for a trailing comma with no data after it.
if (!lineStream && cell.empty())
{
// If there was a trailing comma then add an empty element.
result.push_back("");
}
return result;
}
我只需要创建一个表示一行的类。 然后流到该对象:
#include <iterator>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
class CSVRow
{
public:
std::string_view operator[](std::size_t index) const
{
return std::string_view(&m_line[m_data[index] + 1], m_data[index + 1] - (m_data[index] + 1));
}
std::size_t size() const
{
return m_data.size() - 1;
}
void readNextRow(std::istream& str)
{
std::getline(str, m_line);
m_data.clear();
m_data.emplace_back(-1);
std::string::size_type pos = 0;
while((pos = m_line.find(',', pos)) != std::string::npos)
{
m_data.emplace_back(pos);
++pos;
}
// This checks for a trailing comma with no data after it.
pos = m_line.size();
m_data.emplace_back(pos);
}
private:
std::string m_line;
std::vector<int> m_data;
};
std::istream& operator>>(std::istream& str, CSVRow& data)
{
data.readNextRow(str);
return str;
}
int main()
{
std::ifstream file("plop.csv");
CSVRow row;
while(file >> row)
{
std::cout << "4th Element(" << row[3] << ")\n";
}
}
但只要做一点工作,我们就可以在技术上创建一个迭代器:
class CSVIterator
{
public:
typedef std::input_iterator_tag iterator_category;
typedef CSVRow value_type;
typedef std::size_t difference_type;
typedef CSVRow* pointer;
typedef CSVRow& reference;
CSVIterator(std::istream& str) :m_str(str.good()?&str:nullptr) { ++(*this); }
CSVIterator() :m_str(nullptr) {}
// Pre Increment
CSVIterator& operator++() {if (m_str) { if (!((*m_str) >> m_row)){m_str = nullptr;}}return *this;}
// Post increment
CSVIterator operator++(int) {CSVIterator tmp(*this);++(*this);return tmp;}
CSVRow const& operator*() const {return m_row;}
CSVRow const* operator->() const {return &m_row;}
bool operator==(CSVIterator const& rhs) {return ((this == &rhs) || ((this->m_str == nullptr) && (rhs.m_str == nullptr)));}
bool operator!=(CSVIterator const& rhs) {return !((*this) == rhs);}
private:
std::istream* m_str;
CSVRow m_row;
};
int main()
{
std::ifstream file("plop.csv");
for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
{
std::cout << "4th Element(" << (*loop)[3] << ")\n";
}
}
现在我们已经到了2020年,让我们添加一个CSVRange对象:
class CSVRange
{
std::istream& stream;
public:
CSVRange(std::istream& str)
: stream(str)
{}
CSVIterator begin() const {return CSVIterator{stream};}
CSVIterator end() const {return CSVIterator{};}
};
int main()
{
std::ifstream file("plop.csv");
for(auto& row: CSVRange(file))
{
std::cout << "4th Element(" << row[3] << ")\n";
}
}
如果您所需要的只是加载一个双精度数据文件(没有整数,没有文本),那么这里有一个随时可用的函数。
#include <sstream>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
/**
* Parse a CSV data file and fill the 2d STL vector "data".
* Limits: only "pure datas" of doubles, not encapsulated by " and without \n inside.
* Further no formatting in the data (e.g. scientific notation)
* It however handles both dots and commas as decimal separators and removes thousand separator.
*
* returnCodes[0]: file access 0-> ok 1-> not able to read; 2-> decimal separator equal to comma separator
* returnCodes[1]: number of records
* returnCodes[2]: number of fields. -1 If rows have different field size
*
*/
vector<int>
readCsvData (vector <vector <double>>& data, const string& filename, const string& delimiter, const string& decseparator){
int vv[3] = { 0,0,0 };
vector<int> returnCodes(&vv[0], &vv[0]+3);
string rowstring, stringtoken;
double doubletoken;
int rowcount=0;
int fieldcount=0;
data.clear();
ifstream iFile(filename, ios_base::in);
if (!iFile.is_open()){
returnCodes[0] = 1;
return returnCodes;
}
while (getline(iFile, rowstring)) {
if (rowstring=="") continue; // empty line
rowcount ++; //let's start with 1
if(delimiter == decseparator){
returnCodes[0] = 2;
return returnCodes;
}
if(decseparator != "."){
// remove dots (used as thousand separators)
string::iterator end_pos = remove(rowstring.begin(), rowstring.end(), '.');
rowstring.erase(end_pos, rowstring.end());
// replace decimal separator with dots.
replace(rowstring.begin(), rowstring.end(),decseparator.c_str()[0], '.');
} else {
// remove commas (used as thousand separators)
string::iterator end_pos = remove(rowstring.begin(), rowstring.end(), ',');
rowstring.erase(end_pos, rowstring.end());
}
// tokenize..
vector<double> tokens;
// Skip delimiters at beginning.
string::size_type lastPos = rowstring.find_first_not_of(delimiter, 0);
// Find first "non-delimiter".
string::size_type pos = rowstring.find_first_of(delimiter, lastPos);
while (string::npos != pos || string::npos != lastPos){
// Found a token, convert it to double add it to the vector.
stringtoken = rowstring.substr(lastPos, pos - lastPos);
if (stringtoken == "") {
tokens.push_back(0.0);
} else {
istringstream totalSString(stringtoken);
totalSString >> doubletoken;
tokens.push_back(doubletoken);
}
// Skip delimiters. Note the "not_of"
lastPos = rowstring.find_first_not_of(delimiter, pos);
// Find next "non-delimiter"
pos = rowstring.find_first_of(delimiter, lastPos);
}
if(rowcount == 1){
fieldcount = tokens.size();
returnCodes[2] = tokens.size();
} else {
if ( tokens.size() != fieldcount){
returnCodes[2] = -1;
}
}
data.push_back(tokens);
}
iFile.close();
returnCodes[1] = rowcount;
return returnCodes;
}