我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
当前回答
我需要一个易于使用的c++库来解析CSV文件,但找不到任何可用的库,所以我最终构建了一个。 Rapidcsv是一个c++ 11的纯头库,它可以直接访问已解析的列(或行),作为选择的数据类型的向量。例如:
#include <iostream>
#include <vector>
#include <rapidcsv.h>
int main()
{
rapidcsv::Document doc("../tests/msft.csv");
std::vector<float> close = doc.GetColumn<float>("Close");
std::cout << "Read " << close.size() << " values." << std::endl;
}
其他回答
不管怎样,下面是我的实现。它处理wstring输入,但是可以很容易地调整为string。它不处理字段中的换行符(因为我的应用程序也不这样做,但添加它的支持并不太难),它不符合RFC中的“\r\n”行尾(假设您使用std::getline),但它确实正确地处理空格修剪和双引号(希望如此)。
using namespace std;
// trim whitespaces around field or double-quotes, remove double-quotes and replace escaped double-quotes (double double-quotes)
wstring trimquote(const wstring& str, const wstring& whitespace, const wchar_t quotChar)
{
wstring ws;
wstring::size_type strBegin = str.find_first_not_of(whitespace);
if (strBegin == wstring::npos)
return L"";
wstring::size_type strEnd = str.find_last_not_of(whitespace);
wstring::size_type strRange = strEnd - strBegin + 1;
if((str[strBegin] == quotChar) && (str[strEnd] == quotChar))
{
ws = str.substr(strBegin+1, strRange-2);
strBegin = 0;
while((strEnd = ws.find(quotChar, strBegin)) != wstring::npos)
{
ws.erase(strEnd, 1);
strBegin = strEnd+1;
}
}
else
ws = str.substr(strBegin, strRange);
return ws;
}
pair<unsigned, unsigned> nextCSVQuotePair(const wstring& line, const wchar_t quotChar, unsigned ofs = 0)
{
pair<unsigned, unsigned> r;
r.first = line.find(quotChar, ofs);
r.second = wstring::npos;
if(r.first != wstring::npos)
{
r.second = r.first;
while(((r.second = line.find(quotChar, r.second+1)) != wstring::npos)
&& (line[r.second+1] == quotChar)) // WARNING: assumes null-terminated string such that line[r.second+1] always exist
r.second++;
}
return r;
}
unsigned parseLine(vector<wstring>& fields, const wstring& line)
{
unsigned ofs, ofs0, np;
const wchar_t delim = L',';
const wstring whitespace = L" \t\xa0\x3000\x2000\x2001\x2002\x2003\x2004\x2005\x2006\x2007\x2008\x2009\x200a\x202f\x205f";
const wchar_t quotChar = L'\"';
pair<unsigned, unsigned> quot;
fields.clear();
ofs = ofs0 = 0;
quot = nextCSVQuotePair(line, quotChar);
while((np = line.find(delim, ofs)) != wstring::npos)
{
if((np > quot.first) && (np < quot.second))
{ // skip delimiter inside quoted field
ofs = quot.second+1;
quot = nextCSVQuotePair(line, quotChar, ofs);
continue;
}
fields.push_back( trimquote(line.substr(ofs0, np-ofs0), whitespace, quotChar) );
ofs = ofs0 = np+1;
}
fields.push_back( trimquote(line.substr(ofs0), whitespace, quotChar) );
return fields.size();
}
您可以使用仅头文件的Csv::Parser库。
它完全支持RFC 4180,包括字段值中的引号、转义引号和换行。 它只需要标准的c++ (c++ 17)。 它支持在编译时从std::string_view读取CSV数据。 它使用Catch2进行了广泛的测试。
我写了一个很好的解析CSV文件的方法,我认为我应该把它作为一个答案:
#include <algorithm>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
struct CSVDict
{
std::vector< std::string > inputImages;
std::vector< double > inputLabels;
};
/**
\brief Splits the string
\param str String to split
\param delim Delimiter on the basis of which splitting is to be done
\return results Output in the form of vector of strings
*/
std::vector<std::string> stringSplit( const std::string &str, const std::string &delim )
{
std::vector<std::string> results;
for (size_t i = 0; i < str.length(); i++)
{
std::string tempString = "";
while ((str[i] != *delim.c_str()) && (i < str.length()))
{
tempString += str[i];
i++;
}
results.push_back(tempString);
}
return results;
}
/**
\brief Parse the supplied CSV File and obtain Row and Column information.
Assumptions:
1. Header information is in first row
2. Delimiters are only used to differentiate cell members
\param csvFileName The full path of the file to parse
\param inputColumns The string of input columns which contain the data to be used for further processing
\param inputLabels The string of input labels based on which further processing is to be done
\param delim The delimiters used in inputColumns and inputLabels
\return Vector of Vector of strings: Collection of rows and columns
*/
std::vector< CSVDict > parseCSVFile( const std::string &csvFileName, const std::string &inputColumns, const std::string &inputLabels, const std::string &delim )
{
std::vector< CSVDict > return_CSVDict;
std::vector< std::string > inputColumnsVec = stringSplit(inputColumns, delim), inputLabelsVec = stringSplit(inputLabels, delim);
std::vector< std::vector< std::string > > returnVector;
std::ifstream inFile(csvFileName.c_str());
int row = 0;
std::vector< size_t > inputColumnIndeces, inputLabelIndeces;
for (std::string line; std::getline(inFile, line, '\n');)
{
CSVDict tempDict;
std::vector< std::string > rowVec;
line.erase(std::remove(line.begin(), line.end(), '"'), line.end());
rowVec = stringSplit(line, delim);
// for the first row, record the indeces of the inputColumns and inputLabels
if (row == 0)
{
for (size_t i = 0; i < rowVec.size(); i++)
{
for (size_t j = 0; j < inputColumnsVec.size(); j++)
{
if (rowVec[i] == inputColumnsVec[j])
{
inputColumnIndeces.push_back(i);
}
}
for (size_t j = 0; j < inputLabelsVec.size(); j++)
{
if (rowVec[i] == inputLabelsVec[j])
{
inputLabelIndeces.push_back(i);
}
}
}
}
else
{
for (size_t i = 0; i < inputColumnIndeces.size(); i++)
{
tempDict.inputImages.push_back(rowVec[inputColumnIndeces[i]]);
}
for (size_t i = 0; i < inputLabelIndeces.size(); i++)
{
double test = std::atof(rowVec[inputLabelIndeces[i]].c_str());
tempDict.inputLabels.push_back(std::atof(rowVec[inputLabelIndeces[i]].c_str()));
}
return_CSVDict.push_back(tempDict);
}
row++;
}
return return_CSVDict;
}
下面是读取矩阵的代码,注意你在matlab中也有一个csvwrite函数
void loadFromCSV( const std::string& filename )
{
std::ifstream file( filename.c_str() );
std::vector< std::vector<std::string> > matrix;
std::vector<std::string> row;
std::string line;
std::string cell;
while( file )
{
std::getline(file,line);
std::stringstream lineStream(line);
row.clear();
while( std::getline( lineStream, cell, ',' ) )
row.push_back( cell );
if( !row.empty() )
matrix.push_back( row );
}
for( int i=0; i<int(matrix.size()); i++ )
{
for( int j=0; j<int(matrix[i].size()); j++ )
std::cout << matrix[i][j] << " ";
std::cout << std::endl;
}
}
当你使用boost::spirit这样漂亮的东西时,你应该感到自豪
这里我的一个解析器的尝试(几乎)符合这个链接的CSV规范(我不需要在字段中换行)。逗号周围的空格也被省略了)。
在你克服了编译这段代码需要等待10秒的令人震惊的经历之后:),你就可以坐下来享受了。
// csvparser.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace bascii = boost::spirit::ascii;
template <typename Iterator>
struct csv_parser : qi::grammar<Iterator, std::vector<std::string>(),
bascii::space_type>
{
qi::rule<Iterator, char() > COMMA;
qi::rule<Iterator, char() > DDQUOTE;
qi::rule<Iterator, std::string(), bascii::space_type > non_escaped;
qi::rule<Iterator, std::string(), bascii::space_type > escaped;
qi::rule<Iterator, std::string(), bascii::space_type > field;
qi::rule<Iterator, std::vector<std::string>(), bascii::space_type > start;
csv_parser() : csv_parser::base_type(start)
{
using namespace qi;
using qi::lit;
using qi::lexeme;
using bascii::char_;
start = field % ',';
field = escaped | non_escaped;
escaped = lexeme['"' >> *( char_ -(char_('"') | ',') | COMMA | DDQUOTE) >> '"'];
non_escaped = lexeme[ *( char_ -(char_('"') | ',') ) ];
DDQUOTE = lit("\"\"") [_val = '"'];
COMMA = lit(",") [_val = ','];
}
};
int main()
{
std::cout << "Enter CSV lines [empty] to quit\n";
using bascii::space;
typedef std::string::const_iterator iterator_type;
typedef csv_parser<iterator_type> csv_parser;
csv_parser grammar;
std::string str;
int fid;
while (getline(std::cin, str))
{
fid = 0;
if (str.empty())
break;
std::vector<std::string> csv;
std::string::const_iterator it_beg = str.begin();
std::string::const_iterator it_end = str.end();
bool r = phrase_parse(it_beg, it_end, grammar, space, csv);
if (r && it_beg == it_end)
{
std::cout << "Parsing succeeded\n";
for (auto& field: csv)
{
std::cout << "field " << ++fid << ": " << field << std::endl;
}
}
else
{
std::cout << "Parsing failed\n";
}
}
return 0;
}
编译:
make csvparser
测试(例子摘自维基百科):
./csvparser
Enter CSV lines [empty] to quit
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
Parsing succeeded
field 1: 1999
field 2: Chevy
field 3: Venture "Extended Edition, Very Large"
field 4:
field 5: 5000.00
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00"
Parsing failed