我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
当前回答
您可以使用fopen,fscanf函数打开和读取.csv文件,但重要的是解析数据。使用分隔符解析数据的最简单方法。对于.csv,分隔符为','。
假设你的data1.csv文件如下所示:
A,45,76,01
B,77,67,02
C,63,76,03
D,65,44,04
您可以标记数据并存储在字符数组中,然后使用atoi()等函数进行适当的转换
FILE *fp;
char str1[10], str2[10], str3[10], str4[10];
fp = fopen("G:\\data1.csv", "r");
if(NULL == fp)
{
printf("\nError in opening file.");
return 0;
}
while(EOF != fscanf(fp, " %[^,], %[^,], %[^,], %s, %s, %s, %s ", str1, str2, str3, str4))
{
printf("\n%s %s %s %s", str1, str2, str3, str4);
}
fclose(fp);
[^,], ^ -它颠倒了逻辑,意思是匹配任何不包含逗号的字符串,然后最后,表示匹配终止前一个字符串的逗号。
其他回答
你可以使用这个库: https://github.com/vadamsky/csvworker
代码示例:
#include <iostream>
#include "csvworker.h"
using namespace std;
int main()
{
//
CsvWorker csv;
csv.loadFromFile("example.csv");
cout << csv.getRowsNumber() << " " << csv.getColumnsNumber() << endl;
csv.getFieldRef(0, 2) = "0";
csv.getFieldRef(1, 1) = "0";
csv.getFieldRef(1, 3) = "0";
csv.getFieldRef(2, 0) = "0";
csv.getFieldRef(2, 4) = "0";
csv.getFieldRef(3, 1) = "0";
csv.getFieldRef(3, 3) = "0";
csv.getFieldRef(4, 2) = "0";
for(unsigned int i=0;i<csv.getRowsNumber();++i)
{
//cout << csv.getRow(i) << endl;
for(unsigned int j=0;j<csv.getColumnsNumber();++j)
{
cout << csv.getField(i, j) << ".";
}
cout << endl;
}
csv.saveToFile("test.csv");
//
CsvWorker csv2(4,4);
csv2.getFieldRef(0, 0) = "a";
csv2.getFieldRef(0, 1) = "b";
csv2.getFieldRef(0, 2) = "r";
csv2.getFieldRef(0, 3) = "a";
csv2.getFieldRef(1, 0) = "c";
csv2.getFieldRef(1, 1) = "a";
csv2.getFieldRef(1, 2) = "d";
csv2.getFieldRef(2, 0) = "a";
csv2.getFieldRef(2, 1) = "b";
csv2.getFieldRef(2, 2) = "r";
csv2.getFieldRef(2, 3) = "a";
csv2.saveToFile("test2.csv");
return 0;
}
当你使用boost::spirit这样漂亮的东西时,你应该感到自豪
这里我的一个解析器的尝试(几乎)符合这个链接的CSV规范(我不需要在字段中换行)。逗号周围的空格也被省略了)。
在你克服了编译这段代码需要等待10秒的令人震惊的经历之后:),你就可以坐下来享受了。
// csvparser.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace bascii = boost::spirit::ascii;
template <typename Iterator>
struct csv_parser : qi::grammar<Iterator, std::vector<std::string>(),
bascii::space_type>
{
qi::rule<Iterator, char() > COMMA;
qi::rule<Iterator, char() > DDQUOTE;
qi::rule<Iterator, std::string(), bascii::space_type > non_escaped;
qi::rule<Iterator, std::string(), bascii::space_type > escaped;
qi::rule<Iterator, std::string(), bascii::space_type > field;
qi::rule<Iterator, std::vector<std::string>(), bascii::space_type > start;
csv_parser() : csv_parser::base_type(start)
{
using namespace qi;
using qi::lit;
using qi::lexeme;
using bascii::char_;
start = field % ',';
field = escaped | non_escaped;
escaped = lexeme['"' >> *( char_ -(char_('"') | ',') | COMMA | DDQUOTE) >> '"'];
non_escaped = lexeme[ *( char_ -(char_('"') | ',') ) ];
DDQUOTE = lit("\"\"") [_val = '"'];
COMMA = lit(",") [_val = ','];
}
};
int main()
{
std::cout << "Enter CSV lines [empty] to quit\n";
using bascii::space;
typedef std::string::const_iterator iterator_type;
typedef csv_parser<iterator_type> csv_parser;
csv_parser grammar;
std::string str;
int fid;
while (getline(std::cin, str))
{
fid = 0;
if (str.empty())
break;
std::vector<std::string> csv;
std::string::const_iterator it_beg = str.begin();
std::string::const_iterator it_end = str.end();
bool r = phrase_parse(it_beg, it_end, grammar, space, csv);
if (r && it_beg == it_end)
{
std::cout << "Parsing succeeded\n";
for (auto& field: csv)
{
std::cout << "field " << ++fid << ": " << field << std::endl;
}
}
else
{
std::cout << "Parsing failed\n";
}
}
return 0;
}
编译:
make csvparser
测试(例子摘自维基百科):
./csvparser
Enter CSV lines [empty] to quit
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
Parsing succeeded
field 1: 1999
field 2: Chevy
field 3: Venture "Extended Edition, Very Large"
field 4:
field 5: 5000.00
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00"
Parsing failed
不好意思,但是为了隐藏几行代码,这似乎是非常复杂的语法。
为什么不这样呢:
/**
Read line from a CSV file
@param[in] fp file pointer to open file
@param[in] vls reference to vector of strings to hold next line
*/
void readCSV( FILE *fp, std::vector<std::string>& vls )
{
vls.clear();
if( ! fp )
return;
char buf[10000];
if( ! fgets( buf,999,fp) )
return;
std::string s = buf;
int p,q;
q = -1;
// loop over columns
while( 1 ) {
p = q;
q = s.find_first_of(",\n",p+1);
if( q == -1 )
break;
vls.push_back( s.substr(p+1,q-p-1) );
}
}
int _tmain(int argc, _TCHAR* argv[])
{
std::vector<std::string> vls;
FILE * fp = fopen( argv[1], "r" );
if( ! fp )
return 1;
readCSV( fp, vls );
readCSV( fp, vls );
readCSV( fp, vls );
std::cout << "row 3, col 4 is " << vls[3].c_str() << "\n";
return 0;
}
下面是Unicode CSV解析器的另一个实现(使用wchar_t)。我写了一部分,乔纳森·莱弗勒写了剩下的部分。
注意:此解析器旨在尽可能地复制Excel的行为,特别是在导入损坏或格式错误的CSV文件时。
这是最初的问题-用多行字段和转义双引号解析CSV文件
这是作为SSCCE(简短,自包含,正确示例)的代码。
#include <stdbool.h>
#include <wchar.h>
#include <wctype.h>
extern const wchar_t *nextCsvField(const wchar_t *p, wchar_t sep, bool *newline);
// Returns a pointer to the start of the next field,
// or zero if this is the last field in the CSV
// p is the start position of the field
// sep is the separator used, i.e. comma or semicolon
// newline says whether the field ends with a newline or with a comma
const wchar_t *nextCsvField(const wchar_t *p, wchar_t sep, bool *newline)
{
// Parse quoted sequences
if ('"' == p[0]) {
p++;
while (1) {
// Find next double-quote
p = wcschr(p, L'"');
// If we don't find it or it's the last symbol
// then this is the last field
if (!p || !p[1])
return 0;
// Check for "", it is an escaped double-quote
if (p[1] != '"')
break;
// Skip the escaped double-quote
p += 2;
}
}
// Find next newline or comma.
wchar_t newline_or_sep[4] = L"\n\r ";
newline_or_sep[2] = sep;
p = wcspbrk(p, newline_or_sep);
// If no newline or separator, this is the last field.
if (!p)
return 0;
// Check if we had newline.
*newline = (p[0] == '\r' || p[0] == '\n');
// Handle "\r\n", otherwise just increment
if (p[0] == '\r' && p[1] == '\n')
p += 2;
else
p++;
return p;
}
static wchar_t *csvFieldData(const wchar_t *fld_s, const wchar_t *fld_e, wchar_t *buffer, size_t buflen)
{
wchar_t *dst = buffer;
wchar_t *end = buffer + buflen - 1;
const wchar_t *src = fld_s;
if (*src == L'"')
{
const wchar_t *p = src + 1;
while (p < fld_e && dst < end)
{
if (p[0] == L'"' && p+1 < fld_s && p[1] == L'"')
{
*dst++ = p[0];
p += 2;
}
else if (p[0] == L'"')
{
p++;
break;
}
else
*dst++ = *p++;
}
src = p;
}
while (src < fld_e && dst < end)
*dst++ = *src++;
if (dst >= end)
return 0;
*dst = L'\0';
return(buffer);
}
static void dissect(const wchar_t *line)
{
const wchar_t *start = line;
const wchar_t *next;
bool eol;
wprintf(L"Input %3zd: [%.*ls]\n", wcslen(line), wcslen(line)-1, line);
while ((next = nextCsvField(start, L',', &eol)) != 0)
{
wchar_t buffer[1024];
wprintf(L"Raw Field: [%.*ls] (eol = %d)\n", (next - start - eol), start, eol);
if (csvFieldData(start, next-1, buffer, sizeof(buffer)/sizeof(buffer[0])) != 0)
wprintf(L"Field %3zd: [%ls]\n", wcslen(buffer), buffer);
start = next;
}
}
static const wchar_t multiline[] =
L"First field of first row,\"This field is multiline\n"
"\n"
"but that's OK because it's enclosed in double quotes, and this\n"
"is an escaped \"\" double quote\" but this one \"\" is not\n"
" \"This is second field of second row, but it is not multiline\n"
" because it doesn't start \n"
" with an immediate double quote\"\n"
;
int main(void)
{
wchar_t line[1024];
while (fgetws(line, sizeof(line)/sizeof(line[0]), stdin))
dissect(line);
dissect(multiline);
return 0;
}
您可以使用仅头文件的Csv::Parser库。
它完全支持RFC 4180,包括字段值中的引号、转义引号和换行。 它只需要标准的c++ (c++ 17)。 它支持在编译时从std::string_view读取CSV数据。 它使用Catch2进行了广泛的测试。