我需要在 C++ 中加载并使用 CSV 文件数据。目前只需要一个简单的逗号分隔解析器(即不必考虑换行符和逗号的转义)。主要需求是一个逐行解析器:每次调用方法时,它以向量的形式返回下一行。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过 Boost 的 Spirit,但我愿意尝试一下——前提是我没有忽略某个更直接的解决方案。
我需要在 C++ 中加载并使用 CSV 文件数据。目前只需要一个简单的逗号分隔解析器(即不必考虑换行符和逗号的转义)。主要需求是一个逐行解析器:每次调用方法时,它以向量的形式返回下一行。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过 Boost 的 Spirit,但我愿意尝试一下——前提是我没有忽略某个更直接的解决方案。
当前回答
另一个类似于 Loki Astari 答案的解决方案,使用 C++11。这里的每一行是给定类型的 std::tuple。代码先读取一行,再依次读取到每个分隔符,然后将值直接转换并存入元组(借助一些模板代码)。
// Bind each row by const reference: the iterator's operator* returns a
// reference to the row it already holds, so copying the whole tuple
// (including its std::string) on every iteration is unnecessary.
for (const auto &row : csv<std::string, int, float>(file, ',')) {
    // '\n' instead of std::endl avoids flushing the stream after every row.
    std::cout << "first col: " << std::get<0>(row) << '\n';
}
优势:
非常干净,使用简单,只需要 C++11。 通过 operator>> 自动将各字段转换为 std::tuple<t1, ...> 中对应的类型。
缺少什么:
不支持转义和引号。 对格式错误的 CSV 没有错误处理。
主要代码:
#include <cstddef>
#include <istream>
#include <iterator>
#include <sstream>
#include <string>
#include <tuple>
#include <type_traits>
namespace csvtools {

namespace detail {

/// Convert the text of one cell into @p value through the type's operator>>.
/// Extraction failures (empty or malformed cell) are not reported to the caller.
template <class T>
inline void parse_cell(const std::string &cell, T &value) {
    std::istringstream cell_stream(cell);
    cell_stream >> value;
}

/// std::string columns take the cell verbatim. Going through operator>> would
/// stop at the first whitespace and silently truncate text such as
/// "hello world" to "hello".
inline void parse_cell(const std::string &cell, std::string &value) {
    value = cell;
}

/// Read the next @p delimiter terminated cell from @p in and store it in the
/// @p idx-th element of @p out.
template <std::size_t idx, class... fields>
inline void read_cell(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
    std::string cell;
    std::getline(in, cell, delimiter);
    parse_cell(cell, std::get<idx>(out));
}

} // namespace detail

/// Read the last element of the tuple; this overload ends the recursion.
/// @param in        stream holding the remaining cells of one row
/// @param out       tuple receiving the converted values
/// @param delimiter character separating the cells
template <std::size_t idx, class... fields>
typename std::enable_if<(idx >= sizeof...(fields) - 1)>::type
read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
    detail::read_cell<idx>(in, out, delimiter);
}

/// Read the @p idx-th element of the tuple and then call read_tuple with
/// @p idx + 1 for the next element. enable_if selects the overload above once
/// the last element is reached.
/// @param in        stream holding the remaining cells of one row
/// @param out       tuple receiving the converted values
/// @param delimiter character separating the cells
template <std::size_t idx, class... fields>
typename std::enable_if<(idx < sizeof...(fields) - 1)>::type
read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
    detail::read_cell<idx>(in, out, delimiter);
    read_tuple<idx + 1, fields...>(in, out, delimiter);
}

} // namespace csvtools
/// Range-style view over a character stream that yields one tuple per line.
/// @tparam fields the column types, in order, that make up a row.
template <class... fields>
class csv {
public:
    using value_type = std::tuple<fields...>;
    class iterator;

    /// Wrap @p in (not owned; it is stored by reference) and split cells on @p delim.
    csv(std::istream &in, const char delim) : _in(in), _delim(delim) {}

    /// Status of the underlying stream
    /// @{
    bool good() const { return _in.good(); }
    const std::istream &underlying_stream() const { return _in; }
    /// @}

    inline iterator begin();
    inline iterator end();

private:
    /// Pull one line from the stream and convert its cells into a tuple.
    /// A value-initialised tuple is filled in place and returned.
    value_type read_row() {
        std::string text;
        std::getline(_in, text);

        std::stringstream cells(text);
        value_type parsed;
        csvtools::read_tuple<0, fields...>(cells, parsed, _delim);
        return parsed;
    }

    std::istream &_in;
    const char _delim;
};
/// Input iterator over the rows of a csv object. Each increment asks the
/// parent to read one row and caches it; the iterator turns into the end
/// iterator (null parent) as soon as a read fails.
template <class... fields>
class csv<fields...>::iterator {
    csv::value_type _row;
    csv *_parent;

public:
    typedef std::input_iterator_tag iterator_category;
    typedef csv::value_type value_type;
    // Iterator difference types must be signed.
    typedef std::ptrdiff_t difference_type;
    // const-qualified to agree with what operator-> and operator* return.
    typedef csv::value_type const *pointer;
    typedef csv::value_type const &reference;

    /// Construct an empty/end iterator
    inline iterator() : _parent(nullptr) {}

    /// Construct an iterator at the beginning of the @p parent csv object.
    /// The first row is read immediately.
    inline iterator(csv &parent) : _parent(parent.good() ? &parent : nullptr) {
        ++(*this);
    }

    /// Read one row, if possible. Becomes the end iterator when the read fails.
    inline iterator &operator++() {
        if (_parent != nullptr) {
            _row = _parent->read_row();
            // Test fail() rather than !good(): std::getline sets eofbit (but not
            // failbit) when it successfully reads a final line that has no
            // trailing newline. Checking good() would discard that last row.
            if (_parent->underlying_stream().fail()) {
                _parent = nullptr;
            }
        }
        return *this;
    }

    /// Post-increment: returns a copy holding the row cached before advancing.
    inline iterator operator++(int) {
        iterator copy = *this;
        ++(*this);
        return copy;
    }

    inline csv::value_type const &operator*() const {
        return _row;
    }

    inline csv::value_type const *operator->() const {
        return &_row;
    }

    /// Two iterators are equal when they are the same object or both are end
    /// iterators. const-qualified so const iterators can be compared too.
    bool operator==(iterator const &other) const {
        return (this == &other) || (_parent == nullptr && other._parent == nullptr);
    }

    bool operator!=(iterator const &other) const {
        return !(*this == other);
    }
};
/// Iterator positioned on the first row; constructing it reads that row.
template <class... fields>
auto csv<fields...>::begin() -> iterator {
    return iterator(*this);
}
/// Past-the-end iterator (a default-constructed iterator without a parent).
template <class... fields>
auto csv<fields...>::end() -> iterator {
    return iterator();
}
我在GitHub上放了一个小的工作示例;我一直用它来解析一些数值数据,它达到了它的目的。
其他回答
如果你不关心逗号和换行符的转义,并且不需要在引号中嵌入逗号和换行符(如果不能转义,那么……),那么只需要大约三行代码(好吧,其实是 14 行,但读取整个文件也只要 15 行)。
/// Read one line from @p str and split it on ',' into its fields.
/// No quoting or escaping is interpreted. A line that ends with a comma (and
/// an empty line) yields a final empty field.
/// @return the fields of the line, in order.
std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
{
    std::string rawLine;
    std::getline(str, rawLine);

    std::stringstream fieldSource(rawLine);
    std::vector<std::string> tokens;
    std::string field;
    for (;;)
    {
        if (!std::getline(fieldSource, field, ','))
        {
            break;
        }
        tokens.push_back(field);
    }

    // The last getline failed. If it also left `field` empty, the line ended
    // right after a separator (or was empty), so one more empty field is due.
    const bool endedOnSeparator = fieldSource.fail() && field.empty();
    if (endedOnSeparator)
    {
        tokens.push_back("");
    }
    return tokens;
}
我会创建一个表示一行的类,然后把流读入该对象:
#include <iterator>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
/// One line of comma separated text, split lazily: the line is stored once and
/// every field is handed out as a std::string_view into that storage.
/// No quoting or escaping is interpreted.
class CSVRow
{
    public:
        /// @return a view of field @p index. The view refers to this row's
        ///         internal buffer, so it is only valid until the next
        ///         readNextRow() call or the destruction of the row.
        /// Precondition: index < size(); no bounds check is performed.
        std::string_view operator[](std::size_t index) const
        {
            const std::size_t begin = m_data[index];
            // The next entry is one past the separator that ends this field.
            const std::size_t length = m_data[index + 1] - begin - 1;
            return std::string_view(m_line.data() + begin, length);
        }

        /// @return number of fields in the current row; 0 before any row was read
        ///         (previously this underflowed to SIZE_MAX).
        std::size_t size() const
        {
            return m_data.empty() ? 0 : m_data.size() - 1;
        }

        /// Read the next line of @p str and record where each field starts.
        void readNextRow(std::istream& str)
        {
            std::getline(str, m_line);

            m_data.clear();
            m_data.push_back(0);            // the first field starts at offset 0
            for (std::string::size_type pos = m_line.find(',');
                 pos != std::string::npos;
                 pos = m_line.find(',', pos + 1))
            {
                m_data.push_back(pos + 1);  // a field starts right after each comma
            }
            // Sentinel: behaves like the start of a field after an imaginary
            // separator at end-of-line, so the length formula needs no special case.
            m_data.push_back(m_line.size() + 1);
        }

    private:
        std::string m_line;
        // Start offset of each field plus the trailing sentinel. Kept unsigned to
        // match std::string offsets (no narrowing, no -1 marker).
        std::vector<std::size_t> m_data;
};
/// Stream extraction for CSVRow: loads the next line of @p str into @p data.
/// Returning the stream lets callers write `while (file >> row)`.
std::istream& operator>>(std::istream& str, CSVRow& data)
{
    data.readNextRow(str);

    return str;
}
/// Demo: print the fourth field of every row of plop.csv.
int main()
{
    std::ifstream       file("plop.csv");

    CSVRow              row;
    while(file >> row)
    {
        // CSVRow::operator[] performs no bounds check, so rows with fewer than
        // four fields are skipped instead of reading out of range.
        if (row.size() < 4)
        {
            continue;
        }
        std::cout << "4th Element(" << row[3] << ")\n";
    }
}
但只要做一点工作,我们就可以在技术上创建一个迭代器:
/// Input iterator that yields one CSVRow per line of a stream. It becomes the
/// end iterator (null stream pointer) once extracting a row fails.
class CSVIterator
{
    public:
        typedef std::input_iterator_tag     iterator_category;
        typedef CSVRow                      value_type;
        // Iterator difference types must be signed.
        typedef std::ptrdiff_t              difference_type;
        // const-qualified to agree with what operator-> and operator* return.
        typedef CSVRow const*               pointer;
        typedef CSVRow const&               reference;

        /// Begin iterator over @p str; the first row is read immediately.
        CSVIterator(std::istream& str) : m_str(str.good() ? &str : nullptr) { ++(*this); }
        /// End iterator.
        CSVIterator() : m_str(nullptr) {}

        // Pre Increment
        CSVIterator& operator++()
        {
            if (m_str && !((*m_str) >> m_row))
            {
                m_str = nullptr;
            }
            return *this;
        }
        // Post increment
        CSVIterator operator++(int)
        {
            CSVIterator tmp(*this);
            ++(*this);
            return tmp;
        }

        CSVRow const& operator*()  const { return m_row; }
        CSVRow const* operator->() const { return &m_row; }

        /// Equal when both are the same object or both are end iterators.
        /// const-qualified so const iterators can be compared as well.
        bool operator==(CSVIterator const& rhs) const
        {
            return (this == &rhs) || ((m_str == nullptr) && (rhs.m_str == nullptr));
        }
        bool operator!=(CSVIterator const& rhs) const
        {
            return !((*this) == rhs);
        }

    private:
        std::istream*   m_str;
        CSVRow          m_row;
};
/// Demo: print the fourth field of every row of plop.csv using CSVIterator.
int main()
{
    std::ifstream       file("plop.csv");

    for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
    {
        // CSVRow::operator[] performs no bounds check, so rows with fewer than
        // four fields are skipped instead of reading out of range.
        if (loop->size() < 4)
        {
            continue;
        }
        std::cout << "4th Element(" << (*loop)[3] << ")\n";
    }
}
现在我们已经到了2020年,让我们添加一个CSVRange对象:
/// Adapts a stream for range-based for loops: begin() builds a CSVIterator on
/// the stream, end() builds the default (end) CSVIterator. The stream is held
/// by reference and is not owned.
class CSVRange
{
    public:
        CSVRange(std::istream& str) : stream(str) {}

        CSVIterator begin() const
        {
            return CSVIterator(stream);
        }
        CSVIterator end() const
        {
            return CSVIterator();
        }

    private:
        std::istream&   stream;
};
/// Demo: print the fourth field of every row of plop.csv using CSVRange.
int main()
{
    std::ifstream       file("plop.csv");

    for(auto& row: CSVRange(file))
    {
        // CSVRow::operator[] performs no bounds check, so rows with fewer than
        // four fields are skipped instead of reading out of range.
        if (row.size() < 4)
        {
            continue;
        }
        std::cout << "4th Element(" << row[3] << ")\n";
    }
}
你可能想看看我的自由/开源软件项目CSVfix(更新链接),这是一个用c++编写的CSV流编辑器。CSV解析器不是什么好东西,但它完成了工作,整个包可以在不编写任何代码的情况下满足您的需要。
CSV解析器请参见alib/src/a_csv.cpp,使用示例请参见csvlib/src/csved_ioman.cpp (IOManager::ReadCSV)。
我写了一个只有头文件的c++ 11 CSV解析器。它经过了良好的测试,快速,支持整个CSV规范(带引号的字段,引号中的分隔符/结束符,引号转义等),并且可以配置为不符合规范的CSV。
配置是通过一个流畅的接口完成的:
// Fluent configuration example: the setters are chained directly onto the
// newly constructed parser and the result initialises `parser`.
// NOTE(review): presumably each setter returns the parser itself; confirm
// against the library's parser.hpp.
// constructor accepts any input stream
CsvParser parser = CsvParser(std::cin)
.delimiter(';') // fields are delimited by ; instead of ,
.quote('\'') // quoted fields use ' instead of "
.terminator('\0'); // records are terminated by \0 instead of by \r\n, \n, or \r
解析只是一个基于范围的for循环:
#include <iostream>
#include "../parser.hpp"
using namespace aria::csv;
/// Demo: print every field of some_file.csv, fields separated by " | " and
/// one output line per CSV row.
int main() {
    std::ifstream source("some_file.csv");
    CsvParser parser(source);

    // Outer loop walks the rows, inner loop the fields of one row.
    for (auto& record : parser) {
        for (auto& cell : record) {
            std::cout << cell << " | ";
        }
        std::cout << std::endl;
    }
}
如果可以的话,这是我简单快速的贡献。 不依赖 Boost。
允许在引号(delimiter)内出现分隔符(separator)以及引号本身,只要引号成对出现或不紧邻分隔符即可。
#include <iostream>
#include <vector>
#include <fstream>
/// Split one line of CSV text into its fields.
/// @param data      the line to split; scanning stops at the first NUL, '\n'
///                  or '\r' found outside a quoted section
/// @param separator character between fields (e.g. ',')
/// @param delimiter quote character that may enclose a field (e.g. '"')
/// @return the fields in order; there is always at least one (possibly empty).
std::vector<std::string> SplitCSV(const std::string &data, char separator, char delimiter)
{
    std::vector<std::string> fields;
    std::string current;
    bool insideQuotes = false;              // currently between quote characters?
    std::string::size_type quoteRun = 0;    // quote characters seen inside the quoted section
    const char *text = data.c_str();        // NUL terminated, so text[pos + 1] is always readable
    std::string::size_type pos = 0;

    do
    {
        const char ch = text[pos];
        bool structural = false;            // true when ch is consumed without being stored

        if (current.empty() && ch == delimiter)
        {
            // A quote seen while the field is still empty toggles quoted mode.
            insideQuotes = !insideQuotes;
            quoteRun = 0;
            structural = true;
        }
        else if (insideQuotes)
        {
            if (ch == delimiter)
            {
                // A quote closes the section only after an even number of inner
                // quotes and when followed by a separator or the end of the line.
                const bool closes = ((quoteRun % 2) == 0) &&
                                    ((text[pos + 1] == separator) || (text[pos + 1] == 0) ||
                                     (text[pos + 1] == '\n') || (text[pos + 1] == '\r'));
                if (closes)
                {
                    insideQuotes = false;
                    quoteRun = 0;
                    structural = true;
                }
                else
                {
                    ++quoteRun;
                }
            }
        }
        else if (ch == separator)
        {
            fields.push_back(current);
            current.clear();
            structural = true;
        }
        else if ((ch == 0) || (ch == '\n') || (ch == '\r'))
        {
            break;
        }

        if (!structural)
        {
            current += ch;
        }
        ++pos;
    } while (pos < data.length());

    fields.push_back(current);
    return fields;
}
/// Load a whole CSV file into @p data, one vector of fields per line.
/// @param fname     path of the file to read
/// @param data      receives the rows; it is cleared first, but left untouched
///                  when the function returns false
/// @param separator character between fields
/// @param delimiter quote character that may enclose a field
/// @return true when the file could be opened and is not empty, false otherwise.
bool ReadCsv(const std::string &fname, std::vector<std::vector<std::string>> &data,
             char separator = ',', char delimiter = '\"')
{
    std::ifstream FCsv(fname);

    // peek() detects "nothing to read" without the former seekg/tellg size
    // probe. That probe stored tellg()'s -1 failure value in a size_t, where it
    // became a huge positive number and passed the "> 0" test.
    if (!FCsv || FCsv.peek() == std::ifstream::traits_type::eof())
        return false;

    data.clear();
    std::string Line;
    while (std::getline(FCsv, Line, '\n'))
        data.push_back(SplitCSV(Line, separator, delimiter));

    // The stream closes itself when FCsv goes out of scope.
    return true;
}
/// Demo: load fsample.csv. The exit status reports whether loading succeeded
/// (0) or failed (1) instead of discarding ReadCsv's result.
int main(int argc, char *argv[])
{
    // Command line arguments are not used by this example.
    (void)argc;
    (void)argv;

    std::vector<std::vector<std::string>> Data;
    return ReadCsv("fsample.csv", Data) ? 0 : 1;
}
我的版本只使用标准 C++11 库。它能很好地处理 Excel 风格的 CSV 引号:
spam eggs,"foo,bar","""fizz buzz"""
1.23,4.567,-8.00E+09
代码是作为有限状态机编写的,每次只消耗一个字符。我认为这更容易解释。
#include <istream>
#include <string>
#include <vector>
/// States of the character-by-character row scanner used by readCSVRow.
enum class CSVState {
    UnquotedField,  ///< outside any quoted section
    QuotedField,    ///< inside a quoted section
    QuotedQuote     ///< just saw a quote inside a quoted section: it either closes
                    ///< the section or is the first half of an escaped quote ("")
};

/// Split one CSV row (Excel dialect) into its fields.
/// Quoted sections may contain commas, and "" inside them stands for a literal
/// quote. The result always holds at least one (possibly empty) field.
/// @param row one line of text without its '\n'; a single trailing '\r' (as
///            left behind when a CRLF file is read line by line) is ignored
/// @return the fields of the row, in order.
std::vector<std::string> readCSVRow(const std::string &row) {
    // Treat a trailing carriage return as part of the line terminator so that
    // it does not end up inside the last field.
    std::string::size_type length = row.size();
    if (length > 0 && row[length - 1] == '\r') {
        --length;
    }

    CSVState state = CSVState::UnquotedField;
    std::vector<std::string> fields {""};   // fields.back() is the field being built

    for (std::string::size_type pos = 0; pos < length; ++pos) {
        const char c = row[pos];
        switch (state) {
            case CSVState::UnquotedField:
                if (c == ',') {                 // end of field
                    fields.emplace_back();
                } else if (c == '"') {
                    state = CSVState::QuotedField;
                } else {
                    fields.back().push_back(c);
                }
                break;

            case CSVState::QuotedField:
                if (c == '"') {
                    state = CSVState::QuotedQuote;
                } else {
                    fields.back().push_back(c);
                }
                break;

            case CSVState::QuotedQuote:
                if (c == ',') {                 // , after closing quote
                    fields.emplace_back();
                    state = CSVState::UnquotedField;
                } else if (c == '"') {          // "" -> "
                    fields.back().push_back('"');
                    state = CSVState::QuotedField;
                } else {
                    // The quote closed the section. Keep the character that
                    // follows it instead of silently dropping it.
                    fields.back().push_back(c);
                    state = CSVState::UnquotedField;
                }
                break;
        }
    }
    return fields;
}
/// Read every remaining line of @p in as one CSV row (Excel dialect; accepts
/// "quoted fields ""with quotes""") and collect the rows into a table.
/// Reading stops at end of input or at the first line that cannot be read.
std::vector<std::vector<std::string>> readCSV(std::istream &in) {
    std::vector<std::vector<std::string>> rows;
    std::string line;
    for (;;) {
        if (in.eof()) {
            break;
        }
        std::getline(in, line);
        // fail() is true when either failbit or badbit is set.
        if (in.fail()) {
            break;
        }
        rows.push_back(readCSVRow(line));
    }
    return rows;
}