我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
当前回答
不好意思,但是为了隐藏几行代码,这似乎是非常复杂的语法。
为什么不这样呢:
/**
Read line from a CSV file
@param[in] fp file pointer to open file
@param[in] vls reference to vector of strings to hold next line
*/
void readCSV( FILE *fp, std::vector<std::string>& vls )
{
vls.clear();
if( ! fp )
return;
char buf[10000];
if( ! fgets( buf,999,fp) )
return;
std::string s = buf;
int p,q;
q = -1;
// loop over columns
while( 1 ) {
p = q;
q = s.find_first_of(",\n",p+1);
if( q == -1 )
break;
vls.push_back( s.substr(p+1,q-p-1) );
}
}
int _tmain(int argc, _TCHAR* argv[])
{
std::vector<std::string> vls;
FILE * fp = fopen( argv[1], "r" );
if( ! fp )
return 1;
readCSV( fp, vls );
readCSV( fp, vls );
readCSV( fp, vls );
std::cout << "row 3, col 4 is " << vls[3].c_str() << "\n";
return 0;
}
其他回答
如果你不关心转义逗号和换行符, 并且你不能在引号中嵌入逗号和换行符(如果你不能转义那么…) 那么它只有大约三行代码(好的14 ->,但它只有15读取整个文件)。
std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
{
std::vector<std::string> result;
std::string line;
std::getline(str,line);
std::stringstream lineStream(line);
std::string cell;
while(std::getline(lineStream,cell, ','))
{
result.push_back(cell);
}
// This checks for a trailing comma with no data after it.
if (!lineStream && cell.empty())
{
// If there was a trailing comma then add an empty element.
result.push_back("");
}
return result;
}
我只需要创建一个表示一行的类。 然后流到该对象:
#include <iterator>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
class CSVRow
{
public:
std::string_view operator[](std::size_t index) const
{
return std::string_view(&m_line[m_data[index] + 1], m_data[index + 1] - (m_data[index] + 1));
}
std::size_t size() const
{
return m_data.size() - 1;
}
void readNextRow(std::istream& str)
{
std::getline(str, m_line);
m_data.clear();
m_data.emplace_back(-1);
std::string::size_type pos = 0;
while((pos = m_line.find(',', pos)) != std::string::npos)
{
m_data.emplace_back(pos);
++pos;
}
// This checks for a trailing comma with no data after it.
pos = m_line.size();
m_data.emplace_back(pos);
}
private:
std::string m_line;
std::vector<int> m_data;
};
std::istream& operator>>(std::istream& str, CSVRow& data)
{
data.readNextRow(str);
return str;
}
int main()
{
std::ifstream file("plop.csv");
CSVRow row;
while(file >> row)
{
std::cout << "4th Element(" << row[3] << ")\n";
}
}
但只要做一点工作,我们就可以在技术上创建一个迭代器:
class CSVIterator
{
public:
typedef std::input_iterator_tag iterator_category;
typedef CSVRow value_type;
typedef std::size_t difference_type;
typedef CSVRow* pointer;
typedef CSVRow& reference;
CSVIterator(std::istream& str) :m_str(str.good()?&str:nullptr) { ++(*this); }
CSVIterator() :m_str(nullptr) {}
// Pre Increment
CSVIterator& operator++() {if (m_str) { if (!((*m_str) >> m_row)){m_str = nullptr;}}return *this;}
// Post increment
CSVIterator operator++(int) {CSVIterator tmp(*this);++(*this);return tmp;}
CSVRow const& operator*() const {return m_row;}
CSVRow const* operator->() const {return &m_row;}
bool operator==(CSVIterator const& rhs) {return ((this == &rhs) || ((this->m_str == nullptr) && (rhs.m_str == nullptr)));}
bool operator!=(CSVIterator const& rhs) {return !((*this) == rhs);}
private:
std::istream* m_str;
CSVRow m_row;
};
int main()
{
std::ifstream file("plop.csv");
for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
{
std::cout << "4th Element(" << (*loop)[3] << ")\n";
}
}
现在我们已经到了2020年,让我们添加一个CSVRange对象:
class CSVRange
{
std::istream& stream;
public:
CSVRange(std::istream& str)
: stream(str)
{}
CSVIterator begin() const {return CSVIterator{stream};}
CSVIterator end() const {return CSVIterator{};}
};
int main()
{
std::ifstream file("plop.csv");
for(auto& row: CSVRange(file))
{
std::cout << "4th Element(" << row[3] << ")\n";
}
}
当你使用boost::spirit这样漂亮的东西时,你应该感到自豪
这里我的一个解析器的尝试(几乎)符合这个链接的CSV规范(我不需要在字段中换行)。逗号周围的空格也被省略了)。
在你克服了编译这段代码需要等待10秒的令人震惊的经历之后:),你就可以坐下来享受了。
// csvparser.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace bascii = boost::spirit::ascii;
template <typename Iterator>
struct csv_parser : qi::grammar<Iterator, std::vector<std::string>(),
bascii::space_type>
{
qi::rule<Iterator, char() > COMMA;
qi::rule<Iterator, char() > DDQUOTE;
qi::rule<Iterator, std::string(), bascii::space_type > non_escaped;
qi::rule<Iterator, std::string(), bascii::space_type > escaped;
qi::rule<Iterator, std::string(), bascii::space_type > field;
qi::rule<Iterator, std::vector<std::string>(), bascii::space_type > start;
csv_parser() : csv_parser::base_type(start)
{
using namespace qi;
using qi::lit;
using qi::lexeme;
using bascii::char_;
start = field % ',';
field = escaped | non_escaped;
escaped = lexeme['"' >> *( char_ -(char_('"') | ',') | COMMA | DDQUOTE) >> '"'];
non_escaped = lexeme[ *( char_ -(char_('"') | ',') ) ];
DDQUOTE = lit("\"\"") [_val = '"'];
COMMA = lit(",") [_val = ','];
}
};
int main()
{
std::cout << "Enter CSV lines [empty] to quit\n";
using bascii::space;
typedef std::string::const_iterator iterator_type;
typedef csv_parser<iterator_type> csv_parser;
csv_parser grammar;
std::string str;
int fid;
while (getline(std::cin, str))
{
fid = 0;
if (str.empty())
break;
std::vector<std::string> csv;
std::string::const_iterator it_beg = str.begin();
std::string::const_iterator it_end = str.end();
bool r = phrase_parse(it_beg, it_end, grammar, space, csv);
if (r && it_beg == it_end)
{
std::cout << "Parsing succeeded\n";
for (auto& field: csv)
{
std::cout << "field " << ++fid << ": " << field << std::endl;
}
}
else
{
std::cout << "Parsing failed\n";
}
}
return 0;
}
编译:
make csvparser
测试(例子摘自维基百科):
./csvparser
Enter CSV lines [empty] to quit
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
Parsing succeeded
field 1: 1999
field 2: Chevy
field 3: Venture "Extended Edition, Very Large"
field 4:
field 5: 5000.00
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00"
Parsing failed
如果你不想在你的项目中包含boost(如果你只打算使用它来进行CSV解析,它就相当大了……)
我在这里有幸使用CSV解析:
http://www.zedwood.com/article/112/cpp-csv-parser
它处理带引号的字段-但不处理内联\n字符(这可能对大多数用途都很好)。
就像每个人都把他的解决方案,这里是我的使用模板,lambda和tuple。
它可以将任何具有所需列的CSV转换为tuple的c++向量。
它通过在元组中定义每个CSV行元素类型来工作。
您还需要为每个元素定义std::string到类型转换Formatter lambda(例如使用std::atod)。
然后你就得到了这个结构的一个向量,对应于你的CSV数据。
您可以很容易地重用它来匹配任何CSV结构。
StringsHelpers.hpp
#include <string>
#include <fstream>
#include <vector>
#include <functional>
namespace StringHelpers
{
template<typename Tuple>
using Formatter = std::function<Tuple(const std::vector<std::string> &)>;
std::vector<std::string> split(const std::string &string, const std::string &delimiter);
template<typename Tuple>
std::vector<Tuple> readCsv(const std::string &path, const std::string &delimiter, Formatter<Tuple> formatter);
};
StringsHelpers.cpp
#include "StringHelpers.hpp"
namespace StringHelpers
{
/**
* Split a string with the given delimiter into several strings
*
* @param string - The string to extract the substrings from
* @param delimiter - The substrings delimiter
*
* @return The substrings
*/
std::vector<std::string> split(const std::string &string, const std::string &delimiter)
{
std::vector<std::string> result;
size_t last = 0,
next = 0;
while ((next = string.find(delimiter, last)) != std::string::npos) {
result.emplace_back(string.substr(last, next - last));
last = next + 1;
}
result.emplace_back(string.substr(last));
return result;
}
/**
* Read a CSV file and store its values into the given structure (Tuple with Formatter constructor)
*
* @tparam Tuple - The CSV line structure format
*
* @param path - The CSV file path
* @param delimiter - The CSV values delimiter
* @param formatter - The CSV values formatter that take a vector of strings in input and return a Tuple
*
* @return The CSV as vector of Tuple
*/
template<typename Tuple>
std::vector<Tuple> readCsv(const std::string &path, const std::string &delimiter, Formatter<Tuple> formatter)
{
std::ifstream file(path, std::ifstream::in);
std::string line;
std::vector<Tuple> result;
if (file.fail()) {
throw std::runtime_error("The file " + path + " could not be opened");
}
while (std::getline(file, line)) {
result.emplace_back(formatter(split(line, delimiter)));
}
file.close();
return result;
}
// Forward template declarations
template std::vector<std::tuple<double, double, double>> readCsv<std::tuple<double, double, double>>(const std::string &, const std::string &, Formatter<std::tuple<double, double, double>>);
} // End of StringHelpers namespace
Main.cpp(一些用法)
#include "StringHelpers.hpp"
/**
* Example of use with a CSV file which have (number,Red,Green,Blue) as line values. We do not want to use the 1st value
* of the line.
*/
int main(int argc, char **argv)
{
// Declare CSV line type, formatter and template type
typedef std::tuple<double, double, double> CSV_format;
typedef std::function<CSV_format(const std::vector<std::string> &)> formatterT;
enum RGB { Red = 1, Green, Blue };
const std::string COLOR_MAP_PATH = "/some/absolute/path";
// Load the color map
auto colorMap = StringHelpers::readCsv<CSV_format>(COLOR_MAP_PATH, ",", [](const std::vector<std::string> &values) {
return CSV_format {
// Here is the formatter lambda that convert each value from string to what you want
std::strtod(values[Red].c_str(), nullptr),
std::strtod(values[Green].c_str(), nullptr),
std::strtod(values[Blue].c_str(), nullptr)
};
});
// Use your colorMap as you wish...
}
你可能想看看我的自由/开源软件项目CSVfix(更新链接),这是一个用c++编写的CSV流编辑器。CSV解析器不是什么好东西,但它完成了工作,整个包可以在不编写任何代码的情况下满足您的需要。
CSV解析器请参见alib/src/a_csv.cpp,使用示例请参见csvlib/src/csved_ioman.cpp (IOManager::ReadCSV)。