我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。

我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp

我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。


当前回答

另一个CSV I/O库可以在这里找到:

http://code.google.com/p/fast-cpp-csv-parser/

#include "csv.h"

int main(){
  io::CSVReader<3> in("ram.csv");
  in.read_header(io::ignore_extra_column, "vendor", "size", "speed");
  std::string vendor; int size; double speed;
  while(in.read_row(vendor, size, speed)){
    // do stuff with the data
  }
}

其他回答

如果你不关心转义逗号和换行符, 并且你不能在引号中嵌入逗号和换行符(如果你不能转义那么…) 那么它只有大约三行代码(好的14 ->,但它只有15读取整个文件)。

std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
{
    std::vector<std::string>   result;
    std::string                line;
    std::getline(str,line);

    std::stringstream          lineStream(line);
    std::string                cell;

    while(std::getline(lineStream,cell, ','))
    {
        result.push_back(cell);
    }
    // This checks for a trailing comma with no data after it.
    if (!lineStream && cell.empty())
    {
        // If there was a trailing comma then add an empty element.
        result.push_back("");
    }
    return result;
}

我只需要创建一个表示一行的类。 然后流到该对象:

#include <iterator>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>

class CSVRow
{
    public:
        std::string_view operator[](std::size_t index) const
        {
            return std::string_view(&m_line[m_data[index] + 1], m_data[index + 1] -  (m_data[index] + 1));
        }
        std::size_t size() const
        {
            return m_data.size() - 1;
        }
        void readNextRow(std::istream& str)
        {
            std::getline(str, m_line);

            m_data.clear();
            m_data.emplace_back(-1);
            std::string::size_type pos = 0;
            while((pos = m_line.find(',', pos)) != std::string::npos)
            {
                m_data.emplace_back(pos);
                ++pos;
            }
            // This checks for a trailing comma with no data after it.
            pos   = m_line.size();
            m_data.emplace_back(pos);
        }
    private:
        std::string         m_line;
        std::vector<int>    m_data;
};

std::istream& operator>>(std::istream& str, CSVRow& data)
{
    data.readNextRow(str);
    return str;
}   
int main()
{
    std::ifstream       file("plop.csv");

    CSVRow              row;
    while(file >> row)
    {
        std::cout << "4th Element(" << row[3] << ")\n";
    }
}

但只要做一点工作,我们就可以在技术上创建一个迭代器:

class CSVIterator
{   
    public:
        typedef std::input_iterator_tag     iterator_category;
        typedef CSVRow                      value_type;
        typedef std::size_t                 difference_type;
        typedef CSVRow*                     pointer;
        typedef CSVRow&                     reference;

        CSVIterator(std::istream& str)  :m_str(str.good()?&str:nullptr) { ++(*this); }
        CSVIterator()                   :m_str(nullptr) {}

        // Pre Increment
        CSVIterator& operator++()               {if (m_str) { if (!((*m_str) >> m_row)){m_str = nullptr;}}return *this;}
        // Post increment
        CSVIterator operator++(int)             {CSVIterator    tmp(*this);++(*this);return tmp;}
        CSVRow const& operator*()   const       {return m_row;}
        CSVRow const* operator->()  const       {return &m_row;}

        bool operator==(CSVIterator const& rhs) {return ((this == &rhs) || ((this->m_str == nullptr) && (rhs.m_str == nullptr)));}
        bool operator!=(CSVIterator const& rhs) {return !((*this) == rhs);}
    private:
        std::istream*       m_str;
        CSVRow              m_row;
};


int main()
{
    std::ifstream       file("plop.csv");

    for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
    {
        std::cout << "4th Element(" << (*loop)[3] << ")\n";
    }
}

现在我们已经到了2020年,让我们添加一个CSVRange对象:

class CSVRange
{
    std::istream&   stream;
    public:
        CSVRange(std::istream& str)
            : stream(str)
        {}
        CSVIterator begin() const {return CSVIterator{stream};}
        CSVIterator end()   const {return CSVIterator{};}
};

int main()
{
    std::ifstream       file("plop.csv");

    for(auto& row: CSVRange(file))
    {
        std::cout << "4th Element(" << row[3] << ")\n";
    }
}

如果您正在使用Visual Studio / MFC,下面的解决方案可能会使您的工作更轻松。它支持Unicode和MBCS,有注释,除了CString之外没有其他依赖项,对我来说工作得很好。它不支持在带引号的字符串中嵌入换行符,但我不在乎,只要它在这种情况下不崩溃,它不会崩溃。

总体策略是,将带引号的字符串和空字符串作为特殊情况处理,其余使用Tokenize。对于带引号的字符串,策略是找到真正的结束引号,跟踪是否遇到了连续的引号对。如果是,则使用Replace将成对转换为单个。毫无疑问,有更有效的方法,但在我的案例中,性能还不够重要,不足以证明进一步优化的合理性。

class CParseCSV {
public:
// Construction
    CParseCSV(const CString& sLine);

// Attributes
    bool    GetString(CString& sDest);

protected:
    CString m_sLine;    // line to extract tokens from
    int     m_nLen;     // line length in characters
    int     m_iPos;     // index of current position
};

CParseCSV::CParseCSV(const CString& sLine) : m_sLine(sLine)
{
    m_nLen = m_sLine.GetLength();
    m_iPos = 0;
}

bool CParseCSV::GetString(CString& sDest)
{
    if (m_iPos < 0 || m_iPos > m_nLen)  // if position out of range
        return false;
    if (m_iPos == m_nLen) { // if at end of string
        sDest.Empty();  // return empty token
        m_iPos = -1;    // really done now
        return true;
    }
    if (m_sLine[m_iPos] == '\"') {  // if current char is double quote
        m_iPos++;   // advance to next char
        int iTokenStart = m_iPos;
        bool    bHasEmbeddedQuotes = false;
        while (m_iPos < m_nLen) {   // while more chars to parse
            if (m_sLine[m_iPos] == '\"') {  // if current char is double quote
                // if next char exists and is also double quote
                if (m_iPos < m_nLen - 1 && m_sLine[m_iPos + 1] == '\"') {
                    // found pair of consecutive double quotes
                    bHasEmbeddedQuotes = true;  // request conversion
                    m_iPos++;   // skip first quote in pair
                } else  // next char doesn't exist or is normal
                    break;  // found closing quote; exit loop
            }
            m_iPos++;   // advance to next char
        }
        sDest = m_sLine.Mid(iTokenStart, m_iPos - iTokenStart);
        if (bHasEmbeddedQuotes) // if string contains embedded quote pairs
            sDest.Replace(_T("\"\""), _T("\""));    // convert pairs to singles
        m_iPos += 2;    // skip closing quote and trailing delimiter if any
    } else if (m_sLine[m_iPos] == ',') {    // else if char is comma
        sDest.Empty();  // return empty token
        m_iPos++;   // advance to next char
    } else {    // else get next comma-delimited token
        sDest = m_sLine.Tokenize(_T(","), m_iPos);
    }
    return true;
}

// calling code should look something like this:

    CStdioFile  fIn(pszPath, CFile::modeRead);
    CString sLine, sToken;
    while (fIn.ReadString(sLine)) { // for each line of input file
        if (!sLine.IsEmpty()) { // ignore blank lines
            CParseCSV   csv(sLine);
            while (csv.GetString(sToken)) {
                // do something with sToken here
            }
        }
    }

可以使用std::regex。

根据文件大小和可用内存,可以逐行读取,也可以完全在std::string中读取。

读取文件可以使用:

std::ifstream t("file.txt");
std::string sin((std::istreambuf_iterator<char>(t)),
                 std::istreambuf_iterator<char>());

然后你可以和这个相匹配,它实际上是根据你的需要定制的。

std::regex word_regex(",\\s]+");
auto what = 
    std::sregex_iterator(sin.begin(), sin.end(), word_regex);
auto wend = std::sregex_iterator();

std::vector<std::string> v;
for (;what!=wend ; wend) {
    std::smatch match = *what;
    v.push_back(match.str());
}

您还可以看看Qt库的功能。

它有正则表达式支持,QString类有很好的方法,例如split()返回QStringList,通过使用提供的分隔符分割原始字符串获得的字符串列表。应该足以为csv文件..

要获得具有给定标题名的列,我使用以下方法:c++继承Qt问题qstring

另一种快速简单的方法是使用Boost。I / O:融合

#include <iostream>
#include <sstream>

#include <boost/fusion/adapted/boost_tuple.hpp>
#include <boost/fusion/sequence/io.hpp>

namespace fusion = boost::fusion;

struct CsvString
{
    std::string value;

    // Stop reading a string once a CSV delimeter is encountered.
    friend std::istream& operator>>(std::istream& s, CsvString& v) {
        v.value.clear();
        for(;;) {
            auto c = s.peek();
            if(std::istream::traits_type::eof() == c || ',' == c || '\n' == c)
                break;
            v.value.push_back(c);
            s.get();
        }
        return s;
    }

    friend std::ostream& operator<<(std::ostream& s, CsvString const& v) {
        return s << v.value;
    }
};

int main() {
    std::stringstream input("abc,123,true,3.14\n"
                            "def,456,false,2.718\n");

    typedef boost::tuple<CsvString, int, bool, double> CsvRow;

    using fusion::operator<<;
    std::cout << std::boolalpha;

    using fusion::operator>>;
    input >> std::boolalpha;
    input >> fusion::tuple_open("") >> fusion::tuple_close("\n") >> fusion::tuple_delimiter(',');

    for(CsvRow row; input >> row;)
        std::cout << row << '\n';
}

输出:

(abc 123 true 3.14)
(def 456 false 2.718)