我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。

我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp

我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。


当前回答

c++ String工具箱库(StrTk)有一个令牌网格类,它允许你从文本文件、字符串或字符缓冲区加载数据,并以行-列的方式解析/处理它们。

您可以指定行分隔符和列分隔符,或者只使用默认值。

void foo()
{
   std::string data = "1,2,3,4,5\n"
                      "0,2,4,6,8\n"
                      "1,3,5,7,9\n";

   strtk::token_grid grid(data,data.size(),",");

   for(std::size_t i = 0; i < grid.row_count(); ++i)
   {
      strtk::token_grid::row_type r = grid.row(i);
      for(std::size_t j = 0; j < r.size(); ++j)
      {
         std::cout << r.get<int>(j) << "\t";
      }
      std::cout << std::endl;
   }
   std::cout << std::endl;
}

更多的例子可以在这里找到

其他回答

您可以使用fopen,fscanf函数打开和读取.csv文件,但重要的是解析数据。使用分隔符解析数据的最简单方法。对于.csv,分隔符为','。

假设你的data1.csv文件如下所示:

A,45,76,01
B,77,67,02
C,63,76,03
D,65,44,04

您可以标记数据并存储在字符数组中,然后使用atoi()等函数进行适当的转换

FILE *fp;
char str1[10], str2[10], str3[10], str4[10];

fp = fopen("G:\\data1.csv", "r");
if(NULL == fp)
{
    printf("\nError in opening file.");
    return 0;
}
while(EOF != fscanf(fp, " %[^,], %[^,], %[^,], %s, %s, %s, %s ", str1, str2, str3, str4))
{
    printf("\n%s %s %s %s", str1, str2, str3, str4);
}
fclose(fp);

[^,], ^ -它颠倒了逻辑,意思是匹配任何不包含逗号的字符串,然后最后,表示匹配终止前一个字符串的逗号。

由于所有CSV问题似乎都被重定向到这里,我想我应该在这里发布我的答案。这个回答并没有直接回答提问者的问题。我希望能够读取已知的CSV格式的流,而且每个字段的类型都已经知道。当然,可以使用下面的方法将每个字段处理为字符串类型。

作为我希望能够使用CSV输入流的一个例子,考虑以下输入(取自维基百科的CSV页面):

const char input[] =
"Year,Make,Model,Description,Price\n"
"1997,Ford,E350,\"ac, abs, moon\",3000.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",\"\",5000.00\n"
"1996,Jeep,Grand Cherokee,\"MUST SELL!\n\
air, moon roof, loaded\",4799.00\n"
;

然后,我希望能够像这样读取数据:

std::istringstream ss(input);
std::string title[5];
int year;
std::string make, model, desc;
float price;
csv_istream(ss)
    >> title[0] >> title[1] >> title[2] >> title[3] >> title[4];
while (csv_istream(ss)
       >> year >> make >> model >> desc >> price) {
    //...do something with the record...
}

这就是我最后得到的解。

struct csv_istream {
    std::istream &is_;
    csv_istream (std::istream &is) : is_(is) {}
    void scan_ws () const {
        while (is_.good()) {
            int c = is_.peek();
            if (c != ' ' && c != '\t') break;
            is_.get();
        }
    }
    void scan (std::string *s = 0) const {
        std::string ws;
        int c = is_.get();
        if (is_.good()) {
            do {
                if (c == ',' || c == '\n') break;
                if (s) {
                    ws += c;
                    if (c != ' ' && c != '\t') {
                        *s += ws;
                        ws.clear();
                    }
                }
                c = is_.get();
            } while (is_.good());
            if (is_.eof()) is_.clear();
        }
    }
    template <typename T, bool> struct set_value {
        void operator () (std::string in, T &v) const {
            std::istringstream(in) >> v;
        }
    };
    template <typename T> struct set_value<T, true> {
        template <bool SIGNED> void convert (std::string in, T &v) const {
            if (SIGNED) v = ::strtoll(in.c_str(), 0, 0);
            else v = ::strtoull(in.c_str(), 0, 0);
        }
        void operator () (std::string in, T &v) const {
            convert<is_signed_int<T>::val>(in, v);
        }
    };
    template <typename T> const csv_istream & operator >> (T &v) const {
        std::string tmp;
        scan(&tmp);
        set_value<T, is_int<T>::val>()(tmp, v);
        return *this;
    }
    const csv_istream & operator >> (std::string &v) const {
        v.clear();
        scan_ws();
        if (is_.peek() != '"') scan(&v);
        else {
            std::string tmp;
            is_.get();
            std::getline(is_, tmp, '"');
            while (is_.peek() == '"') {
                v += tmp;
                v += is_.get();
                std::getline(is_, tmp, '"');
            }
            v += tmp;
            scan();
        }
        return *this;
    }
    template <typename T>
    const csv_istream & operator >> (T &(*manip)(T &)) const {
        is_ >> manip;
        return *this;
    }
    operator bool () const { return !is_.fail(); }
};

使用以下helper,可以通过c++ 11中的新积分特征模板进行简化:

template <typename T> struct is_signed_int { enum { val = false }; };
template <> struct is_signed_int<short> { enum { val = true}; };
template <> struct is_signed_int<int> { enum { val = true}; };
template <> struct is_signed_int<long> { enum { val = true}; };
template <> struct is_signed_int<long long> { enum { val = true}; };

template <typename T> struct is_unsigned_int { enum { val = false }; };
template <> struct is_unsigned_int<unsigned short> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned int> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned long> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned long long> { enum { val = true}; };

template <typename T> struct is_int {
    enum { val = (is_signed_int<T>::val || is_unsigned_int<T>::val) };
};

在网上试试!

另一个类似于Loki Astari的答案的解决方案,在c++ 11中。这里的行是给定类型的std::元组。代码扫描一行,然后扫描到每个分隔符,然后将值直接转换并转储到元组中(使用一些模板代码)。

for (auto row : csv<std::string, int, float>(file, ',')) {
    std::cout << "first col: " << std::get<0>(row) << std::endl;
}

优势:

非常干净,使用简单,只有c++ 11。 自动类型转换为std::tuple<t1,…>通过算子>>。

缺少什么:

转义和引用 没有错误处理的情况下畸形的CSV。

主要代码:

#include <iterator>
#include <sstream>
#include <string>

namespace csvtools {
    /// Read the last element of the tuple without calling recursively
    template <std::size_t idx, class... fields>
    typename std::enable_if<idx >= std::tuple_size<std::tuple<fields...>>::value - 1>::type
    read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
        std::string cell;
        std::getline(in, cell, delimiter);
        std::stringstream cell_stream(cell);
        cell_stream >> std::get<idx>(out);
    }

    /// Read the @p idx-th element of the tuple and then calls itself with @p idx + 1 to
    /// read the next element of the tuple. Automatically falls in the previous case when
    /// reaches the last element of the tuple thanks to enable_if
    template <std::size_t idx, class... fields>
    typename std::enable_if<idx < std::tuple_size<std::tuple<fields...>>::value - 1>::type
    read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
        std::string cell;
        std::getline(in, cell, delimiter);
        std::stringstream cell_stream(cell);
        cell_stream >> std::get<idx>(out);
        read_tuple<idx + 1, fields...>(in, out, delimiter);
    }
}

/// Iterable csv wrapper around a stream. @p fields the list of types that form up a row.
template <class... fields>
class csv {
    std::istream &_in;
    const char _delim;
public:
    typedef std::tuple<fields...> value_type;
    class iterator;

    /// Construct from a stream.
    inline csv(std::istream &in, const char delim) : _in(in), _delim(delim) {}

    /// Status of the underlying stream
    /// @{
    inline bool good() const {
        return _in.good();
    }
    inline const std::istream &underlying_stream() const {
        return _in;
    }
    /// @}

    inline iterator begin();
    inline iterator end();
private:

    /// Reads a line into a stringstream, and then reads the line into a tuple, that is returned
    inline value_type read_row() {
        std::string line;
        std::getline(_in, line);
        std::stringstream line_stream(line);
        std::tuple<fields...> retval;
        csvtools::read_tuple<0, fields...>(line_stream, retval, _delim);
        return retval;
    }
};

/// Iterator; just calls recursively @ref csv::read_row and stores the result.
template <class... fields>
class csv<fields...>::iterator {
    csv::value_type _row;
    csv *_parent;
public:
    typedef std::input_iterator_tag iterator_category;
    typedef csv::value_type         value_type;
    typedef std::size_t             difference_type;
    typedef csv::value_type *       pointer;
    typedef csv::value_type &       reference;

    /// Construct an empty/end iterator
    inline iterator() : _parent(nullptr) {}
    /// Construct an iterator at the beginning of the @p parent csv object.
    inline iterator(csv &parent) : _parent(parent.good() ? &parent : nullptr) {
        ++(*this);
    }

    /// Read one row, if possible. Set to end if parent is not good anymore.
    inline iterator &operator++() {
        if (_parent != nullptr) {
            _row = _parent->read_row();
            if (!_parent->good()) {
                _parent = nullptr;
            }
        }
        return *this;
    }

    inline iterator operator++(int) {
        iterator copy = *this;
        ++(*this);
        return copy;
    }

    inline csv::value_type const &operator*() const {
        return _row;
    }

    inline csv::value_type const *operator->() const {
        return &_row;
    }

    bool operator==(iterator const &other) {
        return (this == &other) or (_parent == nullptr and other._parent == nullptr);
    }
    bool operator!=(iterator const &other) {
        return not (*this == other);
    }
};

template <class... fields>
typename csv<fields...>::iterator csv<fields...>::begin() {
    return iterator(*this);
}

template <class... fields>
typename csv<fields...>::iterator csv<fields...>::end() {
    return iterator();
}

我在GitHub上放了一个小的工作示例;我一直用它来解析一些数值数据,它达到了它的目的。

@sastanin的解决方案的一个小版本,以便它可以处理引号中的换行。

std::vector<std::vector<std::string>> readCSV(std::istream &in) {
    std::vector<std::vector<std::string>> table;

    while (!in.eof()) {
        CSVState state = CSVState::UnquotedField;
        std::vector<std::string> fields {""};
        size_t i = 0; // index of the current field
        for (char c : row) {
            switch (state) {
                case CSVState::UnquotedField:
                    switch (c) {
                        case ',': // end of field
                                  fields.push_back(""); i++;
                                  break;
                        case '"': state = CSVState::QuotedField;
                                  break;
                        default:  fields[i].push_back(c);
                                  break; }
                    break;
                case CSVState::QuotedField:
                    switch (c) {
                        case '"': state = CSVState::QuotedQuote;
                                  break;
                        default:  fields[i].push_back(c);
                                  break; }
                    break;
                case CSVState::QuotedQuote:
                    switch (c) {
                        case ',': // , after closing quote
                                  fields.push_back(""); i++;
                                  state = CSVState::UnquotedField;
                                  break;
                        case '"': // "" -> "
                                  fields[i].push_back('"');
                                  state = CSVState::QuotedField;
                                  break;
                        case '\n': // newline
                                  table.push_back(fields);
                                  state = CSVState::UnquotedField;
                                  fields = vector<string>{""};
                                  i = 0;
                        default:  // end of quote
                                  state = CSVState::UnquotedField;
                                  break; }
                    break;
            }
        }
    }
    return table;
}

如果你不关心转义逗号和换行符, 并且你不能在引号中嵌入逗号和换行符(如果你不能转义那么…) 那么它只有大约三行代码(好的14 ->,但它只有15读取整个文件)。

std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
{
    std::vector<std::string>   result;
    std::string                line;
    std::getline(str,line);

    std::stringstream          lineStream(line);
    std::string                cell;

    while(std::getline(lineStream,cell, ','))
    {
        result.push_back(cell);
    }
    // This checks for a trailing comma with no data after it.
    if (!lineStream && cell.empty())
    {
        // If there was a trailing comma then add an empty element.
        result.push_back("");
    }
    return result;
}

我只需要创建一个表示一行的类。 然后流到该对象:

#include <iterator>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>

class CSVRow
{
    public:
        std::string_view operator[](std::size_t index) const
        {
            return std::string_view(&m_line[m_data[index] + 1], m_data[index + 1] -  (m_data[index] + 1));
        }
        std::size_t size() const
        {
            return m_data.size() - 1;
        }
        void readNextRow(std::istream& str)
        {
            std::getline(str, m_line);

            m_data.clear();
            m_data.emplace_back(-1);
            std::string::size_type pos = 0;
            while((pos = m_line.find(',', pos)) != std::string::npos)
            {
                m_data.emplace_back(pos);
                ++pos;
            }
            // This checks for a trailing comma with no data after it.
            pos   = m_line.size();
            m_data.emplace_back(pos);
        }
    private:
        std::string         m_line;
        std::vector<int>    m_data;
};

std::istream& operator>>(std::istream& str, CSVRow& data)
{
    data.readNextRow(str);
    return str;
}   
int main()
{
    std::ifstream       file("plop.csv");

    CSVRow              row;
    while(file >> row)
    {
        std::cout << "4th Element(" << row[3] << ")\n";
    }
}

但只要做一点工作,我们就可以在技术上创建一个迭代器:

class CSVIterator
{   
    public:
        typedef std::input_iterator_tag     iterator_category;
        typedef CSVRow                      value_type;
        typedef std::size_t                 difference_type;
        typedef CSVRow*                     pointer;
        typedef CSVRow&                     reference;

        CSVIterator(std::istream& str)  :m_str(str.good()?&str:nullptr) { ++(*this); }
        CSVIterator()                   :m_str(nullptr) {}

        // Pre Increment
        CSVIterator& operator++()               {if (m_str) { if (!((*m_str) >> m_row)){m_str = nullptr;}}return *this;}
        // Post increment
        CSVIterator operator++(int)             {CSVIterator    tmp(*this);++(*this);return tmp;}
        CSVRow const& operator*()   const       {return m_row;}
        CSVRow const* operator->()  const       {return &m_row;}

        bool operator==(CSVIterator const& rhs) {return ((this == &rhs) || ((this->m_str == nullptr) && (rhs.m_str == nullptr)));}
        bool operator!=(CSVIterator const& rhs) {return !((*this) == rhs);}
    private:
        std::istream*       m_str;
        CSVRow              m_row;
};


int main()
{
    std::ifstream       file("plop.csv");

    for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
    {
        std::cout << "4th Element(" << (*loop)[3] << ")\n";
    }
}

现在我们已经到了2020年,让我们添加一个CSVRange对象:

class CSVRange
{
    std::istream&   stream;
    public:
        CSVRange(std::istream& str)
            : stream(str)
        {}
        CSVIterator begin() const {return CSVIterator{stream};}
        CSVIterator end()   const {return CSVIterator{};}
};

int main()
{
    std::ifstream       file("plop.csv");

    for(auto& row: CSVRange(file))
    {
        std::cout << "4th Element(" << row[3] << ")\n";
    }
}