在 C# 中是否有默认/官方/推荐的方法来解析 CSV 文件?我不想自己动手编写解析器。

另外,我也见过人们使用ODBC/OLE DB通过文本驱动程序读取CSV的实例,很多人因为它的“缺点”而不鼓励这样做。这些缺点是什么?

理想情况下,我正在寻找一种方法,通过它我可以通过列名读取CSV,使用第一个记录作为报头/字段名。给出的一些答案是正确的,但基本上是将文件反序列化为类。


当前回答

如果有人想要一段可以直接放进自己代码里的片段,而不必引用库或下载包,以下是我写的一个版本:

    /// <summary>
    /// Serializes a list of field values into a single CSV record.
    /// </summary>
    /// <remarks>
    /// Every non-empty field is wrapped in double quotes and any quote inside it is doubled.
    /// Empty (or null) fields are written as nothing at all, so they show up as two adjacent
    /// separators. Exactly one comma is written between consecutive fields.
    /// </remarks>
    /// <param name="parts">Field values in column order.</param>
    /// <returns>The CSV record, without a line terminator.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="parts"/> is null.</exception>
    public static string FormatCSV(List<string> parts)
    {
        if (parts == null)
        {
            throw new ArgumentNullException("parts");
        }

        List<string> fields = new List<string>(parts.Count);

        foreach (string s in parts)
        {
            if (string.IsNullOrEmpty(s))
            {
                // Empty fields stay unquoted, as in the previous output format.
                fields.Add("");
            }
            else
            {
                fields.Add("\"" + s.Replace("\"", "\"\"") + "\"");
            }
        }

        // Joining guarantees Count - 1 separators. The earlier version decided "is this the
        // first field?" from the length of the text built so far, which produced an extra
        // comma whenever the first field was empty.
        return string.Join(",", fields.ToArray());
    }

    /// <summary>
    /// Scanner state used while parsing a CSV record.
    /// </summary>
    enum CSVMode
    {
        /// <summary>Between fields: no character of the next field has been read yet.</summary>
        CLOSED = 0,

        /// <summary>Inside an unquoted field.</summary>
        OPENED_RAW = 1,

        /// <summary>Inside a double-quoted field.</summary>
        OPENED_QUOTE = 2
    }

    /// <summary>
    /// Splits a single CSV record into its fields.
    /// </summary>
    /// <remarks>
    /// Fields are separated by commas. A field that starts with a double quote is read up to
    /// its closing quote; inside it a doubled quote stands for one literal quote and commas
    /// are ordinary content. A quote that appears inside an unquoted field is kept verbatim.
    /// Line breaks are not treated specially: they are simply content characters.
    /// An empty input yields an empty list; every comma otherwise separates two fields, so a
    /// trailing comma yields a trailing empty field.
    /// </remarks>
    /// <param name="input">The CSV record to split.</param>
    /// <returns>The field values in order, with enclosing quotes removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="FormatException">
    /// A closing quote is followed by something other than a comma or the end of the input.
    /// </exception>
    public static List<string> ParseCSV(string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        List<string> results = new List<string>();
        System.Text.StringBuilder content = new System.Text.StringBuilder();
        CSVMode mode = CSVMode.CLOSED;

        // True right after a separator has been consumed: one more field is owed even if
        // the input stops here (e.g. "a," has two fields).
        bool fieldPending = false;

        for (int i = 0; i < input.Length; i++)
        {
            char letter = input[i];

            // Explicit bounds check instead of a '\0' sentinel, so a NUL in the data is not
            // mistaken for "end of input".
            bool hasNext = i + 1 < input.Length;

            if (mode == CSVMode.OPENED_QUOTE)
            {
                if (letter != '"')
                {
                    content.Append(letter);
                }
                else if (hasNext && input[i + 1] == '"')
                {
                    // Doubled quote: emit one literal quote and skip its twin.
                    content.Append('"');
                    i++;
                }
                else
                {
                    // Closing quote: the field is complete.
                    results.Add(content.ToString());
                    content.Length = 0;
                    mode = CSVMode.CLOSED;
                    fieldPending = false;

                    if (hasNext)
                    {
                        if (input[i + 1] != ',')
                        {
                            throw new FormatException("Expected comma. Found: " + input[i + 1]);
                        }

                        // Consume the separator that follows the closing quote.
                        i++;
                        fieldPending = true;
                    }
                }
            }
            else if (mode == CSVMode.OPENED_RAW)
            {
                if (letter == ',')
                {
                    results.Add(content.ToString());
                    content.Length = 0;
                    mode = CSVMode.CLOSED;
                    fieldPending = true;
                }
                else
                {
                    // Includes quotes: inside an unquoted field they are plain content.
                    content.Append(letter);
                }
            }
            else
            {
                if (letter == '"')
                {
                    mode = CSVMode.OPENED_QUOTE;
                }
                else if (letter == ',')
                {
                    // Empty field. Stay CLOSED so that a quoted field may still follow;
                    // switching to raw mode here would read the next quote literally.
                    results.Add(string.Empty);
                    fieldPending = true;
                }
                else
                {
                    mode = CSVMode.OPENED_RAW;
                    content.Append(letter);
                }
            }
        }

        // Flush a field that was still open (or still owed) when the input ended.
        if (mode != CSVMode.CLOSED || fieldPending)
        {
            results.Add(content.ToString());
        }

        return results;
    }

其他回答

这个解决方案使用官方的 Microsoft.VisualBasic 程序集来解析 CSV。

优点:

支持分隔符转义、可忽略表头、去除首尾空格、忽略注释

代码:

    using Microsoft.VisualBasic.FileIO;

    /// <summary>
    /// Parses delimited text into rows of fields using
    /// <c>Microsoft.VisualBasic.FileIO.TextFieldParser</c>.
    /// </summary>
    /// <remarks>
    /// Lines starting with <c>#</c> are configured as comments, and fields may be enclosed
    /// in double quotes. No header handling is performed: the first data line is returned
    /// like any other row.
    /// </remarks>
    /// <param name="csv">The complete text to parse.</param>
    /// <param name="delimiter">
    /// The field delimiter. Defaults to <c>";"</c>, which was previously hard-coded; pass
    /// <c>","</c> for comma-separated input.
    /// </param>
    /// <returns>One list of field values per data line, in input order.</returns>
    public static List<List<string>> ParseCSV (string csv, string delimiter = ";")
    {
        List<List<string>> result = new List<List<string>>();

        // To use the TextFieldParser a reference to the Microsoft.VisualBasic assembly has to be added to the project.
        using (TextFieldParser parser = new TextFieldParser(new StringReader(csv)))
        {
            parser.CommentTokens = new string[] { "#" };
            parser.SetDelimiters(new string[] { delimiter });
            parser.HasFieldsEnclosedInQuotes = true;

            // To skip a header line, call parser.ReadLine() once here before the loop.

            while (!parser.EndOfData)
            {
                List<string> values = new List<string>();

                // A null result is recorded as an empty row rather than dropped.
                string[] readFields = parser.ReadFields();
                if (readFields != null)
                {
                    values.AddRange(readFields);
                }

                result.Add(values);
            }
        }

        return result;
    }

这个解析器支持在列中嵌套逗号和引号:

/// <summary>
/// Minimal single-line CSV splitter that understands double-quoted columns containing
/// commas and doubled ("") quotes.
/// </summary>
static class CSVParser
{
    /// <summary>
    /// Splits one CSV line into its columns.
    /// </summary>
    /// <param name="line">The line to split; it is not expected to contain a line terminator.</param>
    /// <returns>
    /// The column values in order. Empty columns are returned as <c>null</c>, and an empty
    /// line yields an empty array.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="line"/> is null.</exception>
    public static string[] ParseLine(string line)
    {
        if (line == null)
        {
            throw new System.ArgumentNullException("line");
        }

        List<string> cols = new List<string>();
        string value = null;

        for (int i = 0; i < line.Length; i++)
        {
            bool isLast = i == line.Length - 1;

            switch (line[i])
            {
                case ',':
                    cols.Add(value);
                    value = null;
                    if (isLast)
                    {
                        // It ends with comma: one more (empty) column follows.
                        cols.Add(null);
                    }
                    break;
                case '"':
                    cols.Add(ParseEnclosedColumn(line, ref i));

                    // Step past the closing quote; the loop increment then skips the
                    // separator that follows it.
                    i++;

                    // Same rule as for unquoted columns: a separator in the very last
                    // position means the line ends with an empty column. Without this,
                    // "a", would yield one column while a, yields two.
                    if (i == line.Length - 1 && line[i] == ',')
                    {
                        cols.Add(null);
                    }
                    break;
                default:
                    value += line[i];
                    if (isLast)
                    {
                        // Last character: flush the column being built.
                        cols.Add(value);
                    }
                    break;
            }
        }

        return cols.ToArray();
    }//ParseLine

    /// <summary>
    /// Reads a double-quoted column, e.g. <c>"b"",bb"</c>.
    /// </summary>
    /// <param name="line">The full line being parsed.</param>
    /// <param name="index">
    /// On entry, the position of the opening quote. On return, the position of the closing
    /// quote when it is directly followed by a comma; otherwise the last position scanned.
    /// </param>
    /// <returns>
    /// The column content with enclosing quotes removed and doubled quotes collapsed, or
    /// <c>null</c> when the column is empty.
    /// </returns>
    static string ParseEnclosedColumn(string line, ref int index)
    {
        string value = null;

        // Counts every quote seen, including the opening one. An even count means the
        // quotes are balanced at this point, so the column may end here.
        int numberQuotes = 1;
        int index2 = index;

        for (int i = index + 1; i < line.Length; i++)
        {
            index2 = i;
            switch (line[i])
            {
                case '"':
                    numberQuotes++;
                    if (numberQuotes % 2 == 0)
                    {
                        // Balanced quotes followed by a separator: end of the column.
                        if (i < line.Length - 1 && line[i + 1] == ',')
                        {
                            index = i;
                            return value;
                        }
                    }
                    else if (i > index + 1 && line[i - 1] == '"')
                    {
                        // Second quote of a doubled pair: emit one literal quote.
                        value += '"';
                    }
                    break;
                default:
                    value += line[i];
                    break;
            }
        }

        // Reached the end of the line without finding a quote-comma boundary.
        index = index2;
        return value;
    }//ParseEnclosedColumn 
}//class CSVParser

这里有一个我经常使用的 helper 类,分享出来,以防以后有人再回到这个帖子。

我使用它是因为它很简单,可以直接移植到项目中使用:

/// <summary>
/// A list of CSV rows, each row being an array of field values, built by splitting the
/// supplied text line by line.
/// </summary>
/// <remarks>
/// This is a plain split, not a full CSV parser: a separator that occurs inside a quoted
/// field is not protected, and all leading/trailing double quotes are trimmed from each value.
/// </remarks>
public class CSVHelper : List<string[]>
{
  protected string csv = string.Empty;
  protected string separator = ",";

  /// <summary>
  /// Splits <paramref name="csv"/> into rows and fields and adds every non-empty row to this list.
  /// </summary>
  /// <param name="csv">The complete CSV text.</param>
  /// <param name="separator">
  /// Regular-expression pattern that separates the fields of a line. The default,
  /// <c>","</c> (quote, comma, quote), matches the boundary between two quoted fields.
  /// </param>
  public CSVHelper(string csv, string separator = "\",\"")
  {
    this.csv = csv;
    this.separator = separator;

    // Accept CRLF, lone CR and lone LF regardless of the current platform. Splitting on
    // Environment.NewLine left a stray "\r" (and therefore an untrimmed quote) on the last
    // field of CRLF files on Unix, and did not split LF-only files at all on Windows.
    // Empty lines, including the one after a trailing line break, are skipped.
    foreach (string line in Regex.Split(csv, @"\r\n|\r|\n").Where(s => !string.IsNullOrEmpty(s)))
    {
      string[] values = Regex.Split(line, separator);

      for (int i = 0; i < values.Length; i++)
      {
        //Trim the enclosing quotes left over on the outer fields
        values[i] = values[i].Trim('\"');
      }

      this.Add(values);
    }
  }
}

像这样使用它:

/// <summary>
/// Builds one <c>Person</c> per CSV row: column 0 becomes the name, column 1 the telephone number.
/// </summary>
/// <param name="csvContent">Raw CSV text, handed to <c>CSVHelper</c> for splitting.</param>
/// <returns>The people in the order their rows appear in the input.</returns>
public List<Person> GetPeople(string csvContent)
{
  List<Person> result = new List<Person>();
  CSVHelper rows = new CSVHelper(csvContent);

  for (int r = 0; r < rows.Count; r++)
  {
    string[] cells = rows[r];

    // Both columns are read unconditionally, so each row needs at least two fields.
    result.Add(new Person { Name = cells[0], TelephoneNo = cells[1] });
  }

  return result;
}

[已更新 CSV helper:修复了末尾的换行符会多产生一个空行的错误]

我知道有点晚了,但刚刚找到了Microsoft.VisualBasic.FileIO库,其中有TextFieldParser类来处理csv文件。

基于 unlimit 在帖子《如何使用 C# Split() 函数正确分割 CSV?》中的回答:

// Naive split: the "," is compiled as a regex pattern and every comma is a field boundary.
string[] tokens = new System.Text.RegularExpressions.Regex(",").Split(paramString);

注意:这并不处理转义/嵌套的逗号等,因此只适用于某些简单的CSV列表。