我正在读取*.csv文件。

*.csv-file由分号(";")分隔的两列组成。

我能够使用StreamReader读取*.csv文件,并能够使用Split()函数拆分每一行。我想将每一列存储到一个单独的数组中,然后显示它。

有可能做到吗?


当前回答

您可以在C#中使用Microsoft.VisualBasic.dll程序集中的Microsoft.VisualBasic.FileIO.TextFieldParser类来获得更好的性能

从上面的文章中获取下面的代码示例

// Entry point: loads the sample CSV file into a DataTable, prints how many
// data rows were read, and waits for Enter so the console window stays open.
static void Main()
{
    const string sourcePath = @"C:\Users\Administrator\Desktop\test.csv";

    DataTable table = GetDataTabletFromCSVFile(sourcePath);
    int rowCount = table.Rows.Count;

    Console.WriteLine("Rows count:" + rowCount);

    // Block until the user presses Enter.
    Console.ReadLine();
}


/// <summary>
/// Reads a delimited text file into a <see cref="DataTable"/>. The first record
/// supplies the column names; every following record becomes a row, with empty
/// fields stored as null.
/// </summary>
/// <param name="csv_file_path">Path of the file to read.</param>
/// <param name="delimiter">Field delimiter; defaults to "," (pass ";" for semicolon-separated files).</param>
/// <returns>
/// The populated table. If the file cannot be read or parsed, the error is written
/// to standard error and whatever was loaded so far (possibly an empty table) is returned.
/// </returns>
private static DataTable GetDataTabletFromCSVFile(string csv_file_path, string delimiter = ",")
{
    DataTable csvData = new DataTable();

    try
    {
        using (TextFieldParser csvReader = new TextFieldParser(csv_file_path))
        {
            csvReader.SetDelimiters(new string[] { delimiter });
            csvReader.HasFieldsEnclosedInQuotes = true;

            // ReadFields returns null when there is nothing to read (e.g. an empty file).
            string[] colFields = csvReader.ReadFields();
            if (colFields == null)
            {
                return csvData;
            }

            foreach (string column in colFields)
            {
                DataColumn dataColumn = new DataColumn(column);
                dataColumn.AllowDBNull = true;
                csvData.Columns.Add(dataColumn);
            }

            while (!csvReader.EndOfData)
            {
                string[] fieldData = csvReader.ReadFields();
                if (fieldData == null)
                {
                    continue;
                }

                // Store empty values as null so they end up as DBNull in the table.
                for (int i = 0; i < fieldData.Length; i++)
                {
                    if (fieldData[i] == "")
                    {
                        fieldData[i] = null;
                    }
                }
                csvData.Rows.Add(fieldData);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (return what was read), but do not hide the failure.
        Console.Error.WriteLine("Failed to read CSV file '" + csv_file_path + "': " + ex.Message);
    }
    return csvData;
}

其他回答

LINQ:

// Read every line of the file and break each one into its ';'-separated pieces.
var lines = File.ReadAllLines("test.txt").Select(text => text.Split(';'));
// Pass each row's pieces through unchanged: one inner sequence per row.
var csv = lines.Select(line => line.Select(piece => piece));

^^错误-尼克编辑

原来的应答者似乎试图用一个2维数组填充csv -一个包含数组的数组。第一个数组中的每一项都包含一个表示行号的数组,嵌套数组中的每一项都包含该特定列的数据。

// One string array per line, holding that line's ','-separated values.
var csv = lines.Select(line => line.Split(',').ToArray());

这是我的2个简单的静态方法,用于将csv文件的文本转换为List<List<string>>,以及反向转换。每个方法都借助一个按行转换的辅助方法。

这段代码应该考虑csv文件的所有可能性。您可以定义自己的csv分隔符,该方法尝试纠正转义双引号字符,并处理当所有文本在引号中是一个单元格,csv分隔符在引号字符串中,包括一个单元格中的多行,可以忽略空行。

最后一个方法仅用于测试。所以你可以忽略它,或者用这个测试方法测试你自己的或别人的解决方案:)。为了测试,我使用了下面这个比较棘手的csv,共2行、每行4列:

0,a,""bc,d
"e, f",g,"this,is, o
ne ""lo
ng, cell""",h

这是最终代码。为了简单起见,我删除了所有的try catch块。

using System;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// Small, lenient CSV helpers that convert between CSV text and a list of rows,
/// where each row is a list of cell strings. Supports a custom separator and
/// quotation character, doubled (escaped) quotation characters, separators inside
/// quoted cells, and quoted cells that span several lines.
/// </summary>
public static class Csv {
  /// <summary>Serializes rows to CSV text, one row per line (joined with <see cref="Environment.NewLine"/>).</summary>
  /// <param name="csv">Rows to serialize.</param>
  /// <param name="separator">Cell separator.</param>
  /// <param name="quotation">Quotation character.</param>
  /// <param name="returnFirstRow">Not used by this method; kept so existing callers keep compiling.</param>
  /// <returns>The CSV text; an empty string when there are no rows.</returns>
  public static string FromListToString(List<List<string>> csv, string separator = ",", char quotation = '"', bool returnFirstRow = true)
  {
    // string.Join avoids the quadratic cost of appending to a string in a loop.
    return string.Join(Environment.NewLine, csv.Select(row => RowFromListToString(row, separator, quotation)));
  }

  /// <summary>Parses CSV text into rows of cells.</summary>
  /// <param name="content">CSV text; lines are split on <see cref="Environment.NewLine"/>. Null yields an empty result.</param>
  /// <param name="separator">Cell separator.</param>
  /// <param name="quotation">Quotation character.</param>
  /// <param name="returnFirstRow">When false, the first parsed row (the header) is removed from the result.</param>
  /// <param name="ignoreEmptyRows">When true, rows whose cells are all empty are skipped.</param>
  /// <returns>The parsed rows. A final row whose quotation is never closed is not included.</returns>
  public static List<List<string>> FromStringToList(string content, string separator = ",", char quotation = '"', bool returnFirstRow = true, bool ignoreEmptyRows = true)
  {
    List<List<string>> csv = new List<List<string>>();
    if (content == null) { return csv; }
    string[] rows = content.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
    if (rows.Length <= (returnFirstRow ? 0 : 1)) { return csv; }

    List<string> pendingRow = null; // cells of a row whose last cell is still inside an open quotation
    foreach (string line in rows) {
      (List<string> row, bool rowClosed) = RowFromStringToList(line, pendingRow, separator, quotation);
      if (!rowClosed) { pendingRow = row; continue; } // multi-line cell: keep collecting
      pendingRow = null;
      if (!ignoreEmptyRows || row.Any(cell => cell.Length > 0)) { csv.Add(row); }
    }

    // Guard against an empty result (e.g. every row was skipped as empty) before dropping the header.
    if (!returnFirstRow && csv.Count > 0) { csv.RemoveAt(0); }
    return csv;
  }

  /// <summary>Serializes one row to a single CSV record.</summary>
  /// <param name="csvData">Cells of the row; a null cell is written as an empty cell.</param>
  /// <param name="separator">Cell separator.</param>
  /// <param name="quotation">Quotation character.</param>
  /// <returns>
  /// The cells joined by <paramref name="separator"/>. Quotation characters inside a cell are doubled;
  /// a cell is wrapped in quotation characters only when it contains the separator or a line break.
  /// </returns>
  public static string RowFromListToString(List<string> csvData, string separator = ",", char quotation = '"')
  {
    string quote = quotation.ToString();
    IEnumerable<string> cells = csvData.Select(element =>
    {
      element = element ?? "";
      if (element.Contains(quote)) {
        element = element.Replace(quote, quote + quote);
      }
      if (element.Contains(separator) || element.Contains(Environment.NewLine)) {
        // Wrap with the configured quotation character, not a hard-coded '"'.
        element = quote + element + quote;
      }
      return element;
    });
    return string.Join(separator, cells);
  }

  /// <summary>Parses one physical line into cells, optionally continuing a row left open by the previous line.</summary>
  /// <param name="csvRow">The line to parse.</param>
  /// <param name="continueWithRow">
  /// Cells returned by the previous call when that call reported the row as not closed; its last cell is
  /// treated as the start of a quoted multi-line cell. This list is modified (its last cell is removed).
  /// </param>
  /// <param name="separator">Cell separator.</param>
  /// <param name="quotation">Quotation character.</param>
  /// <returns>
  /// The cells parsed so far, and a flag that is false when the line ends inside an open quotation
  /// (the caller should pass the returned cells as <paramref name="continueWithRow"/> with the next line).
  /// </returns>
  public static (List<string>, bool) RowFromStringToList(string csvRow, List<string> continueWithRow = null, string separator = ",", char quotation = '"')
  {
    string quote = quotation.ToString();
    bool continuing = continueWithRow != null && continueWithRow.Count > 0;
    if (continuing) {
      // The previous result was already unescaped, so re-escape its last cell and re-open the quotation.
      string previousCell = quote + continueWithRow.Last().Replace(quote, quote + quote) + Environment.NewLine;
      continueWithRow.RemoveAt(continueWithRow.Count - 1);
      csvRow = previousCell + csvRow;
    }

    // Pick a placeholder character for escaped (doubled) quotations that cannot clash with the input.
    char tempQuote = (char)162;
    while (csvRow.IndexOf(tempQuote) >= 0 || tempQuote == quotation || separator.IndexOf(tempQuote) >= 0) {
      tempQuote = (char)(tempQuote + 1);
    }

    csvRow = csvRow.Replace(quote + quote, tempQuote.ToString());
    // An odd number of remaining quotation characters means a quoted cell is still open.
    bool rowClosed = csvRow.Count(ch => ch == quotation) % 2 == 0;

    Func<string, string> clean = raw => raw.Trim(quotation).Replace(tempQuote, quotation);

    List<string> csvList = new List<string>();
    string cell = null; // null = no cell in progress; distinguishes "nothing yet" from an empty cell
    foreach (string piece in csvRow.Split(new string[] { separator }, StringSplitOptions.None)) {
      // A separator found inside an open quotation belongs to the cell, so glue the pieces back together.
      cell = cell == null ? piece : cell + separator + piece;
      if (cell.Count(ch => ch == quotation) % 2 != 0) { continue; }
      csvList.Add(clean(cell));
      cell = null;
    }
    if (cell != null) { csvList.Add(clean(cell)); } // trailing cell whose quotation is still open

    if (continuing) {
      return (continueWithRow.Concat(csvList).ToList(), rowClosed);
    }
    return (csvList, rowClosed);
  }

  /// <summary>Self-test: round-trips a tricky two-row, four-column sample in both directions.</summary>
  /// <returns>True when parsing and serializing both produce the expected result.</returns>
  public static bool Test()
  {
    string csvText = "0,a,\"\"bc,d" + Environment.NewLine + "\"e, f\",g,\"this,is, o" + Environment.NewLine + "ne \"\"lo" + Environment.NewLine + "ng, cell\"\"\",h";
    List<List<string>> csvList = new List<List<string>>() { new List<string>() { "0", "a", "\"bc", "d" }, new List<string>() { "e, f", "g", "this,is, o" + Environment.NewLine + "ne \"lo" + Environment.NewLine + "ng, cell\"", "h" } };

    List<List<string>> csvTextAsList = Csv.FromStringToList(csvText);
    bool ok = csvTextAsList.Count == 2 && Enumerable.SequenceEqual(csvList[0], csvTextAsList[0]) && Enumerable.SequenceEqual(csvList[1], csvTextAsList[1]);
    string csvListAsText = Csv.FromListToString(csvList);
    return ok && csvListAsText == csvText;
  }
}

使用例子:

// get List<List<string>> representation of csv
var csvFromText = Csv.FromStringToList(csvAsText);

// read csv file with custom separator and quote
// returnFirstRow: false drops the header row; ignoreEmptyRows: false keeps empty rows
var csvFile = File.ReadAllText(csvFileFullPath);
var csvFromFile = Csv.FromStringToList(csvFile, ";", '"', false, false);

// get text representation of csvData from List<List<string>>
var csvAsText = Csv.FromListToString(csvData);

注:char tempQuote = (char)162; 中的162(字符“¢”)是字符表中第一个很少使用的字符(它已超出标准ASCII的0–127范围)。脚本会从这个字符开始依次向后查找,选用第一个没有在文本中出现的字符,并将其用作临时的转义和引号字符。

这里有一个特殊的情况,其中一个数据字段有分号(“;”)作为它的数据的一部分,在这种情况下,上面的大多数答案将失败。

这种情况下的解决方案是

// Splits every line of the file on ';' while treating a ';' that appears inside
// double quotes as part of the field. The quote characters themselves are dropped.
string[] csvRows = System.IO.File.ReadAllLines(FullyQaulifiedFileName);
string[] fields = null;
List<string> lstFields;
string field;
bool quoteStarted = false;
foreach (string csvRow in csvRows)
{
    lstFields = new List<string>();
    // Each line is parsed on its own, so an unbalanced quote must not leak into the next row.
    quoteStarted = false;
    System.Text.StringBuilder current = new System.Text.StringBuilder();
    foreach (char ch in csvRow)
    {
        if (ch == '"')
        {
            // Toggle "inside quotes"; the quote itself is not stored.
            quoteStarted = !quoteStarted;
        }
        else if (ch == ';' && !quoteStarted)
        {
            // Unquoted separator: the current field is complete.
            lstFields.Add(current.ToString());
            current.Clear();
        }
        else
        {
            current.Append(ch);
        }
    }
    // Always emit the last field, even when it is empty ("a;b;" has three fields);
    // a completely empty line still yields no fields.
    if (csvRow.Length > 0)
    {
        lstFields.Add(current.ToString());
    }
    field = "";
// This will hold values for each column for current row under processing
    fields = lstFields.ToArray();
}

我一直在使用csvreader.com(付费组件)多年,我从来没有遇到过问题。它结实、小巧、快速,但你必须为此付费。您可以将分隔符设置为您喜欢的任何值。

// Reads a ';'-delimited source with the commercial CsvReader component.
using (CsvReader reader = new CsvReader(s))
{
    reader.Settings.Delimiter = ';';
    reader.ReadHeaders();  // if headers on a line by themselves.  Makes reader.Headers[] available
    while (reader.ReadRecord())
    {
        // ... use reader.Values[col_i] ...
    }
}

您不能立即创建数组,因为您需要从一开始就知道行数(这将需要读取csv文件两次)。

您可以将值存储在两个List<T>中,然后使用它们或使用List<T>.ToArray()将它们转换为数组

非常简单的例子:

// Reads a two-column, ';'-separated file into one list per column, then prints both columns.
var column1 = new List<string>();
var column2 = new List<string>();
using (var rd = new StreamReader("filename.csv"))
{
    string line;
    while ((line = rd.ReadLine()) != null)
    {
        // Skip blank lines instead of failing on them.
        if (line.Length == 0)
        {
            continue;
        }

        var splits = line.Split(';');
        column1.Add(splits[0]);
        // A line without a separator has no second column; store an empty value
        // so both lists stay the same length.
        column2.Add(splits.Length > 1 ? splits[1] : "");
    }
}
// print column1
Console.WriteLine("Column 1:");
foreach (var element in column1)
{
    Console.WriteLine(element);
}

// print column2
Console.WriteLine("Column 2:");
foreach (var element in column2)
{
    Console.WriteLine(element);
}

N.B.

请注意,这只是一个非常简单的例子。使用string.Split无法处理某些记录内部包含分隔符“;”的情况。为了更安全,可以考虑使用专门处理csv的库,比如NuGet上的CsvHelper。