我正在读取*.csv文件。

*.csv-file由分号(";")分隔的两列组成。

我能够阅读*.csv文件使用StreamReader,并能够通过使用Split()函数分离每一行。我想将每一列存储到一个单独的数组中,然后显示它。

有可能做到吗?


当前回答

这是我的2个简单的静态方法,将文本从csv文件转换为列表<列表<字符串>>,反之亦然。每种方法都使用行转换器。

这段代码应该考虑csv文件的所有可能性。您可以定义自己的csv分隔符,该方法尝试纠正转义双引号字符,并处理当所有文本在引号中是一个单元格,csv分隔符在引号字符串中,包括一个单元格中的多行,可以忽略空行。

最后一种方法仅用于测试。所以你可以忽略它,或者用这个测试方法测试你自己的或别人的解决方案:)。为了测试,我使用了这个硬csv, 4行2行:

0,a,""bc,d
"e, f",g,"this,is, o
ne ""lo
ng, cell""",h

这是最终代码。为了简单起见,我删除了所有的try catch块。

using System;
using System.Collections.Generic;
using System.Linq;

public static class Csv {
  public static string FromListToString(List<List<string>> csv, string separator = ",", char quotation = '"', bool returnFirstRow = true)
  {
    string content = "";
    for (int row = 0; row < csv.Count; row++) {
      content += (row > 0 ? Environment.NewLine : "") + RowFromListToString(csv[row], separator, quotation);
    }
    return content;
  }

  public static List<List<string>> FromStringToList(string content, string separator = ",", char quotation = '"', bool returnFirstRow = true, bool ignoreEmptyRows = true)
  {
    List<List<string>> csv = new List<List<string>>();
    string[] rows = content.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
    if (rows.Length <= (returnFirstRow ? 0 : 1)) { return csv; }
    List<string> csvRow = null;
    for (int rowIndex = 0; rowIndex < rows.Length; rowIndex++) {
      (List<string> row, bool rowClosed) = RowFromStringToList(rows[rowIndex], csvRow, separator, quotation);
      if (rowClosed) { if (!ignoreEmptyRows || row.Any(rowItem => rowItem.Length > 0)) { csv.Add(row); csvRow = null; } } // row ok, add to list
      else { csvRow = row; } // not fully created, continue
    }
    if (!returnFirstRow) { csv.RemoveAt(0); } // remove header
    return csv;
  }

  public static string RowFromListToString(List<string> csvData, string separator = ",", char quotation = '"')
  {
    csvData = csvData.Select(element =>
    {
      if (element.Contains(quotation)) {
        element = element.Replace(quotation.ToString(), quotation.ToString() + quotation.ToString());
      }
      if (element.Contains(separator) || element.Contains(Environment.NewLine)) {
        element = "\"" + element + "\"";
      }
      return element;
    }).ToList();
    return string.Join(separator, csvData);
  }

  public static (List<string>, bool) RowFromStringToList(string csvRow, List<string> continueWithRow = null, string separator = ",", char quotation = '"')
  {
    bool rowClosed = true;
    if (continueWithRow != null && continueWithRow.Count > 0) {
      // in previous result quotation are fixed so i need convert back to double quotation
      string previousCell = quotation.ToString() + continueWithRow.Last().Replace(quotation.ToString(), quotation.ToString() + quotation.ToString()) + Environment.NewLine;
      continueWithRow.RemoveAt(continueWithRow.Count - 1);
      csvRow = previousCell + csvRow;
    }

    char tempQuote = (char)162;
    while (csvRow.Contains(tempQuote)) { tempQuote = (char)(tempQuote + 1); }
    char tempSeparator = (char)(tempQuote + 1);
    while (csvRow.Contains(tempSeparator)) { tempSeparator = (char)(tempSeparator + 1); }

    csvRow = csvRow.Replace(quotation.ToString() + quotation.ToString(), tempQuote.ToString());
    if(csvRow.Split(new char[] { quotation }, StringSplitOptions.None).Length % 2 == 0) { rowClosed = !rowClosed; }
    string[] csvSplit = csvRow.Split(new string[] { separator }, StringSplitOptions.None);

    List<string> csvList = csvSplit
      .ToList()
      .Aggregate("",
          (string row, string item) => {
              if (row.Count((ch) => ch == quotation) % 2 == 0) { return row + (row.Length > 0 ? tempSeparator.ToString() : "") + item; }
              else { return row + separator + item; }
          },
          (string row) => row.Split(tempSeparator).Select((string item) => item.Trim(quotation).Replace(tempQuote, quotation))
      ).ToList();
    if (continueWithRow != null && continueWithRow.Count > 0) {
      return (continueWithRow.Concat(csvList).ToList(), rowClosed);
    }
    return (csvList, rowClosed);
  }

  public static bool Test()
  {
    string csvText = "0,a,\"\"bc,d" + Environment.NewLine + "\"e, f\",g,\"this,is, o" + Environment.NewLine + "ne \"\"lo" + Environment.NewLine + "ng, cell\"\"\",h";
    List<List<string>> csvList = new List<List<string>>() { new List<string>() { "0", "a", "\"bc", "d" }, new List<string>() { "e, f", "g", "this,is, o" + Environment.NewLine + "ne \"lo" + Environment.NewLine + "ng, cell\"", "h" } };

    List<List<string>> csvTextAsList = Csv.FromStringToList(csvText);
    bool ok = Enumerable.SequenceEqual(csvList[0], csvTextAsList[0]) && Enumerable.SequenceEqual(csvList[1], csvTextAsList[1]);
    string csvListAsText = Csv.FromListToString(csvList);
    return ok && csvListAsText == csvText;
  }
}

使用例子:

// get List<List<string>> representation of csv
var csvFromText = Csv.FromStringToList(csvAsText);

// read csv file with custom separator and quote
// return no header and ignore empty rows
var csvFile = File.ReadAllText(csvFileFullPath);
var csvFromFile = Csv.FromStringToList(csvFile, ";", '"', false, false);

// get text representation of csvData from List<List<string>>
var csvAsText = Csv.FromListToString(csvData);

注: 这:char tempQuote = (char)162;是ASCI表中的第一个稀有字符。脚本会搜索这个字符,或者不是文本中的第一个接下来的几个ascii字符,并将其用作临时转义和引用字符。

其他回答

我有一个图书馆,正是你所需要的。

前段时间,我写了一个简单而快速的库来处理CSV文件。你可以通过以下链接找到它:https://github.com/ukushu/DataExporter/blob/master/Csv.cs

它与CSV的工作方式类似于二维数组。正是你需要的。

例如,如果你需要第三行的所有值,你只需要写:

Csv csv = new Csv();

csv.FileOpen("c:\\file1.csv");

var allValuesOf3rdRow = csv.Rows[2];

或者读取第3行第2单元格:

var value = csv.Rows[2][1];

如果你需要跳过(头部)行和/或列,你可以使用它来创建一个2维数组:

    var lines = File.ReadAllLines(path).Select(a => a.Split(';'));
    var csv = (from line in lines               
               select (from col in line
               select col).Skip(1).ToArray() // skip the first column
              ).Skip(2).ToArray(); // skip 2 headlines

如果您需要在进一步处理数据之前对其进行塑形,这是非常有用的(假设前两行由标题组成,而第一列是行标题—您不需要在数组中拥有行标题,因为您只想考虑数据)。

注意:您可以通过使用以下代码轻松获得标题和第一列:

    var coltitle = (from line in lines 
                    select line.Skip(1).ToArray() // skip 1st column
                   ).Skip(1).Take(1).FirstOrDefault().ToArray(); // take the 2nd row
    var rowtitle = (from line in lines select line[0] // take 1st column
                   ).Skip(2).ToArray(); // skip 2 headlines

这个代码示例假设你的*.csv文件的结构如下:

注意:如果你需要跳过空行——有时这很方便,你可以通过插入来实现

    where line.Any(a=>!string.IsNullOrWhiteSpace(a))

在上面的LINQ代码示例中的from和select语句之间。

LINQ:

var lines = File.ReadAllLines("test.txt").Select(a => a.Split(';'));
var csv = from line in lines
          select (from piece in line
                  select piece);

^^错误-尼克编辑

原来的应答者似乎试图用一个2维数组填充csv -一个包含数组的数组。第一个数组中的每一项都包含一个表示行号的数组,嵌套数组中的每一项都包含该特定列的数据。

var csv = from line in lines
          select (line.Split(',')).ToArray();

这里有一个特殊的情况,其中一个数据字段有分号(“;”)作为它的数据的一部分,在这种情况下,上面的大多数答案将失败。

这种情况下的解决方案是

string[] csvRows = System.IO.File.ReadAllLines(FullyQaulifiedFileName);
string[] fields = null;
List<string> lstFields;
string field;
bool quoteStarted = false;
foreach (string csvRow in csvRows)
{
    lstFields = new List<string>();
    field = "";
    for (int i = 0; i < csvRow.Length; i++)
    {
        string tmp = csvRow.ElementAt(i).ToString();
        if(String.Compare(tmp,"\"")==0)
        {
            quoteStarted = !quoteStarted;
        }
        if (String.Compare(tmp, ";") == 0 && !quoteStarted)
        {
            lstFields.Add(field);
            field = "";
        }
        else if (String.Compare(tmp, "\"") != 0)
        {
            field += tmp;
        }
    }
    if(!string.IsNullOrEmpty(field))
    {
        lstFields.Add(field);
        field = "";
    }
// This will hold values for each column for current row under processing
    fields = lstFields.ToArray(); 
}

您可以使用c#中的Microsoft.VisualBasic.FileIO.TextFieldParser dll来获得更好的性能

从上面的文章中获取下面的代码示例

static void Main()
{
    string csv_file_path=@"C:\Users\Administrator\Desktop\test.csv";

    DataTable csvData = GetDataTabletFromCSVFile(csv_file_path);

    Console.WriteLine("Rows count:" + csvData.Rows.Count);

    Console.ReadLine();
}


private static DataTable GetDataTabletFromCSVFile(string csv_file_path)
{
    DataTable csvData = new DataTable();

    try
    {

    using(TextFieldParser csvReader = new TextFieldParser(csv_file_path))
        {
            csvReader.SetDelimiters(new string[] { "," });
            csvReader.HasFieldsEnclosedInQuotes = true;
            string[] colFields = csvReader.ReadFields();
            foreach (string column in colFields)
            {
                DataColumn datecolumn = new DataColumn(column);
                datecolumn.AllowDBNull = true;
                csvData.Columns.Add(datecolumn);
            }

            while (!csvReader.EndOfData)
            {
                string[] fieldData = csvReader.ReadFields();
                //Making empty value as null
                for (int i = 0; i < fieldData.Length; i++)
                {
                    if (fieldData[i] == "")
                    {
                        fieldData[i] = null;
                    }
                }
                csvData.Rows.Add(fieldData);
            }
        }
    }
    catch (Exception ex)
    {
    }
    return csvData;
}