在c#中是否有默认/官方/推荐的方法来解析CSV文件?我不想滚动自己的解析器。
另外,我也见过人们使用ODBC/OLE DB通过文本驱动程序读取CSV的实例,很多人因为它的“缺点”而不鼓励这样做。这些缺点是什么?
理想情况下,我正在寻找一种方法,通过它我可以通过列名读取CSV,使用第一个记录作为报头/字段名。给出的一些答案是正确的,但基本上是将文件反序列化为类。
在c#中是否有默认/官方/推荐的方法来解析CSV文件?我不想滚动自己的解析器。
另外,我也见过人们使用ODBC/OLE DB通过文本驱动程序读取CSV的实例,很多人因为它的“缺点”而不鼓励这样做。这些缺点是什么?
理想情况下,我正在寻找一种方法,通过它我可以通过列名读取CSV,使用第一个记录作为报头/字段名。给出的一些答案是正确的,但基本上是将文件反序列化为类。
当前回答
如果任何人想要一个代码片段,他们可以直接输入自己的代码,而不必绑定库或下载包。以下是我写的一个版本:
public static string FormatCSV(List<string> parts)
{
string result = "";
foreach (string s in parts)
{
if (result.Length > 0)
{
result += ",";
if (s.Length == 0)
continue;
}
if (s.Length > 0)
{
result += "\"" + s.Replace("\"", "\"\"") + "\"";
}
else
{
// cannot output double quotes since its considered an escape for a quote
result += ",";
}
}
return result;
}
enum CSVMode
{
CLOSED = 0,
OPENED_RAW = 1,
OPENED_QUOTE = 2
}
public static List<string> ParseCSV(string input)
{
List<string> results;
CSVMode mode;
char[] letters;
string content;
mode = CSVMode.CLOSED;
content = "";
results = new List<string>();
letters = input.ToCharArray();
for (int i = 0; i < letters.Length; i++)
{
char letter = letters[i];
char nextLetter = '\0';
if (i < letters.Length - 1)
nextLetter = letters[i + 1];
// If its a quote character
if (letter == '"')
{
// If that next letter is a quote
if (nextLetter == '"' && mode == CSVMode.OPENED_QUOTE)
{
// Then this quote is escaped and should be added to the content
content += letter;
// Skip the escape character
i++;
continue;
}
else
{
// otherwise its not an escaped quote and is an opening or closing one
// Character is skipped
// If it was open, then close it
if (mode == CSVMode.OPENED_QUOTE)
{
results.Add(content);
// reset the content
content = "";
mode = CSVMode.CLOSED;
// If there is a next letter available
if (nextLetter != '\0')
{
// If it is a comma
if (nextLetter == ',')
{
i++;
continue;
}
else
{
throw new Exception("Expected comma. Found: " + nextLetter);
}
}
}
else if (mode == CSVMode.OPENED_RAW)
{
// If it was opened raw, then just add the quote
content += letter;
}
else if (mode == CSVMode.CLOSED)
{
// Otherwise open it as a quote
mode = CSVMode.OPENED_QUOTE;
}
}
}
// If its a comma seperator
else if (letter == ',')
{
// If in quote mode
if (mode == CSVMode.OPENED_QUOTE)
{
// Just read it
content += letter;
}
// If raw, then close the content
else if (mode == CSVMode.OPENED_RAW)
{
results.Add(content);
content = "";
mode = CSVMode.CLOSED;
}
// If it was closed, then open it raw
else if (mode == CSVMode.CLOSED)
{
mode = CSVMode.OPENED_RAW;
results.Add(content);
content = "";
}
}
else
{
// If opened quote, just read it
if (mode == CSVMode.OPENED_QUOTE)
{
content += letter;
}
// If opened raw, then read it
else if (mode == CSVMode.OPENED_RAW)
{
content += letter;
}
// It closed, then open raw
else if (mode == CSVMode.CLOSED)
{
mode = CSVMode.OPENED_RAW;
content += letter;
}
}
}
// If it was still reading when the buffer finished
if (mode != CSVMode.CLOSED)
{
results.Add(content);
}
return results;
}
其他回答
这里有一个我经常使用的helper类,以防有人回到这个线程(我想分享它)。
我这样做是为了简单地将它移植到可以使用的项目中:
public class CSVHelper : List<string[]>
{
protected string csv = string.Empty;
protected string separator = ",";
public CSVHelper(string csv, string separator = "\",\"")
{
this.csv = csv;
this.separator = separator;
foreach (string line in Regex.Split(csv, System.Environment.NewLine).ToList().Where(s => !string.IsNullOrEmpty(s)))
{
string[] values = Regex.Split(line, separator);
for (int i = 0; i < values.Length; i++)
{
//Trim values
values[i] = values[i].Trim('\"');
}
this.Add(values);
}
}
}
像这样使用它:
public List<Person> GetPeople(string csvContent)
{
List<Person> people = new List<Person>();
CSVHelper csv = new CSVHelper(csvContent);
foreach(string[] line in csv)
{
Person person = new Person();
person.Name = line[0];
person.TelephoneNo = line[1];
people.Add(person);
}
return people;
}
[更新的csv helper:修复了最后一个新行字符创建新行的错误]
这个列表中的另一个,Cinchoo ETL -一个开源库,可以读写多种文件格式(CSV,平面文件,Xml, JSON等)
下面的示例显示了如何快速读取CSV文件(不需要POCO对象)
string csv = @"Id, Name
1, Carl
2, Tom
3, Mark";
using (var p = ChoCSVReader.LoadText(csv)
.WithFirstLineHeader()
)
{
foreach (var rec in p)
{
Console.WriteLine($"Id: {rec.Id}");
Console.WriteLine($"Name: {rec.Name}");
}
}
下面的示例展示了如何使用POCO对象读取CSV文件
public partial class EmployeeRec
{
public int Id { get; set; }
public string Name { get; set; }
}
static void CSVTest()
{
string csv = @"Id, Name
1, Carl
2, Tom
3, Mark";
using (var p = ChoCSVReader<EmployeeRec>.LoadText(csv)
.WithFirstLineHeader()
)
{
foreach (var rec in p)
{
Console.WriteLine($"Id: {rec.Id}");
Console.WriteLine($"Name: {rec.Name}");
}
}
}
请在CodeProject上查看如何使用它的文章。
对于较小的CSV输入数据,LINQ是完全足够的。 以以下CSV文件内容为例:
schema_name、描述utype “IX_HE”、“高能量数据”,“x” “III_spectro”、“Spectrosopic数据”、“d” “VI_misc”、“杂”、“f” “vcds1”,“目录只在cd上提供”,“d” “J_other”,“其他期刊发表的文章”,“b”
当我们将整个内容读入一个名为data的字符串时,则
using System;
using System.IO;
using System.Linq;
var data = File.ReadAllText(Path2CSV);
// helper split characters
var newline = Environment.NewLine.ToCharArray();
var comma = ",".ToCharArray();
var quote = "\"".ToCharArray();
// split input string data to lines
var lines = data.Split(newline);
// first line is header, take the header fields
foreach (var col in lines.First().Split(comma)) {
// do something with "col"
}
// we skip the first line, all the rest are real data lines/fields
foreach (var line in lines.Skip(1)) {
// first we split the data line by comma character
// next we remove double qoutes from each splitted element using Trim()
// finally we make an array
var fields = line.Split(comma)
.Select(_ => { _ = _.Trim(quote); return _; })
.ToArray();
// do something with the "fields" array
}
在商业应用中,我使用codeproject.com上的开源项目,CSVReader。
它工作良好,具有良好的性能。我提供的链接上有一些基准测试。
一个简单的例子,从项目页面复制:
using (CsvReader csv = new CsvReader(new StreamReader("data.csv"), true))
{
int fieldCount = csv.FieldCount;
string[] headers = csv.GetFieldHeaders();
while (csv.ReadNextRecord())
{
for (int i = 0; i < fieldCount; i++)
Console.Write(string.Format("{0} = {1};", headers[i], csv[i]));
Console.WriteLine();
}
}
如你所见,这很容易处理。
这是我的KISS实现…
using System;
using System.Collections.Generic;
using System.Text;
class CsvParser
{
public static List<string> Parse(string line)
{
const char escapeChar = '"';
const char splitChar = ',';
bool inEscape = false;
bool priorEscape = false;
List<string> result = new List<string>();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < line.Length; i++)
{
char c = line[i];
switch (c)
{
case escapeChar:
if (!inEscape)
inEscape = true;
else
{
if (!priorEscape)
{
if (i + 1 < line.Length && line[i + 1] == escapeChar)
priorEscape = true;
else
inEscape = false;
}
else
{
sb.Append(c);
priorEscape = false;
}
}
break;
case splitChar:
if (inEscape) //if in escape
sb.Append(c);
else
{
result.Add(sb.ToString());
sb.Length = 0;
}
break;
default:
sb.Append(c);
break;
}
}
if (sb.Length > 0)
result.Add(sb.ToString());
return result;
}
}