我可以在哪里找到一些JavaScript代码来解析CSV数据?
当前回答
就我个人而言,我喜欢使用deno std库,因为大多数模块都与浏览器正式兼容
问题在于std是用TypeScript编写的,不过将来可能会有官方的解决方案,参见 https://github.com/denoland/deno_std/issues/641 和 https://github.com/denoland/dotland/issues/1728
目前有一个积极维护的即时(on-the-fly)转译器:https://bundle.deno.dev/
你可以像这样简单地使用它
<script type="module">
// Load the Deno std CSV parser through bundle.deno.dev so the TypeScript module can be imported from a browser.
import { parse } from "https://bundle.deno.dev/https://deno.land/std@0.126.0/encoding/csv.ts"
// The result of parse() is awaited at the top level, which is possible because this script is a module.
console.log(await parse("a,b,c\n1,2,3"))
</script>
其他回答
csvToArray v1.3
一个紧凑(645字节),但兼容的函数,将CSV字符串转换为2D数组,符合RFC4180标准。
https://code.google.com/archive/p/csv-to-array/downloads
常用用法:jQuery
// Request test.csv as plain text (jQuery's cache option is turned off) and convert it once it has loaded.
$.ajax({
url: "test.csv",
dataType: 'text',
cache: false
}).done(function(csvAsString){
// csvToArray() is invoked as a method of the string, so the csvToArray library has to be loaded beforehand.
// NOTE: csvAsArray is assigned without a declaration here; declare it in the surrounding scope as needed.
csvAsArray=csvAsString.csvToArray();
});
常用用法:JavaScript
csvAsArray = csvAsString.csvToArray();
覆盖字段分隔符
csvAsArray = csvAsString.csvToArray("|");
覆盖记录分隔符
csvAsArray = csvAsString.csvToArray("", "#");
覆盖"跳过标题行"设置
csvAsArray = csvAsString.csvToArray("", "", 1);
覆盖所有
csvAsArray = csvAsString.csvToArray("|", "#", 1);
下面是我的PEG(.js)语法,它对RFC 4180的支持似乎还不错(即它能够处理 http://en.wikipedia.org/wiki/Comma-separated_values 上的示例):
// Entry rule: skips leading and trailing line breaks and returns an array with one entry per line.
start
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }
// One record: a field followed by zero or more ","-prefixed fields, returned as an array of strings.
line
= first:field rest:("," text:field { return text; })*
& { return !!first || rest.length; } // ignore blank lines
{ rest.unshift(first); return rest; }
// A field is either enclosed in double quotes (built from `char`) or a run of characters
// that contains no comma and no line break; both alternatives yield the text as a string.
field
= '"' text:char* '"' { return text.join(''); }
/ text:[^\n\r,]* { return text.join(''); }
// Inside a quoted field, two consecutive quotes stand for one literal quote;
// any other character except a quote is taken as is.
char
= '"' '"' { return '"'; }
/ [^"]
在http://jsfiddle.net/knvzk/10或http://pegjs.majda.cz/online上试试吧。从https://gist.github.com/3362830下载生成的解析器。
我编写了这个JavaScript脚本,用于把CSV字符串解析成数组对象。我发现最好的做法是先把整个CSV拆分成行,再把行拆分成字段,然后分别处理。我认为这样可以让您更容易按自己的需要修改代码。
//
//
// CSV to object
//
//
// Line terminator that parse_csv scans for after normalizing the input.
const new_line_char = '\n';
// Character that separates two fields within one line.
const field_separator_char = ',';
/**
 * Parse a CSV string into an array of rows, each row being the array returned
 * by parse_csv_line for that line.
 *
 * Rows are split on newlines that are outside double quotes. When the input
 * contains "\r\n", it is normalized to "\n" for scanning and every "\n" that
 * remains inside a parsed field is converted back to "\r\n" in the result.
 *
 * A newline at the very beginning of the input is skipped instead of being
 * glued onto the first field of the first row. A row whose quote is never
 * closed is not emitted.
 *
 * @param {string} csv_str - CSV text; a trailing newline is optional.
 * @returns {Array<Array<string>>} The parsed rows (empty array for empty input).
 */
function parse_csv(csv_str) {
  const uses_crlf = get_new_line_char(csv_str) === '\r\n';

  // Work on a normalized copy so that row detection only deals with new_line_char.
  let text = uses_crlf ? csv_str.split('\r\n').join(new_line_char) : csv_str;

  // Terminate the last row so the loop below emits it like any other row.
  if (text[text.length - 1] !== new_line_char) {
    text += new_line_char;
  }

  const rows = [];
  let row_start = 0;
  let in_quote = false;

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (ch === '"') {
      in_quote = !in_quote;
    } else if (ch === new_line_char && !in_quote) {
      // Index 0 means the input starts with a newline: emit nothing for it,
      // but still move the row start past it.
      if (i > 0) {
        rows.push(parse_csv_line(text.substring(row_start, i)));
      }
      row_start = i + 1;
    }
  }

  if (!uses_crlf) {
    return rows;
  }

  // Restore the original "\r\n" inside fields that span several lines.
  return rows.map(function (row) {
    return row.map(function (field) {
      return field.split(new_line_char).join('\r\n');
    });
  });
}
/**
 * Split one CSV line into its fields.
 *
 * The line is cut at every field_separator_char that is outside double quotes,
 * and each piece is passed through parse_csv_field. A piece whose quote is
 * never closed is not emitted.
 *
 * @param {string} csv_line_str - A single line without its line terminator.
 * @returns {Array<string>} The fields of the line; an empty line yields [].
 */
function parse_csv_line(csv_line_str) {
  const fields = [];

  // An empty line produces no fields at all (not a single empty field).
  if (csv_line_str.length === 0) {
    return fields;
  }

  // Append a separator so the last field is closed by the same rule as the others.
  const line = csv_line_str + field_separator_char;
  let field_start = 0;
  let in_quote = false;

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (ch === '"') {
      in_quote = !in_quote;
    } else if (ch === field_separator_char && !in_quote) {
      fields.push(parse_csv_field(line.substring(field_start, i)));
      field_start = i + 1;
    }
  }

  return fields;
}
/**
 * Decode a single raw CSV field.
 *
 * A field that starts with a double quote has its first and last characters
 * removed and every doubled quote ("") collapsed into one quote. Any other
 * field is returned unchanged.
 *
 * @param {string} csv_field_str - Raw field text as cut out of the line.
 * @returns {string} The decoded field value.
 */
function parse_csv_field(csv_field_str) {
  const with_quote = csv_field_str[0] === '"';
  if (!with_quote) {
    return csv_field_str;
  }
  // Drop the surrounding quotes, then unescape doubled quotes.
  const inner = csv_field_str.substring(1, csv_field_str.length - 1);
  return inner.split('""').join('"');
}
/**
 * Report which line terminator the CSV text uses.
 *
 * @param {string} csv_str - CSV text to inspect.
 * @returns {string} '\r\n' if that sequence occurs anywhere in the text, otherwise '\n'.
 */
function get_new_line_char(csv_str) {
  const has_crlf = csv_str.indexOf('\r\n') !== -1;
  return has_crlf ? '\r\n' : '\n';
}
这是另一个解决方案。它使用了:
一个粗粒度的全局正则表达式,用于分割CSV字符串(分割结果仍带有引号和尾随逗号);若干细粒度的正则表达式,用于清除两侧的引号和尾随逗号;此外,还会进行类型转换,以区分字符串、数字、布尔值和空值。
对于以下输入字符串:
"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,
代码输出:
[
"This is, a value",
"Hello",
4,
-123,
3.1415,
"This is also, possible",
true,
null
]
下面是我在一个可运行的代码片段中实现的parseCSVLine():
/**
 * Split one CSV line into typed values.
 *
 * Fields may be unquoted, double-quoted or single-quoted; quoted fields may
 * contain commas. Each field is converted as follows: an empty field becomes
 * null, true/false become booleans, integers and decimals become numbers, and
 * everything else is returned as a trimmed string (quotes removed).
 *
 * @param {string} text - A single CSV line.
 * @returns {Array<string|number|boolean|null>} The converted field values.
 */
function parseCSVLine(text) {
  // Coarse pass: every token is one field plus its trailing comma, if any.
  const tokens = text.match(/\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g) || [];

  // The global match always ends with an empty token at the end of the input.
  // It is a real (empty) field only when the previous token ended with a comma;
  // otherwise it would show up as a spurious trailing null.
  const last = tokens.length - 1;
  if (last > 0 && tokens[last] === '' && !tokens[last - 1].endsWith(',')) {
    tokens.pop();
  }

  // Fine pass: strip quotes / trailing comma and convert the type.
  return tokens.map(function (token) {
    let m;

    // null value
    if (/^\s*,?$/.test(token)) return null;

    // Double Quoted Text
    m = token.match(/^\s*\"([^"]*)\"\s*,?$/);
    if (m) return m[1];

    // Single Quoted Text
    m = token.match(/^\s*'([^']*)'\s*,?$/);
    if (m) return m[1];

    // Boolean
    m = token.match(/^\s*(true|false)\s*,?$/);
    if (m) return m[1] === "true";

    // Integer Number
    m = token.match(/^\s*((?:\+|\-)?\d+)\s*,?$/);
    if (m) return parseInt(m[1], 10);

    // Floating Number (at least one digit is required, so a lone "." stays text)
    m = token.match(/^\s*((?:\+|\-)?(?:\d+\.\d*|\.\d+))\s*,?$/);
    if (m) return parseFloat(m[1]);

    // Unquoted Text
    m = token.match(/^\s*(.*?)\s*,?$/);
    if (m) return m[1];

    return token;
  });
}

let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );
正则表达式来救场!下面这几行代码按照RFC 4180标准,正确处理带引号的字段,包括字段内嵌的逗号、引号和换行符。
/**
 * Parse CSV text into a grid (array of rows, each an array of cell strings).
 *
 * Quoted fields may contain the field separator, doubled quotes ("") and line
 * breaks. Carriage returns are removed and trailing line breaks are ignored.
 *
 * How it works: the content of every quoted field is first rewritten so that
 * embedded line breaks, doubled quotes and separators are replaced by control
 * characters; the text can then be split naively on "\n" and on the separator,
 * after which the placeholders are turned back into real characters.
 *
 * @param {string} data - CSV text.
 * @param {string} [fieldSep=','] - Field separator; a falsy value selects the default.
 * @param {string} [newLine='\n'] - Text that replaces a line break inside a
 *   quoted field; a falsy value selects the default.
 * @returns {Array<Array<string>>} The parsed grid.
 */
function parseCsv(data, fieldSep, newLine) {
  const sep = fieldSep || ',';
  const lineBreak = newLine || '\n';

  // Placeholders for characters that must survive the naive splitting below.
  const nSep = '\x1D'; // line break inside a quoted field
  const qSep = '\x1E'; // escaped quote ("")
  const cSep = '\x1F'; // field separator inside a quoted field

  // Escape the separator so characters such as "|", "." or "^" match literally.
  const sepPattern = sep.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // A quoted field: opening quote right after start/separator/line break,
  // closing quote right before end/separator/line break. Group 2 is the content.
  const fieldRe = new RegExp(
    '(?<=(^|' + sepPattern + '|\\n))"(|[\\s\\S]+?(?<![^"]"))"(?=($|' + sepPattern + '|\\n))',
    'g'
  );

  const protectedText = data
    .replace(/\r/g, '')
    .replace(/\n+$/, '')
    .replace(fieldRe, function (match, p1, p2) {
      return p2.replace(/\n/g, nSep).replace(/""/g, qSep).split(sep).join(cSep);
    });

  return protectedText.split('\n').map(function (line) {
    return line.split(sep).map(function (cell) {
      // split/join instead of replace() so "$" in the replacement text stays literal.
      return cell
        .split(nSep).join(lineBreak)
        .split(qSep).join('"')
        .split(cSep).join(sep);
    });
  });
}
// Example: the second row has quoted fields containing escaped quotes, a comma and a line break.
const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ','; // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n'
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]
您不需要像lex/yacc这样的解析器生成器。借助正向后行断言(positive lookbehind)、负向后行断言(negative lookbehind)和正向先行断言(positive lookahead),正则表达式就可以正确地处理RFC 4180。
克隆/下载代码https://github.com/peterthoeny/parse-csv-js
推荐文章
- Javascript和regex:分割字符串并保留分隔符
- 如何检查DST(日光节约时间)是否有效,如果是,偏移量?
- 如何在underscore.js中跳出_.each循环
- 如何在jQuery中获得当前日期?
- 如何创建一个日期对象从字符串在javascript
- 按下回车键时触发按钮点击
- 获取对象的属性名
- 如何检查用户是否可以回到浏览器历史
- jQuery中与String.format等价的方法
- 如何在vue-cli项目中更改端口号
- Angular 2模板中的标签是什么意思?
- JavaScript .includes()方法的多个条件
- window.close和self.close无法关闭Chrome中的窗口
- 同步和异步编程(在node.js中)的区别是什么?
- 在d3.js中调整窗口大小时调整svg的大小