我可以在哪里找到一些JavaScript代码来解析CSV数据?
当前回答
我已经构造了这个JavaScript脚本来解析字符串到数组对象中的CSV。我发现最好将整个CSV分解成行、字段并相应地处理它们。我认为这将使您更容易更改代码以满足您的需要。
//
//
// CSV to object
//
//
const new_line_char = '\n';
const field_separator_char = ',';
function parse_csv(csv_str) {
var result = [];
let line_end_index_moved = false;
let line_start_index = 0;
let line_end_index = 0;
let csr_index = 0;
let cursor_val = csv_str[csr_index];
let found_new_line_char = get_new_line_char(csv_str);
let in_quote = false;
// Handle \r\n
if (found_new_line_char == '\r\n') {
csv_str = csv_str.split(found_new_line_char).join(new_line_char);
}
// Handle the last character is not \n
if (csv_str[csv_str.length - 1] !== new_line_char) {
csv_str += new_line_char;
}
while (csr_index < csv_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === new_line_char) {
if (in_quote === false) {
if (line_end_index_moved && (line_start_index <= line_end_index)) {
result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
line_start_index = csr_index + 1;
} // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
} // Else: just ignore because we are in a quote
}
csr_index++;
cursor_val = csv_str[csr_index];
line_end_index = csr_index;
line_end_index_moved = true;
}
// Handle \r\n
if (found_new_line_char == '\r\n') {
let new_result = [];
let curr_row;
for (var i = 0; i < result.length; i++) {
curr_row = [];
for (var j = 0; j < result[i].length; j++) {
curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
}
new_result.push(curr_row);
}
result = new_result;
}
return result;
}
function parse_csv_line(csv_line_str) {
var result = [];
//let field_end_index_moved = false;
let field_start_index = 0;
let field_end_index = 0;
let csr_index = 0;
let cursor_val = csv_line_str[csr_index];
let in_quote = false;
// Pretend that the last char is the separator_char to complete the loop
csv_line_str += field_separator_char;
while (csr_index < csv_line_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === field_separator_char) {
if (in_quote === false) {
if (field_start_index <= field_end_index) {
result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
field_start_index = csr_index + 1;
} // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
} // Else: just ignore because we are in quote
}
csr_index++;
cursor_val = csv_line_str[csr_index];
field_end_index = csr_index;
field_end_index_moved = true;
}
return result;
}
function parse_csv_field(csv_field_str) {
with_quote = (csv_field_str[0] === '"');
if (with_quote) {
csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
}
return csv_field_str;
}
// Initial method: check the first newline character only
function get_new_line_char(csv_str) {
if (csv_str.indexOf('\r\n') > -1) {
return '\r\n';
} else {
return '\n'
}
}
其他回答
只是随便说说而已。我最近遇到了用Javascript解析CSV列的需求,于是我选择了自己的简单解决方案。它满足了我的需要,也可能帮助到其他人。
const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true'; const parseCSV = text => { const lines = text.split('\n'); const output = []; lines.forEach(line => { line = line.trim(); if (line.length === 0) return; const skipIndexes = {}; const columns = line.split(','); output.push(columns.reduce((result, item, index) => { if (skipIndexes[index]) return result; if (item.startsWith('"') && !item.endsWith('"')) { while (!columns[index + 1].endsWith('"')) { index++; item += `,${columns[index]}`; skipIndexes[index] = true; } index++; skipIndexes[index] = true; item += `,${columns[index]}`; } result.push(item); return result; }, [])); }); return output; }; console.log(parseCSV(csvString));
我已经构造了这个JavaScript脚本来解析字符串到数组对象中的CSV。我发现最好将整个CSV分解成行、字段并相应地处理它们。我认为这将使您更容易更改代码以满足您的需要。
//
//
// CSV to object
//
//
const new_line_char = '\n';
const field_separator_char = ',';
function parse_csv(csv_str) {
var result = [];
let line_end_index_moved = false;
let line_start_index = 0;
let line_end_index = 0;
let csr_index = 0;
let cursor_val = csv_str[csr_index];
let found_new_line_char = get_new_line_char(csv_str);
let in_quote = false;
// Handle \r\n
if (found_new_line_char == '\r\n') {
csv_str = csv_str.split(found_new_line_char).join(new_line_char);
}
// Handle the last character is not \n
if (csv_str[csv_str.length - 1] !== new_line_char) {
csv_str += new_line_char;
}
while (csr_index < csv_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === new_line_char) {
if (in_quote === false) {
if (line_end_index_moved && (line_start_index <= line_end_index)) {
result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
line_start_index = csr_index + 1;
} // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
} // Else: just ignore because we are in a quote
}
csr_index++;
cursor_val = csv_str[csr_index];
line_end_index = csr_index;
line_end_index_moved = true;
}
// Handle \r\n
if (found_new_line_char == '\r\n') {
let new_result = [];
let curr_row;
for (var i = 0; i < result.length; i++) {
curr_row = [];
for (var j = 0; j < result[i].length; j++) {
curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
}
new_result.push(curr_row);
}
result = new_result;
}
return result;
}
function parse_csv_line(csv_line_str) {
var result = [];
//let field_end_index_moved = false;
let field_start_index = 0;
let field_end_index = 0;
let csr_index = 0;
let cursor_val = csv_line_str[csr_index];
let in_quote = false;
// Pretend that the last char is the separator_char to complete the loop
csv_line_str += field_separator_char;
while (csr_index < csv_line_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === field_separator_char) {
if (in_quote === false) {
if (field_start_index <= field_end_index) {
result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
field_start_index = csr_index + 1;
} // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
} // Else: just ignore because we are in quote
}
csr_index++;
cursor_val = csv_line_str[csr_index];
field_end_index = csr_index;
field_end_index_moved = true;
}
return result;
}
function parse_csv_field(csv_field_str) {
with_quote = (csv_field_str[0] === '"');
if (with_quote) {
csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
}
return csv_field_str;
}
// Initial method: check the first newline character only
function get_new_line_char(csv_str) {
if (csv_str.indexOf('\r\n') > -1) {
return '\r\n';
} else {
return '\n'
}
}
下面是一个极其简单的CSV解析器,它处理带有逗号、新行和转义双引号的引号字段。没有分裂或正则表达式。它每次扫描输入字符串1-2个字符,并构建一个数组。
在http://jsfiddle.net/vHKYH/上进行测试。
function parseCSV(str) {
var arr = [];
var quote = false; // 'true' means we're inside a quoted field
// Iterate over each character, keep track of current row and column (of the returned array)
for (var row = 0, col = 0, c = 0; c < str.length; c++) {
var cc = str[c], nc = str[c+1]; // Current character, next character
arr[row] = arr[row] || []; // Create a new row if necessary
arr[row][col] = arr[row][col] || ''; // Create a new column (start with empty string) if necessary
// If the current character is a quotation mark, and we're inside a
// quoted field, and the next character is also a quotation mark,
// add a quotation mark to the current column and skip the next character
if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }
// If it's just one quotation mark, begin/end quoted field
if (cc == '"') { quote = !quote; continue; }
// If it's a comma and we're not in a quoted field, move on to the next column
if (cc == ',' && !quote) { ++col; continue; }
// If it's a newline (CRLF) and we're not in a quoted field, skip the next character
// and move on to the next row and move to column 0 of that new row
if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }
// If it's a newline (LF or CR) and we're not in a quoted field,
// move on to the next row and move to column 0 of that new row
if (cc == '\n' && !quote) { ++row; col = 0; continue; }
if (cc == '\r' && !quote) { ++row; col = 0; continue; }
// Otherwise, append the current character to the current column
arr[row][col] += cc;
}
return arr;
}
我不知道为什么我不能让Kirtan的例子对我有用。它似乎在空字段或带尾随逗号的字段上失败了……
这个似乎可以同时处理这两个问题。
我没有编写解析器代码,只是对解析器函数进行了包装,以使其适用于文件。看到归因。
var Strings = {
/**
* Wrapped CSV line parser
* @param s String delimited CSV string
* @param sep Separator override
* @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
*/
parseCSV : function(s,sep) {
// http://stackoverflow.com/questions/1155678/javascript-string-newline-character
var universalNewline = /\r\n|\r|\n/g;
var a = s.split(universalNewline);
for(var i in a){
for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
} else if (x) {
f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
} else f = f.shift().split(sep).concat(f);
} else f[x].replace(/""/g, '"');
} a[i] = f;
}
return a;
}
}
csvToArray v1.3
一个紧凑(645字节),但兼容的函数,将CSV字符串转换为2D数组,符合RFC4180标准。
https://code.google.com/archive/p/csv-to-array/downloads
常用用法:jQuery
$.ajax({
url: "test.csv",
dataType: 'text',
cache: false
}).done(function(csvAsString){
csvAsArray=csvAsString.csvToArray();
});
常用用法:JavaScript
csvAsArray = csvAsString.csvToArray();
覆盖字段分隔符
csvAsArray = csvAsString.csvToArray("|");
覆盖记录分离器
csvAsArray = csvAsString.csvToArray("", "#");
覆盖跳过报头
csvAsArray = csvAsString.csvToArray("", "", 1);
覆盖所有
csvAsArray = csvAsString.csvToArray("|", "#", 1);