我可以在哪里找到一些JavaScript代码来解析CSV数据?
当前回答
您可以使用本博客条目中提到的CSVToArray()函数。
<script type="text/javascript">
// ref: http://stackoverflow.com/a/1293163/2343
// This will parse a delimited string into an array of
// arrays. The default delimiter is the comma, but this
// can be overriden in the second argument.
function CSVToArray( strData, strDelimiter ){
// Check to see if the delimiter is defined. If not,
// then default to comma.
strDelimiter = (strDelimiter || ",");
// Create a regular expression to parse the CSV values.
var objPattern = new RegExp(
(
// Delimiters.
"(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
// Quoted fields.
"(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
// Standard fields.
"([^\"\\" + strDelimiter + "\\r\\n]*))"
),
"gi"
);
// Create an array to hold our data. Give the array
// a default empty first row.
var arrData = [[]];
// Create an array to hold our individual pattern
// matching groups.
var arrMatches = null;
// Keep looping over the regular expression matches
// until we can no longer find a match.
while (arrMatches = objPattern.exec( strData )){
// Get the delimiter that was found.
var strMatchedDelimiter = arrMatches[ 1 ];
// Check to see if the given delimiter has a length
// (is not the start of string) and if it matches
// field delimiter. If id does not, then we know
// that this delimiter is a row delimiter.
if (
strMatchedDelimiter.length &&
strMatchedDelimiter !== strDelimiter
){
// Since we have reached a new row of data,
// add an empty row to our data array.
arrData.push( [] );
}
var strMatchedValue;
// Now that we have our delimiter out of the way,
// let's check to see which kind of value we
// captured (quoted or unquoted).
if (arrMatches[ 2 ]){
// We found a quoted value. When we capture
// this value, unescape any double quotes.
strMatchedValue = arrMatches[ 2 ].replace(
new RegExp( "\"\"", "g" ),
"\""
);
} else {
// We found a non-quoted value.
strMatchedValue = arrMatches[ 3 ];
}
// Now that we have our value string, let's add
// it to the data array.
arrData[ arrData.length - 1 ].push( strMatchedValue );
}
// Return the parsed data.
return( arrData );
}
</script>
其他回答
下面是我的PEG(.js)语法,它在RFC 4180中似乎做得不错(即它处理http://en.wikipedia.org/wiki/Comma-separated_values):上的示例)
start
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }
line
= first:field rest:("," text:field { return text; })*
& { return !!first || rest.length; } // ignore blank lines
{ rest.unshift(first); return rest; }
field
= '"' text:char* '"' { return text.join(''); }
/ text:[^\n\r,]* { return text.join(''); }
char
= '"' '"' { return '"'; }
/ [^"]
在http://jsfiddle.net/knvzk/10或http://pegjs.majda.cz/online上试试吧。从https://gist.github.com/3362830下载生成的解析器。
我不知道为什么我不能让Kirtan的例子对我有用。它似乎在空字段或带尾随逗号的字段上失败了……
这个似乎可以同时处理这两个问题。
我没有编写解析器代码,只是对解析器函数进行了包装,以使其适用于文件。看到归因。
var Strings = {
/**
* Wrapped CSV line parser
* @param s String delimited CSV string
* @param sep Separator override
* @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
*/
parseCSV : function(s,sep) {
// http://stackoverflow.com/questions/1155678/javascript-string-newline-character
var universalNewline = /\r\n|\r|\n/g;
var a = s.split(universalNewline);
for(var i in a){
for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
} else if (x) {
f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
} else f = f.shift().split(sep).concat(f);
} else f[x].replace(/""/g, '"');
} a[i] = f;
}
return a;
}
}
就我个人而言,我喜欢使用deno std库,因为大多数模块都与浏览器正式兼容
问题是std是typescript,但官方解决方案可能会在未来发生https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728
目前有一个积极维护的飞行转译器https://bundle.deno.dev/
你可以像这样简单地使用它
<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/std@0.126.0/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>
下面是我简单的JavaScript代码:
let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))
function splitQuotes(line) {
if(line.indexOf('"') < 0)
return line.split(',')
let result = [], cell = '', quote = false;
for(let i = 0; i < line.length; i++) {
char = line[i]
if(char == '"' && line[i+1] == '"') {
cell += char
i++
} else if(char == '"') {
quote = !quote;
} else if(!quote && char == ',') {
result.push(cell)
cell = ''
} else {
cell += char
}
if ( i == line.length-1 && cell) {
result.push(cell)
}
}
return result
}
我有一个实现作为电子表格项目的一部分。
此代码尚未经过全面测试,但欢迎任何人使用它。
正如一些答案所指出的那样,如果您实际上有DSV或TSV文件,您的实现可以简单得多,因为它们不允许在值中使用记录和字段分隔符。另一方面,CSV实际上可以在字段中使用逗号和换行符,这打破了大多数正则表达式和基于分割的方法。
var CSV = {
parse: function(csv, reviver) {
reviver = reviver || function(r, c, v) { return v; };
var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
while (c < cc) {
table.push(row = []);
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
start = end = c;
if ('"' === chars[c]){
start = end = ++c;
while (c < cc) {
if ('"' === chars[c]) {
if ('"' !== chars[c+1]) {
break;
}
else {
chars[++c] = ''; // unescape ""
}
}
end = ++c;
}
if ('"' === chars[c]) {
++c;
}
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
++c;
}
} else {
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
end = ++c;
}
}
row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
if (',' === chars[c]) {
++c;
}
}
if ('\r' === chars[c]) {
++c;
}
if ('\n' === chars[c]) {
++c;
}
}
return table;
},
stringify: function(table, replacer) {
replacer = replacer || function(r, c, v) { return v; };
var csv = '', c, cc, r, rr = table.length, cell;
for (r = 0; r < rr; ++r) {
if (r) {
csv += '\r\n';
}
for (c = 0, cc = table[r].length; c < cc; ++c) {
if (c) {
csv += ',';
}
cell = replacer(r, c, table[r][c]);
if (/[,\r\n"]/.test(cell)) {
cell = '"' + cell.replace(/"/g, '""') + '"';
}
csv += (cell || 0 === cell) ? cell : '';
}
}
return csv;
}
};
推荐文章
- 给一个数字加上st, nd, rd和th(序数)后缀
- 如何以编程方式触发引导模式?
- setTimeout带引号和不带括号的区别
- 在JS的Chrome CPU配置文件中,'self'和'total'之间的差异
- 用javascript检查输入字符串中是否包含数字
- 如何使用JavaScript分割逗号分隔字符串?
- 在Javascript中~~(“双波浪号”)做什么?
- 谷歌chrome扩展::console.log()从后台页面?
- 未捕获的SyntaxError:
- [].slice的解释。调用javascript?
- jQuery日期/时间选择器
- 我如何预填充一个jQuery Datepicker文本框与今天的日期?
- 数组的indexOf函数和findIndex函数的区别
- jQuery添加必要的输入字段
- Access-Control-Allow-Origin不允许Origin < Origin >