我可以在哪里找到一些JavaScript代码来解析CSV数据?


当前回答

这是另一个解决方案。这个用途:

一个粗略的全局正则表达式,用于分割CSV字符串(包括引号和逗号) 用于清除周围引号和尾随逗号的细粒度正则表达式 此外,还具有区分字符串、数字、布尔值和空值的类型更正

对于以下输入字符串:

"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,

代码输出:

[
  "This is, a value",
  "Hello",
  4,
  -123,
  3.1415,
  "This is also, possible",
  true,
  null
]

下面是我在一个可运行的代码片段中实现的parseCSVLine():

function parseCSVLine(text) { return text.match( /\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g ).map( function (text) { let m; if (m = text.match(/^\s*,?$/)) return null; // null value if (m = text.match(/^\s*\"([^"]*)\"\s*,?$/)) return m[1]; // Double Quoted Text if (m = text.match(/^\s*'([^']*)'\s*,?$/)) return m[1]; // Single Quoted Text if (m = text.match(/^\s*(true|false)\s*,?$/)) return m[1] === "true"; // Boolean if (m = text.match(/^\s*((?:\+|\-)?\d+)\s*,?$/)) return parseInt(m[1]); // Integer Number if (m = text.match(/^\s*((?:\+|\-)?\d*\.\d*)\s*,?$/)) return parseFloat(m[1]); // Floating Number if (m = text.match(/^\s*(.*?)\s*,?$/)) return m[1]; // Unquoted Text return text; } ); } let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`; let obj = parseCSVLine(data); console.log( JSON.stringify( obj, undefined, 2 ) );

其他回答

您可以使用本博客条目中提到的CSVToArray()函数。

<script type="text/javascript">
    // ref: http://stackoverflow.com/a/1293163/2343
    // This will parse a delimited string into an array of
    // arrays. The default delimiter is the comma, but this
    // can be overriden in the second argument.
    function CSVToArray( strData, strDelimiter ){
        // Check to see if the delimiter is defined. If not,
        // then default to comma.
        strDelimiter = (strDelimiter || ",");

        // Create a regular expression to parse the CSV values.
        var objPattern = new RegExp(
            (
                // Delimiters.
                "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +

                // Quoted fields.
                "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +

                // Standard fields.
                "([^\"\\" + strDelimiter + "\\r\\n]*))"
            ),
            "gi"
            );


        // Create an array to hold our data. Give the array
        // a default empty first row.
        var arrData = [[]];

        // Create an array to hold our individual pattern
        // matching groups.
        var arrMatches = null;


        // Keep looping over the regular expression matches
        // until we can no longer find a match.
        while (arrMatches = objPattern.exec( strData )){

            // Get the delimiter that was found.
            var strMatchedDelimiter = arrMatches[ 1 ];

            // Check to see if the given delimiter has a length
            // (is not the start of string) and if it matches
            // field delimiter. If id does not, then we know
            // that this delimiter is a row delimiter.
            if (
                strMatchedDelimiter.length &&
                strMatchedDelimiter !== strDelimiter
                ){

                // Since we have reached a new row of data,
                // add an empty row to our data array.
                arrData.push( [] );

            }

            var strMatchedValue;

            // Now that we have our delimiter out of the way,
            // let's check to see which kind of value we
            // captured (quoted or unquoted).
            if (arrMatches[ 2 ]){

                // We found a quoted value. When we capture
                // this value, unescape any double quotes.
                strMatchedValue = arrMatches[ 2 ].replace(
                    new RegExp( "\"\"", "g" ),
                    "\""
                    );

            } else {

                // We found a non-quoted value.
                strMatchedValue = arrMatches[ 3 ];

            }


            // Now that we have our value string, let's add
            // it to the data array.
            arrData[ arrData.length - 1 ].push( strMatchedValue );
        }

        // Return the parsed data.
        return( arrData );
    }

</script>

正则表达式拯救你!这几行代码根据RFC 4180标准处理带有嵌入逗号、引号和换行符的正确引用字段。

function parseCsv(data, fieldSep, newLine) {
    fieldSep = fieldSep || ',';
    newLine = newLine || '\n';
    var nSep = '\x1D';
    var qSep = '\x1E';
    var cSep = '\x1F';
    var nSepRe = new RegExp(nSep, 'g');
    var qSepRe = new RegExp(qSep, 'g');
    var cSepRe = new RegExp(cSep, 'g');
    var fieldRe = new RegExp('(?<=(^|[' + fieldSep + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + fieldSep + '\\n]))', 'g');
    var grid = [];
    data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
        return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep);
    }).split(/\n/).forEach(function(line) {
        var row = line.split(fieldSep).map(function(cell) {
            return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, ',');
        });
        grid.push(row);
    });
    return grid;
}

const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ',';      // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n' 
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]

您不需要像lex/yacc这样的解析器-生成器。正则表达式可以正确地处理RFC 4180,这要归功于正向向后查找、反向向后查找和正向向前查找。

克隆/下载代码https://github.com/peterthoeny/parse-csv-js

就我个人而言,我喜欢使用deno std库,因为大多数模块都与浏览器正式兼容

问题是std是typescript,但官方解决方案可能会在未来发生https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728

目前有一个积极维护的飞行转译器https://bundle.deno.dev/

你可以像这样简单地使用它

<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/std@0.126.0/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>

只是随便说说而已。我最近遇到了用Javascript解析CSV列的需求,于是我选择了自己的简单解决方案。它满足了我的需要,也可能帮助到其他人。

const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true'; const parseCSV = text => { const lines = text.split('\n'); const output = []; lines.forEach(line => { line = line.trim(); if (line.length === 0) return; const skipIndexes = {}; const columns = line.split(','); output.push(columns.reduce((result, item, index) => { if (skipIndexes[index]) return result; if (item.startsWith('"') && !item.endsWith('"')) { while (!columns[index + 1].endsWith('"')) { index++; item += `,${columns[index]}`; skipIndexes[index] = true; } index++; skipIndexes[index] = true; item += `,${columns[index]}`; } result.push(item); return result; }, [])); }); return output; }; console.log(parseCSV(csvString));

我不知道为什么我不能让Kirtan的例子对我有用。它似乎在空字段或带尾随逗号的字段上失败了……

这个似乎可以同时处理这两个问题。

我没有编写解析器代码,只是对解析器函数进行了包装,以使其适用于文件。看到归因。

    var Strings = {
        /**
         * Wrapped CSV line parser
         * @param s      String delimited CSV string
         * @param sep    Separator override
         * @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
         */
        parseCSV : function(s,sep) {
            // http://stackoverflow.com/questions/1155678/javascript-string-newline-character
            var universalNewline = /\r\n|\r|\n/g;
            var a = s.split(universalNewline);
            for(var i in a){
                for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
                    if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
                        if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
                            f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
                          } else if (x) {
                        f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
                      } else f = f.shift().split(sep).concat(f);
                    } else f[x].replace(/""/g, '"');
                  } a[i] = f;
        }
        return a;
        }
    }