我可以在哪里找到一些JavaScript代码来解析CSV数据?


当前回答

我已经构造了这个JavaScript脚本来解析字符串到数组对象中的CSV。我发现最好将整个CSV分解成行、字段并相应地处理它们。我认为这将使您更容易更改代码以满足您的需要。

    //
    //
    // CSV to object
    //
    //

    const new_line_char = '\n';
    const field_separator_char = ',';

    function parse_csv(csv_str) {

        var result = [];

        let line_end_index_moved = false;
        let line_start_index = 0;
        let line_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_str[csr_index];
        let found_new_line_char = get_new_line_char(csv_str);
        let in_quote = false;

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            csv_str = csv_str.split(found_new_line_char).join(new_line_char);
        }
        // Handle the last character is not \n
        if (csv_str[csv_str.length - 1] !== new_line_char) {
            csv_str += new_line_char;
        }

        while (csr_index < csv_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === new_line_char) {
                if (in_quote === false) {
                    if (line_end_index_moved && (line_start_index <= line_end_index)) {
                        result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
                        line_start_index = csr_index + 1;
                    } // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
                } // Else: just ignore because we are in a quote
            }
            csr_index++;
            cursor_val = csv_str[csr_index];
            line_end_index = csr_index;
            line_end_index_moved = true;
        }

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            let new_result = [];
            let curr_row;
            for (var i = 0; i < result.length; i++) {
                curr_row = [];
                for (var j = 0; j < result[i].length; j++) {
                    curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
                }
                new_result.push(curr_row);
            }
            result = new_result;
        }
        return result;
    }

    function parse_csv_line(csv_line_str) {

        var result = [];

        //let field_end_index_moved = false;
        let field_start_index = 0;
        let field_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_line_str[csr_index];
        let in_quote = false;

        // Pretend that the last char is the separator_char to complete the loop
        csv_line_str += field_separator_char;

        while (csr_index < csv_line_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === field_separator_char) {
                if (in_quote === false) {
                    if (field_start_index <= field_end_index) {
                        result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
                        field_start_index = csr_index + 1;
                    } // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
                } // Else: just ignore because we are in quote
            }
            csr_index++;
            cursor_val = csv_line_str[csr_index];
            field_end_index = csr_index;
            field_end_index_moved = true;
        }
        return result;
    }

    function parse_csv_field(csv_field_str) {
        with_quote = (csv_field_str[0] === '"');

        if (with_quote) {
            csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
            csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
        }
        return csv_field_str;
    }

    // Initial method: check the first newline character only
    function get_new_line_char(csv_str) {
        if (csv_str.indexOf('\r\n') > -1) {
            return '\r\n';
        } else {
            return '\n'
        }
    }

其他回答

就我个人而言,我喜欢使用deno std库,因为大多数模块都与浏览器正式兼容

问题是std是typescript,但官方解决方案可能会在未来发生https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728

目前有一个积极维护的飞行转译器https://bundle.deno.dev/

你可以像这样简单地使用它

<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/std@0.126.0/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>

我不知道为什么我不能让Kirtan的例子对我有用。它似乎在空字段或带尾随逗号的字段上失败了……

这个似乎可以同时处理这两个问题。

我没有编写解析器代码,只是对解析器函数进行了包装,以使其适用于文件。看到归因。

    var Strings = {
        /**
         * Wrapped CSV line parser
         * @param s      String delimited CSV string
         * @param sep    Separator override
         * @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
         */
        parseCSV : function(s,sep) {
            // http://stackoverflow.com/questions/1155678/javascript-string-newline-character
            var universalNewline = /\r\n|\r|\n/g;
            var a = s.split(universalNewline);
            for(var i in a){
                for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
                    if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
                        if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
                            f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
                          } else if (x) {
                        f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
                      } else f = f.shift().split(sep).concat(f);
                    } else f[x].replace(/""/g, '"');
                  } a[i] = f;
        }
        return a;
        }
    }

只是随便说说而已。我最近遇到了用Javascript解析CSV列的需求,于是我选择了自己的简单解决方案。它满足了我的需要,也可能帮助到其他人。

const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true'; const parseCSV = text => { const lines = text.split('\n'); const output = []; lines.forEach(line => { line = line.trim(); if (line.length === 0) return; const skipIndexes = {}; const columns = line.split(','); output.push(columns.reduce((result, item, index) => { if (skipIndexes[index]) return result; if (item.startsWith('"') && !item.endsWith('"')) { while (!columns[index + 1].endsWith('"')) { index++; item += `,${columns[index]}`; skipIndexes[index] = true; } index++; skipIndexes[index] = true; item += `,${columns[index]}`; } result.push(item); return result; }, [])); }); return output; }; console.log(parseCSV(csvString));

我已经构造了这个JavaScript脚本来解析字符串到数组对象中的CSV。我发现最好将整个CSV分解成行、字段并相应地处理它们。我认为这将使您更容易更改代码以满足您的需要。

    //
    //
    // CSV to object
    //
    //

    const new_line_char = '\n';
    const field_separator_char = ',';

    function parse_csv(csv_str) {

        var result = [];

        let line_end_index_moved = false;
        let line_start_index = 0;
        let line_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_str[csr_index];
        let found_new_line_char = get_new_line_char(csv_str);
        let in_quote = false;

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            csv_str = csv_str.split(found_new_line_char).join(new_line_char);
        }
        // Handle the last character is not \n
        if (csv_str[csv_str.length - 1] !== new_line_char) {
            csv_str += new_line_char;
        }

        while (csr_index < csv_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === new_line_char) {
                if (in_quote === false) {
                    if (line_end_index_moved && (line_start_index <= line_end_index)) {
                        result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
                        line_start_index = csr_index + 1;
                    } // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
                } // Else: just ignore because we are in a quote
            }
            csr_index++;
            cursor_val = csv_str[csr_index];
            line_end_index = csr_index;
            line_end_index_moved = true;
        }

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            let new_result = [];
            let curr_row;
            for (var i = 0; i < result.length; i++) {
                curr_row = [];
                for (var j = 0; j < result[i].length; j++) {
                    curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
                }
                new_result.push(curr_row);
            }
            result = new_result;
        }
        return result;
    }

    function parse_csv_line(csv_line_str) {

        var result = [];

        //let field_end_index_moved = false;
        let field_start_index = 0;
        let field_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_line_str[csr_index];
        let in_quote = false;

        // Pretend that the last char is the separator_char to complete the loop
        csv_line_str += field_separator_char;

        while (csr_index < csv_line_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === field_separator_char) {
                if (in_quote === false) {
                    if (field_start_index <= field_end_index) {
                        result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
                        field_start_index = csr_index + 1;
                    } // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
                } // Else: just ignore because we are in quote
            }
            csr_index++;
            cursor_val = csv_line_str[csr_index];
            field_end_index = csr_index;
            field_end_index_moved = true;
        }
        return result;
    }

    function parse_csv_field(csv_field_str) {
        with_quote = (csv_field_str[0] === '"');

        if (with_quote) {
            csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
            csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
        }
        return csv_field_str;
    }

    // Initial method: check the first newline character only
    function get_new_line_char(csv_str) {
        if (csv_str.indexOf('\r\n') > -1) {
            return '\r\n';
        } else {
            return '\n'
        }
    }

这是另一个解决方案。这个用途:

一个粗略的全局正则表达式,用于分割CSV字符串(包括引号和逗号) 用于清除周围引号和尾随逗号的细粒度正则表达式 此外,还具有区分字符串、数字、布尔值和空值的类型更正

对于以下输入字符串:

"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,

代码输出:

[
  "This is, a value",
  "Hello",
  4,
  -123,
  3.1415,
  "This is also, possible",
  true,
  null
]

下面是我在一个可运行的代码片段中实现的parseCSVLine():

function parseCSVLine(text) { return text.match( /\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g ).map( function (text) { let m; if (m = text.match(/^\s*,?$/)) return null; // null value if (m = text.match(/^\s*\"([^"]*)\"\s*,?$/)) return m[1]; // Double Quoted Text if (m = text.match(/^\s*'([^']*)'\s*,?$/)) return m[1]; // Single Quoted Text if (m = text.match(/^\s*(true|false)\s*,?$/)) return m[1] === "true"; // Boolean if (m = text.match(/^\s*((?:\+|\-)?\d+)\s*,?$/)) return parseInt(m[1]); // Integer Number if (m = text.match(/^\s*((?:\+|\-)?\d*\.\d*)\s*,?$/)) return parseFloat(m[1]); // Floating Number if (m = text.match(/^\s*(.*?)\s*,?$/)) return m[1]; // Unquoted Text return text; } ); } let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`; let obj = parseCSVLine(data); console.log( JSON.stringify( obj, undefined, 2 ) );