如何在 Node.js 中逐行读取文件?

我想逐行读取一个大文件。我在Quora上找到了一个讨论这个主题的问题，但我还缺少一些关键环节，没法把整个方案串起来。

 // Print every line that arrives on STDIN, using the third-party `lazy` package.
 var Lazy = require("lazy");

 // Handler invoked once per line; the value is converted with toString() before printing.
 function printLine(line) {
     console.log(line.toString());
 }

 var stdinLines = new Lazy(process.stdin).lines;
 stdinLines.forEach(printLine);

 // Resume STDIN so that it starts delivering data to the Lazy wrapper.
 process.stdin.resume();

我想要弄清楚的是如何一次从文件中读取一行，而不是像本例中那样读取STDIN。

我试着:

 // Callback handed to fs.open: rethrows an open error, otherwise receives the descriptor.
 function Process(err, fd) {
    if (err) {
        throw err;
    }
    // DO lazy read
 }

 // Open the CSV read-only; the function declaration above is hoisted, so order is irrelevant.
 fs.open('./VeryBigFile.csv', 'r', '0666', Process);

但这并不奏效。我知道在必要时我可以使用PHP之类的东西，但我想弄清楚这个问题。

我不认为其他答案会起作用，因为文件比我运行它的服务器的内存大得多。

当前回答

var fs = require('fs');
var StringDecoder = require('string_decoder').StringDecoder;

/**
 * Reads a text file line by line through a small fixed-size buffer, so the
 * whole file is never held in memory at once.
 *
 * The terminators "\n", "\r\n" and "\r" are all recognised and are stripped
 * from the text handed to `online`. A terminator at the very end of the file
 * does not produce an extra empty line, and an empty file produces no lines.
 *
 * The first step runs synchronously inside the call; every further step is
 * scheduled with setImmediate so that other events can run in between.
 *
 * @param {string} name - Path of the file to read.
 * @param {function(string, number): void} online - Called for each line with
 *     the line text and its 1-based line number.
 * @param {function(number): void} [onend] - Called once after the last line
 *     with the total number of lines delivered.
 * @param {string} [encoding="utf8"] - Encoding used to decode the file.
 * @throws {TypeError} If `online` is not a function.
 * @throws {Error} Whatever opening/reading the file or a callback throws; the
 *     file descriptor is closed before the error propagates. Errors raised in
 *     a scheduled step are thrown from that scheduled callback.
 */
function readfile(name,online,onend,encoding) {
    if (typeof online !== "function") {
        throw new TypeError("readfile: online must be a function");
    }

    const bufsize = 1024;
    const buffer = Buffer.alloc(bufsize);
    // StringDecoder holds back an incomplete multi-byte sequence at the end of
    // a chunk, so characters that straddle two reads are decoded intact.
    const decoder = new StringDecoder(encoding || "utf8");
    const fd = fs.openSync(name,'r');
    let position = 0;
    let eof = false;
    let closed = false;
    let data = "";
    let lines = 0;

    // Closes the descriptor exactly once, no matter how often it is called.
    function close() {
        if (closed) return;
        closed = true;
        fs.closeSync(fd);
    }

    // Reads the next chunk and appends its decoded text to `data`.
    function readbuf() {
        const bufread = fs.readSync(fd,buffer,0,bufsize,position);
        position += bufread;
        eof = bufread === 0;
        data += eof ? decoder.end() : decoder.write(buffer.subarray(0,bufread));
    }

    // Locates the first complete line terminator in `data`, or returns null.
    function findBreak() {
        const start = data.search(/[\r\n]/);
        if (start === -1) return null;
        const isCR = data[start] === "\r";
        // A "\r" that ends the buffered text may be the first half of "\r\n";
        // wait for more input before deciding (unless the file is exhausted).
        if (isCR && start === data.length - 1 && !eof) return null;
        const length = isCR && data[start + 1] === "\n" ? 2 : 1;
        return { start: start, end: start + length };
    }

    // Performs one unit of work; returns true once the file is fully handled.
    function advance() {
        let brk = findBreak();
        if (brk === null && !eof) {
            readbuf();
            brk = findBreak();
        }
        if (brk !== null) {
            const line = data.slice(0,brk.start);
            data = data.slice(brk.end);
            online(line,++lines);
            return false;
        }
        if (!eof) return false;

        close();
        // Only a non-empty remainder is an (unterminated) last line.
        if (data.length > 0) online(data,++lines);
        if (typeof onend === "function") onend(lines);
        return true;
    }

    function step() {
        let finished;
        try {
            finished = advance();
        } catch (err) {
            close();
            throw err;
        }
        // setImmediate (unlike recursive process.nextTick) lets pending I/O
        // and timers run between steps.
        if (!finished) setImmediate(step);
    }

    step();
}

我也遇到了同样的问题，于是想出了上面的解决方案。它看起来与其他答案类似，但它是异步的，而且读取大文件的速度非常快。

希望这能有所帮助

2016-05-01 10:10:39

其他回答

从Node.js v0.12和Node.js v4.0.0开始，有一个稳定的readline核心模块。下面是从文件中读取行，不需要任何外部模块的最简单方法:

const fs = require('fs');
const readline = require('readline');

/**
 * Prints every line of `input.txt` to the console, one line at a time.
 *
 * @returns {Promise<void>} Settles once the readline interface has been
 *     iterated to the end.
 */
async function processLineByLine() {
  const lineReader = readline.createInterface({
    input: fs.createReadStream('input.txt'),
    // crlfDelay: Infinity makes every CR LF ('\r\n') pair in input.txt count
    // as a single line break.
    crlfDelay: Infinity,
  });

  // The interface is async-iterable and yields the lines of input.txt in order.
  for await (const line of lineReader) {
    console.log(`Line from file: ${line}`);
  }
}

// Start reading. A rejection is reported and turned into a failing exit status
// instead of being left on a floating promise.
processLineByLine().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

或者:

// Stream 'file.in' through a readline interface and print each line as its
// 'line' event fires.
var input = require('fs').createReadStream('file.in');
var lineReader = require('readline').createInterface({ input: input });

// Handler for the 'line' event; receives the text of one line.
function printLine(line) {
  console.log('Line from file:', line);
}

lineReader.on('line', printLine);

最后一行被正确读取(截至Node v0.12或更高版本)，即使没有最后的\n。

更新:此示例已添加到Node的API官方文档中。

2015-09-16 02:59:40

您不必打开文件，而是必须创建一个ReadStream。

fs.createReadStream

然后将该流传递给Lazy

2011-05-27 19:10:30

2019年更新

Nodejs官方文档中已经发布了一个很棒的例子。在这里

这需要在您的机器上安装较新版本的 Node.js(版本高于 11.4)。

const fs = require('fs');
const readline = require('readline');

/**
 * Reads `input.txt` through the readline module and logs each of its lines.
 *
 * @returns {Promise<void>} Settles after the last line has been logged.
 */
async function processLineByLine() {
  const source = fs.createReadStream('input.txt');

  // With crlfDelay set to Infinity, each CR LF ('\r\n') sequence in input.txt
  // is recognised as one line break.
  const options = { input: source, crlfDelay: Infinity };
  const reader = readline.createInterface(options);

  for await (const line of reader) {
    // `line` holds the next line of input.txt.
    console.log(`Line from file: ${line}`);
  }
}

// Start reading. A rejection is reported and turned into a failing exit status
// instead of being left on a floating promise.
processLineByLine().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

2019-05-09 15:37:00

我对缺乏全面的解决方案感到沮丧，所以我把自己的尝试(git / npm)放在一起。复制粘贴功能列表:

- Interactive line processing (callback-based, no loading the entire file into RAM)
- Optionally, return all lines in an array (detailed or raw mode)
- Interactively interrupt streaming, or perform map/filter like processing
- Detect any newline convention (PC/Mac/Linux)
- Correct eof / last line treatment
- Correct handling of multi-byte UTF-8 characters
- Retrieve byte offset and byte length information on per-line basis
- Random access, using line-based or byte-based offsets
- Automatically map line-offset information, to speed up random access
- Zero dependencies
- Tests

NIH(Not Invented Here，“非我发明”综合症)?你来决定 :-)

2014-05-20 22:01:10

当我用Lazy逐行读取文件、处理这些行并将它们写入另一个流时，最终出现了非常严重的内存泄漏，这是由Node中drain/pause/resume的工作方式造成的(参见:http://elegantcode.com/2011/04/06/taking-baby-steps-with-node-js-pumping-data-between-streams/ (顺便说一句，我很喜欢这位作者))。我还没有仔细研究Lazy，无法确切地理解其中的原因，但我没办法在不让Lazy退出的情况下暂停读取流以等待drain。

我写了代码来处理大量的csv文件到xml文档，你可以在这里看到代码:https://github.com/j03m/node-csv2xml

如果你运行使用Lazy lines的早期版本，就会出现内存泄漏。最新的版本完全没有泄漏，你可以把它作为读取器/处理器的基础，不过里面有一些我自己定制的东西。

编辑:我想我还应该指出，我用Lazy编写的代码原本工作得很好，直到我需要写入足够大的xml片段、以至于必须用到drain/pause/resume为止。对于较小的数据块，这没有问题。

2011-11-09 03:40:49

在node.js中读取文件一次一行?

推荐文章

最新文章

标签