我试图读取一个大文件一行在一次。我在Quora上找到了一个关于这个主题的问题,但我错过了一些联系,把整个事情联系在一起。
var Lazy=require("lazy");
new Lazy(process.stdin)
.lines
.forEach(
function(line) {
console.log(line.toString());
}
);
process.stdin.resume();
我想要弄清楚的是如何一次从文件中读取一行,而不是像本例中那样读取STDIN。
我试着:
fs.open('./VeryBigFile.csv', 'r', '0666', Process);
function Process(err, fd) {
if (err) throw err;
// DO lazy read
}
但这并不奏效。我知道在必要时我可以使用PHP之类的东西,但我想弄清楚这个问题。
我不认为其他答案会起作用,因为文件比我运行它的服务器的内存大得多。
这是我最喜欢的浏览文件的方式,是使用现代async/await进行渐进式(不是“slurp”或全内存方式)文件读取的简单本机解决方案。在处理大型文本文件时,我发现这是一种“自然”的解决方案,而不必求助于readline包或任何非核心依赖项。
let buf = '';
for await ( const chunk of fs.createReadStream('myfile') ) {
const lines = buf.concat(chunk).split(/\r?\n/);
buf = lines.pop();
for( const line of lines ) {
console.log(line);
}
}
if(buf.length) console.log(buf); // last line, if file does not end with newline
您可以在fs中调整编码。creatererestream或使用chunk.toString(<arg>)。这也让你更好地微调线分裂到你的口味,即。使用.split(/\n+/)跳过空行,用{highWaterMark: <chunkSize>}控制块大小。
Don't forget to create a function like processLine(line) to avoid repeating the line processing code twice due to the ending buf leftover. Unfortunately, the ReadStream instance does not update its end-of-file flags in this setup, so there's no way, afaik, to detect within the loop that we're in the last iteration without some more verbose tricks like comparing the file size from a fs.Stats() with .bytesRead. Hence the final buf processing solution, unless you're absolutely sure your file ends with a newline \n, in which case the for await loop should suffice.
★如果你更喜欢事件异步版本,这将是它:
let buf = '';
fs.createReadStream('myfile')
.on('data', chunk => {
const lines = buf.concat(chunk).split(/\r?\n/);
buf = lines.pop();
for( const line of lines ) {
console.log(line);
}
})
.on('end', () => buf.length && console.log(buf) );
★现在如果你不介意导入流核心包,那么这是等效的管道流版本,它允许链接转换,如gzip解压:
const { Writable } = require('stream');
let buf = '';
fs.createReadStream('myfile').pipe(
new Writable({
write: (chunk, enc, next) => {
const lines = buf.concat(chunk).split(/\r?\n/);
buf = lines.pop();
for (const line of lines) {
console.log(line);
}
next();
}
})
).on('finish', () => buf.length && console.log(buf) );
function createLineReader(fileName){
var EM = require("events").EventEmitter
var ev = new EM()
var stream = require("fs").createReadStream(fileName)
var remainder = null;
stream.on("data",function(data){
if(remainder != null){//append newly received data chunk
var tmp = new Buffer(remainder.length+data.length)
remainder.copy(tmp)
data.copy(tmp,remainder.length)
data = tmp;
}
var start = 0;
for(var i=0; i<data.length; i++){
if(data[i] == 10){ //\n new line
var line = data.slice(start,i)
ev.emit("line", line)
start = i+1;
}
}
if(start<data.length){
remainder = data.slice(start);
}else{
remainder = null;
}
})
stream.on("end",function(){
if(null!=remainder) ev.emit("line",remainder)
})
return ev
}
//---------main---------------
fileName = process.argv[2]
lineReader = createLineReader(fileName)
lineReader.on("line",function(line){
console.log(line.toString())
//console.log("++++++++++++++++++++")
})