关于使用fs.readdir进行异步目录搜索有什么想法吗?我意识到我们可以引入递归,并调用read目录函数来读取下一个目录,但我有点担心它不是异步的…

什么好主意吗?我已经看了node-walk,它很棒,但它不能像readdir那样只给我数组中的文件。虽然

寻找这样的输出…

['file1.txt', 'file2.txt', 'dir/file3.txt']

当前回答

Filehound库是另一种选择。它将递归地搜索给定目录(默认为工作目录)。它支持各种过滤器、回调、承诺和同步搜索。

例如,搜索当前工作目录中的所有文件(使用回调):

const Filehound = require('filehound');

Filehound.create()
.find((err, files) => {
    if (err) {
        return console.error(`error: ${err}`);
    }
    console.log(files); // array of files
});

或承诺,并指定特定的目录:

const Filehound = require('filehound');

Filehound.create()
.paths("/tmp")
.find()
.each(console.log);

更多的用例和使用示例请参考文档:https://github.com/nspragg/filehound

声明:我是作者。

其他回答

这是另一个实现。上述解决方案都没有任何限制,因此如果您的目录结构很大,它们都会崩溃并最终耗尽资源。

var async = require('async');
var fs = require('fs');
var resolve = require('path').resolve;

var scan = function(path, concurrency, callback) {
    var list = [];

    var walker = async.queue(function(path, callback) {
        fs.stat(path, function(err, stats) {
            if (err) {
                return callback(err);
            } else {
                if (stats.isDirectory()) {
                    fs.readdir(path, function(err, files) {
                        if (err) {
                            callback(err);
                        } else {
                            for (var i = 0; i < files.length; i++) {
                                walker.push(resolve(path, files[i]));
                            }
                            callback();
                        }
                    });
                } else {
                    list.push(path);
                    callback();
                }
            }
        });
    }, concurrency);

    walker.push(path);

    walker.drain = function() {
        callback(list);
    }
};

使用50的并发工作得非常好,并且几乎和小型目录结构的简单实现一样快。

为了好玩,这里有一个基于流的版本,它与highland.js streams库一起工作。作者之一是维克多·伍。

###
  directory >---m------> dirFilesStream >---------o----> out
                |                                 |
                |                                 |
                +--------< returnPipe <-----------+

  legend: (m)erge  (o)bserve

 + directory         has the initial file
 + dirListStream     does a directory listing
 + out               prints out the full path of the file
 + returnPipe        runs stat and filters on directories

###

_ = require('highland')
fs = require('fs')
fsPath = require('path')

directory = _(['someDirectory'])
mergePoint = _()
dirFilesStream = mergePoint.merge().flatMap((parentPath) ->
  _.wrapCallback(fs.readdir)(parentPath).sequence().map (path) ->
    fsPath.join parentPath, path
)
out = dirFilesStream
# Create the return pipe
returnPipe = dirFilesStream.observe().flatFilter((path) ->
  _.wrapCallback(fs.stat)(path).map (v) ->
    v.isDirectory()
)
# Connect up the merge point now that we have all of our streams.
mergePoint.write directory
mergePoint.write returnPipe
mergePoint.end()
# Release backpressure.  This will print files as they are discovered
out.each H.log
# Another way would be to queue them all up and then print them all out at once.
# out.toArray((files)-> console.log(files))

只是简单的散步

let pending = [baseFolderPath]
function walk () {
    pending.shift();
    // do stuffs width pending[0] and change pending items
    if (pending[0]) walk(pending[0])
}
walk(pending[0])

下面是一个获得所有文件包括子目录的递归方法。

const FileSystem = require("fs");
const Path = require("path");

//...

function getFiles(directory) {
    directory = Path.normalize(directory);
    let files = FileSystem.readdirSync(directory).map((file) => directory + Path.sep + file);

    files.forEach((file, index) => {
        if (FileSystem.statSync(file).isDirectory()) {
            Array.prototype.splice.apply(files, [index, 1].concat(getFiles(file)));
        }
    });

    return files;
}

为了以防有人发现它有用,我还整理了一个同步版本。

var walk = function(dir) {
    var results = [];
    var list = fs.readdirSync(dir);
    list.forEach(function(file) {
        file = dir + '/' + file;
        var stat = fs.statSync(file);
        if (stat && stat.isDirectory()) { 
            /* Recurse into a subdirectory */
            results = results.concat(walk(file));
        } else { 
            /* Is a file */
            results.push(file);
        }
    });
    return results;
}

提示:在筛选时使用更少的资源。这个函数本身的过滤器。例如:替换results.push(文件);下面的代码。根据需要调整:

    file_type = file.split(".").pop();
    file_name = file.split(/(\\|\/)/g).pop();
    if (file_type == "json") results.push(file);