关于使用fs.readdir进行异步目录搜索有什么想法吗?我意识到我们可以引入递归,并调用read目录函数来读取下一个目录,但我有点担心它不是异步的…

什么好主意吗?我已经看了node-walk,它很棒,但它不能像readdir那样只给我数组中的文件。虽然

寻找这样的输出…

['file1.txt', 'file2.txt', 'dir/file3.txt']

当前回答

现代基于promise的读dir递归版本:

const fs = require('fs');
const path = require('path');

const readDirRecursive = async (filePath) => {
    const dir = await fs.promises.readdir(filePath);
    const files = await Promise.all(dir.map(async relativePath => {
        const absolutePath = path.join(filePath, relativePath);
        const stat = await fs.promises.lstat(absolutePath);

        return stat.isDirectory() ? readDirRecursive(absolutePath) : absolutePath;
    }));

    return files.flat();
}

其他回答

我最近编写了这个代码,并认为在这里分享它是有意义的。代码使用了异步库。

var fs = require('fs');
var async = require('async');

var scan = function(dir, suffix, callback) {
  fs.readdir(dir, function(err, files) {
    var returnFiles = [];
    async.each(files, function(file, next) {
      var filePath = dir + '/' + file;
      fs.stat(filePath, function(err, stat) {
        if (err) {
          return next(err);
        }
        if (stat.isDirectory()) {
          scan(filePath, suffix, function(err, results) {
            if (err) {
              return next(err);
            }
            returnFiles = returnFiles.concat(results);
            next();
          })
        }
        else if (stat.isFile()) {
          if (file.indexOf(suffix, file.length - suffix.length) !== -1) {
            returnFiles.push(filePath);
          }
          next();
        }
      });
    }, function(err) {
      callback(err, returnFiles);
    });
  });
};

你可以这样使用它:

scan('/some/dir', '.ext', function(err, files) {
  // Do something with files that ends in '.ext'.
  console.log(files);
});

它使用了节点8中最多的新功能,包括Promises、util/promisify、destructuring、async-await、map+reduce等等,让你的同事在试图弄清楚发生了什么时挠头。

节点 8+

没有外部依赖。

const { promisify } = require('util');
const { resolve } = require('path');
const fs = require('fs');
const readdir = promisify(fs.readdir);
const stat = promisify(fs.stat);

async function getFiles(dir) {
  const subdirs = await readdir(dir);
  const files = await Promise.all(subdirs.map(async (subdir) => {
    const res = resolve(dir, subdir);
    return (await stat(res)).isDirectory() ? getFiles(res) : res;
  }));
  return files.reduce((a, f) => a.concat(f), []);
}

使用

getFiles(__dirname)
  .then(files => console.log(files))
  .catch(e => console.error(e));

节点 10.10+

更新到节点10+,甚至更多的whizbang:

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  return Array.prototype.concat(...files);
}

请注意,从节点11.15.0开始,您可以使用files.flat()而不是array. prototype.concat(…files)来扁平化files数组。

11 +节点

如果你想让所有人都大吃一惊,你可以使用下面使用异步迭代器的版本。除了非常酷之外,它还允许使用者每次提取一个结果,这使得它更适合于真正大的目录。

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function* getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  for (const dirent of dirents) {
    const res = resolve(dir, dirent.name);
    if (dirent.isDirectory()) {
      yield* getFiles(res);
    } else {
      yield res;
    }
  }
}

用法发生了变化,因为返回类型现在是异步迭代器而不是promise

;(async () => {
  for await (const f of getFiles('.')) {
    console.log(f);
  }
})()

如果有人感兴趣,我在这里写了更多关于异步迭代器的文章:https://qwtel.com/posts/software/async-generators-in-the-wild/

这就是我的答案。希望它能帮助到一些人。

我的重点是使搜索例程可以停在任何地方,对于找到的文件,告诉原始路径的相对深度。

var _fs = require('fs');
var _path = require('path');
var _defer = process.nextTick;

// next() will pop the first element from an array and return it, together with
// the recursive depth and the container array of the element. i.e. If the first
// element is an array, it'll be dug into recursively. But if the first element is
// an empty array, it'll be simply popped and ignored.
// e.g. If the original array is [1,[2],3], next() will return [1,0,[[2],3]], and
// the array becomes [[2],3]. If the array is [[[],[1,2],3],4], next() will return
// [1,2,[2]], and the array becomes [[[2],3],4].
// There is an infinity loop `while(true) {...}`, because I optimized the code to
// make it a non-recursive version.
var next = function(c) {
    var a = c;
    var n = 0;
    while (true) {
        if (a.length == 0) return null;
        var x = a[0];
        if (x.constructor == Array) {
            if (x.length > 0) {
                a = x;
                ++n;
            } else {
                a.shift();
                a = c;
                n = 0;
            }
        } else {
            a.shift();
            return [x, n, a];
        }
    }
}

// cb is the callback function, it have four arguments:
//    1) an error object if any exception happens;
//    2) a path name, may be a directory or a file;
//    3) a flag, `true` means directory, and `false` means file;
//    4) a zero-based number indicates the depth relative to the original path.
// cb should return a state value to tell whether the searching routine should
// continue: `true` means it should continue; `false` means it should stop here;
// but for a directory, there is a third state `null`, means it should do not
// dig into the directory and continue searching the next file.
var ls = function(path, cb) {
    // use `_path.resolve()` to correctly handle '.' and '..'.
    var c = [ _path.resolve(path) ];
    var f = function() {
        var p = next(c);
        p && s(p);
    };
    var s = function(p) {
        _fs.stat(p[0], function(err, ss) {
            if (err) {
                // use `_defer()` to turn a recursive call into a non-recursive call.
                cb(err, p[0], null, p[1]) && _defer(f);
            } else if (ss.isDirectory()) {
                var y = cb(null, p[0], true, p[1]);
                if (y) r(p);
                else if (y == null) _defer(f);
            } else {
                cb(null, p[0], false, p[1]) && _defer(f);
            }
        });
    };
    var r = function(p) {
        _fs.readdir(p[0], function(err, files) {
            if (err) {
                cb(err, p[0], true, p[1]) && _defer(f);
            } else {
                // not use `Array.prototype.map()` because we can make each change on site.
                for (var i = 0; i < files.length; i++) {
                    files[i] = _path.join(p[0], files[i]);
                }
                p[2].unshift(files);
                _defer(f);
            }
        });
    }
    _defer(f);
};

var printfile = function(err, file, isdir, n) {
    if (err) {
        console.log('-->   ' + ('[' + n + '] ') + file + ': ' + err);
        return true;
    } else {
        console.log('... ' + ('[' + n + '] ') + (isdir ? 'D' : 'F') + ' ' + file);
        return true;
    }
};

var path = process.argv[2];
ls(path, printfile);

为了好玩,这里有一个基于流的版本,它与highland.js streams库一起工作。作者之一是维克多·伍。

###
  directory >---m------> dirFilesStream >---------o----> out
                |                                 |
                |                                 |
                +--------< returnPipe <-----------+

  legend: (m)erge  (o)bserve

 + directory         has the initial file
 + dirListStream     does a directory listing
 + out               prints out the full path of the file
 + returnPipe        runs stat and filters on directories

###

_ = require('highland')
fs = require('fs')
fsPath = require('path')

directory = _(['someDirectory'])
mergePoint = _()
dirFilesStream = mergePoint.merge().flatMap((parentPath) ->
  _.wrapCallback(fs.readdir)(parentPath).sequence().map (path) ->
    fsPath.join parentPath, path
)
out = dirFilesStream
# Create the return pipe
returnPipe = dirFilesStream.observe().flatFilter((path) ->
  _.wrapCallback(fs.stat)(path).map (v) ->
    v.isDirectory()
)
# Connect up the merge point now that we have all of our streams.
mergePoint.write directory
mergePoint.write returnPipe
mergePoint.end()
# Release backpressure.  This will print files as they are discovered
out.each H.log
# Another way would be to queue them all up and then print them all out at once.
# out.toArray((files)-> console.log(files))

我喜欢上面chjj的答案,如果没有那个开始,我就无法创建我的并行循环版本。

var fs = require("fs");

var tree = function(dir, done) {
  var results = {
        "path": dir
        ,"children": []
      };
  fs.readdir(dir, function(err, list) {
    if (err) { return done(err); }
    var pending = list.length;
    if (!pending) { return done(null, results); }
    list.forEach(function(file) {
      fs.stat(dir + '/' + file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          tree(dir + '/' + file, function(err, res) {
            results.children.push(res);
            if (!--pending){ done(null, results); }
          });
        } else {
          results.children.push({"path": dir + "/" + file});
          if (!--pending) { done(null, results); }
        }
      });
    });
  });
};

module.exports = tree;

我也创建了一个Gist。欢迎评论。我仍然在NodeJS领域起步,所以这是我希望学到更多的一种方式。