关于使用fs.readdir进行异步目录搜索有什么想法吗?我意识到我们可以引入递归,并调用read目录函数来读取下一个目录,但我有点担心它不是异步的…

什么好主意吗?我已经看了node-walk,它很棒,但它不能像readdir那样只给我数组中的文件。虽然

寻找这样的输出…

['file1.txt', 'file2.txt', 'dir/file3.txt']

当前回答

对于Node 10.3+,这里是一个For -await解决方案:

#!/usr/bin/env node

const FS = require('fs');
const Util = require('util');
const readDir = Util.promisify(FS.readdir);
const Path = require('path');

async function* readDirR(path) {
    const entries = await readDir(path,{withFileTypes:true});
    for(let entry of entries) {
        const fullPath = Path.join(path,entry.name);
        if(entry.isDirectory()) {
            yield* readDirR(fullPath);
        } else {
            yield fullPath;
        }
    }
}

async function main() {
    const start = process.hrtime.bigint();
    for await(const file of readDirR('/mnt/home/media/Unsorted')) {
        console.log(file);
    }
    console.log((process.hrtime.bigint()-start)/1000000n);
}

main().catch(err => {
    console.error(err);
});

这种解决方案的好处是,您可以立即开始处理结果;例如,读取媒体目录中的所有文件需要12秒,但如果我这样做,我可以在几毫秒内得到第一个结果。

其他回答

还有一种方法。我把它放在这里。也许将来它会对某人有用。

const fs = require("fs");
const { promisify } = require("util");
const p = require("path");
const readdir = promisify(fs.readdir);

async function getFiles(path) {
  try {
    const entries = await readdir(path, { withFileTypes: true });

    const files = entries
      .filter((file) => !file.isDirectory())
      .map((file) => ({
        path: `${path}/${file.name}`,
        ext: p.extname(`${path}/${file.name}`),
        pathDir: path,
      }));

    const folders = entries.filter((folder) => folder.isDirectory());

    for (const folder of folders) {
      files.push(...(await getFiles(`${path}/${folder.name}`)));
    }
    return files;
  } catch (error) {
    return error;
  }
}

用法:

getFiles(rootFolderPath)
 .then()
 .catch()

这是我如何使用nodejs的fs。递归搜索目录的Readdir函数。

const fs = require('fs');
const mime = require('mime-types');
const readdirRecursivePromise = path => {
    return new Promise((resolve, reject) => {
        fs.readdir(path, (err, directoriesPaths) => {
            if (err) {
                reject(err);
            } else {
                if (directoriesPaths.indexOf('.DS_Store') != -1) {
                    directoriesPaths.splice(directoriesPaths.indexOf('.DS_Store'), 1);
                }
                directoriesPaths.forEach((e, i) => {
                    directoriesPaths[i] = statPromise(`${path}/${e}`);
                });
                Promise.all(directoriesPaths).then(out => {
                    resolve(out);
                }).catch(err => {
                    reject(err);
                });
            }
        });
    });
};
const statPromise = path => {
    return new Promise((resolve, reject) => {
        fs.stat(path, (err, stats) => {
            if (err) {
                reject(err);
            } else {
                if (stats.isDirectory()) {
                    readdirRecursivePromise(path).then(out => {
                        resolve(out);
                    }).catch(err => {
                        reject(err);
                    });
                } else if (stats.isFile()) {
                    resolve({
                        'path': path,
                        'type': mime.lookup(path)
                    });
                } else {
                    reject(`Error parsing path: ${path}`);
                }
            }
        });
    });
};
const flatten = (arr, result = []) => {
    for (let i = 0, length = arr.length; i < length; i++) {
        const value = arr[i];
        if (Array.isArray(value)) {
            flatten(value, result);
        } else {
            result.push(value);
        }
    }
    return result;
};

假设在节点项目根目录中有一个名为“/database”的路径。一旦这个承诺被解决,它应该吐出'/database'下的每个文件的数组。

readdirRecursivePromise('database').then(out => {
    console.log(flatten(out));
}).catch(err => {
    console.log(err);
});

它使用了节点8中最多的新功能,包括Promises、util/promisify、destructuring、async-await、map+reduce等等,让你的同事在试图弄清楚发生了什么时挠头。

节点 8+

没有外部依赖。

const { promisify } = require('util');
const { resolve } = require('path');
const fs = require('fs');
const readdir = promisify(fs.readdir);
const stat = promisify(fs.stat);

async function getFiles(dir) {
  const subdirs = await readdir(dir);
  const files = await Promise.all(subdirs.map(async (subdir) => {
    const res = resolve(dir, subdir);
    return (await stat(res)).isDirectory() ? getFiles(res) : res;
  }));
  return files.reduce((a, f) => a.concat(f), []);
}

使用

getFiles(__dirname)
  .then(files => console.log(files))
  .catch(e => console.error(e));

节点 10.10+

更新到节点10+,甚至更多的whizbang:

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  return Array.prototype.concat(...files);
}

请注意,从节点11.15.0开始,您可以使用files.flat()而不是array. prototype.concat(…files)来扁平化files数组。

11 +节点

如果你想让所有人都大吃一惊,你可以使用下面使用异步迭代器的版本。除了非常酷之外,它还允许使用者每次提取一个结果,这使得它更适合于真正大的目录。

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function* getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  for (const dirent of dirents) {
    const res = resolve(dir, dirent.name);
    if (dirent.isDirectory()) {
      yield* getFiles(res);
    } else {
      yield res;
    }
  }
}

用法发生了变化,因为返回类型现在是异步迭代器而不是promise

;(async () => {
  for await (const f of getFiles('.')) {
    console.log(f);
  }
})()

如果有人感兴趣,我在这里写了更多关于异步迭代器的文章:https://qwtel.com/posts/software/async-generators-in-the-wild/

There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves onto the next iteration - this guarantees that every iteration of the loop completes in order. In a parallel loop, all the iterations are started at the same time, and one may complete before another, however, it is much faster than a serial loop. So in this case, it's probably better to use a parallel loop because it doesn't matter what order the walk completes in, just as long as it completes and returns the results (unless you want them in order).

一个平行循环看起来是这样的:

var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var pending = list.length;
    if (!pending) return done(null, results);
    list.forEach(function(file) {
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            if (!--pending) done(null, results);
          });
        } else {
          results.push(file);
          if (!--pending) done(null, results);
        }
      });
    });
  });
};

一个串行循环看起来像这样:

var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var i = 0;
    (function next() {
      var file = list[i++];
      if (!file) return done(null, results);
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            next();
          });
        } else {
          results.push(file);
          next();
        }
      });
    })();
  });
};

并且在你的主目录中测试它(警告:如果你的主目录中有很多东西,结果列表将会非常大):

walk(process.env.HOME, function(err, results) {
  if (err) throw err;
  console.log(results);
});

编辑:改进的示例。

我修改了老特雷弗的承诺为蓝鸟工作的基础上的答案

var fs = require('fs'),
    path = require('path'),
    Promise = require('bluebird');

var readdirAsync = Promise.promisify(fs.readdir);
var statAsync = Promise.promisify(fs.stat);
function walkFiles (directory) {
    var results = [];
    return readdirAsync(directory).map(function(file) {
        file = path.join(directory, file);
        return statAsync(file).then(function(stat) {
            if (stat.isFile()) {
                return results.push(file);
            }
            return walkFiles(file).then(function(filesInDir) {
                results = results.concat(filesInDir);
            });
        });
    }).then(function() {
        return results;
    });
}

//use
walkDir(__dirname).then(function(files) {
    console.log(files);
}).catch(function(e) {
    console.error(e); {
});