如何在不使用第三方库的情况下使用Node.js下载文件?
我不需要什么特别的东西。我只想从给定的URL下载文件,然后将其保存到给定的目录。
如何在不使用第三方库的情况下使用Node.js下载文件?
我不需要什么特别的东西。我只想从给定的URL下载文件,然后将其保存到给定的目录。
当前回答
var fs = require('fs'),
request = require('request');
var download = function(uri, filename, callback){
request.head(uri, function(err, res, body){
console.log('content-type:', res.headers['content-type']);
console.log('content-length:', res.headers['content-length']);
request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
});
};
download('https://www.cryptocompare.com/media/19684/doge.png', 'icons/taskks12.png', function(){
console.log('done');
});
其他回答
这里还有另一种方法来处理它没有第三方依赖,也搜索重定向:
var download = function(url, dest, cb) {
var file = fs.createWriteStream(dest);
https.get(url, function(response) {
if ([301,302].indexOf(response.statusCode) !== -1) {
body = [];
download(response.headers.location, dest, cb);
}
response.pipe(file);
file.on('finish', function() {
file.close(cb); // close() is async, call cb after close completes.
});
});
}
说到处理错误,监听请求错误甚至更好。我甚至会通过检查响应代码来验证。这里认为只有200个响应代码成功,但其他代码可能很好。
const fs = require('fs');
const http = require('http');
const download = (url, dest, cb) => {
const file = fs.createWriteStream(dest);
const request = http.get(url, (response) => {
// check if response is success
if (response.statusCode !== 200) {
return cb('Response status was ' + response.statusCode);
}
response.pipe(file);
});
// close() is async, call cb after close completes
file.on('finish', () => file.close(cb));
// check for request error too
request.on('error', (err) => {
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
file.on('error', (err) => { // Handle errors
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
};
尽管这段代码相对简单,但我建议使用request模块,因为它处理更多http不支持的协议(你好,HTTPS!)。
可以这样做:
const fs = require('fs');
const request = require('request');
const download = (url, dest, cb) => {
const file = fs.createWriteStream(dest);
const sendReq = request.get(url);
// verify response code
sendReq.on('response', (response) => {
if (response.statusCode !== 200) {
return cb('Response status was ' + response.statusCode);
}
sendReq.pipe(file);
});
// close() is async, call cb after close completes
file.on('finish', () => file.close(cb));
// check for request errors
sendReq.on('error', (err) => {
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
file.on('error', (err) => { // Handle errors
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
};
编辑:
要使它与https兼容,请更改
const http = require('http');
to
const http = require('https');
编写自己的解决方案,因为现有的不符合我的要求。
包括:
HTTPS下载(http下载时切换包到http) 基于承诺的函数 处理转发路径(状态302) 浏览器头-需要在一些cdn 来自URL的文件名(以及硬编码) 错误处理
打印出来的,更安全。如果你使用的是纯JS(没有Flow,没有TS),可以随意删除类型,或者转换为.d。ts文件
index.js
import httpsDownload from httpsDownload;
httpsDownload('https://example.com/file.zip', './');
httpsDownload.[js|ts]
import https from "https";
import fs from "fs";
import path from "path";
function download(
url: string,
folder?: string,
filename?: string
): Promise<void> {
return new Promise((resolve, reject) => {
const req = https
.request(url, { headers: { "User-Agent": "javascript" } }, (response) => {
if (response.statusCode === 302 && response.headers.location != null) {
download(
buildNextUrl(url, response.headers.location),
folder,
filename
)
.then(resolve)
.catch(reject);
return;
}
const file = fs.createWriteStream(
buildDestinationPath(url, folder, filename)
);
response.pipe(file);
file.on("finish", () => {
file.close();
resolve();
});
})
.on("error", reject);
req.end();
});
}
function buildNextUrl(current: string, next: string) {
const isNextUrlAbsolute = RegExp("^(?:[a-z]+:)?//").test(next);
if (isNextUrlAbsolute) {
return next;
} else {
const currentURL = new URL(current);
const fullHost = `${currentURL.protocol}//${currentURL.hostname}${
currentURL.port ? ":" + currentURL.port : ""
}`;
return `${fullHost}${next}`;
}
}
function buildDestinationPath(url: string, folder?: string, filename?: string) {
return path.join(folder ?? "./", filename ?? generateFilenameFromPath(url));
}
function generateFilenameFromPath(url: string): string {
const urlParts = url.split("/");
return urlParts[urlParts.length - 1] ?? "";
}
export default download;
Gfxmonk的答案在回调和file.close()完成之间有一个非常紧张的数据竞赛。File.close()实际上接受一个回调函数,该函数在close完成时被调用。否则,立即使用文件可能会失败(非常罕见!)。
一个完整的解决方案是:
var http = require('http');
var fs = require('fs');
var download = function(url, dest, cb) {
var file = fs.createWriteStream(dest);
var request = http.get(url, function(response) {
response.pipe(file);
file.on('finish', function() {
file.close(cb); // close() is async, call cb after close completes.
});
});
}
如果不等待finish事件,幼稚的脚本可能最终得到一个不完整的文件。如果不通过close调度cb回调,您可能会在访问文件和文件实际准备就绪之间出现竞争。
根据上面的其他答案和一些微妙的问题,下面是我的尝试。
Check the file does not exist before hitting the network by using fs.access. Only create the fs.createWriteStream if you get a 200 OK status code. This reduces the amount of fs.unlink commands required to tidy up temporary file handles. Even on a 200 OK we can still possibly reject due to an EEXIST file already exists (imagine another process created the file whilst we were doing network calls). Recursively call download if you get a 301 Moved Permanently or 302 Found (Moved Temporarily) redirect following the link location provided in the header. The issue with some of the other answers recursively calling download was that they called resolve(download) instead of download(...).then(() => resolve()) so the Promise would return before the download actually finished. This way the nested chain of promises resolve in the correct order. It might seem cool to clean up the temp file asynchronously, but I chose to reject only after that completed too so I know that everything start to finish is done when this promise resolves or rejects.
const https = require('https');
const fs = require('fs');
/**
* Download a resource from `url` to `dest`.
* @param {string} url - Valid URL to attempt download of resource
* @param {string} dest - Valid path to save the file.
* @returns {Promise<void>} - Returns asynchronously when successfully completed download
*/
function download(url, dest) {
return new Promise((resolve, reject) => {
// Check file does not exist yet before hitting network
fs.access(dest, fs.constants.F_OK, (err) => {
if (err === null) reject('File already exists');
const request = https.get(url, response => {
if (response.statusCode === 200) {
const file = fs.createWriteStream(dest, { flags: 'wx' });
file.on('finish', () => resolve());
file.on('error', err => {
file.close();
if (err.code === 'EEXIST') reject('File already exists');
else fs.unlink(dest, () => reject(err.message)); // Delete temp file
});
response.pipe(file);
} else if (response.statusCode === 302 || response.statusCode === 301) {
//Recursively follow redirects, only a 200 will resolve.
download(response.headers.location, dest).then(() => resolve());
} else {
reject(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
}
});
request.on('error', err => {
reject(err.message);
});
});
});
}