NodeJS程序,执行完后没有自动退出,可能是什么原因造成的?
const https = require("https");
const http = require("http");
const fs = require("fs");
let fileCount = 0;
function add() {
fileCount++;
console.log(fileCount);
}
const browser = {
"User-Agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
};
function queryByPage(page = 1) {
const req = https.request(
`https://www.doutula.com/article/list/?page=${page}`,
{
method: "GET",
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
// 拒绝301
allow_redirects: false,
},
(resq) => {
console.log(`${page} status CODE:`, resq.statusCode);
let data = "";
resq.on("data", (chunk) => {
data = `${data}${chunk}`;
// console.log(`响应主体: ${chunk}`);
});
resq.on("end", () => {
const imgSrc = getImgUrlFormDocument(data, "data-original");
imgSrc.forEach((item) => {
if (item) {
const imgurl = item.slice(15, -1);
const isHttps = /^http/.test(imgurl);
getImg(
isHttps ? imgurl : "https:" + imgurl,
`./Biaoqing/第${page}页`
);
}
});
});
}
);
req.end();
}
function getImg(url, saveLoaclPath = "./新建文件夹") {
const imgUrl = url.split("/");
const imgDefaultName = imgUrl[imgUrl.length - 1];
// 图片请求写入本地
let imgData = "";
const imgRequest = http.get(url, browser, function (req) {
// console.log("img request STATUS CODE:".req.statusCode);
// 改变编码以保存图片
req.setEncoding("binary");
req.on("data", function (chunk) {
imgData += chunk;
});
req.on("end", function () {
fs.mkdir(saveLoaclPath, { recursive: true }, function () {
fs.writeFile(
saveLoaclPath + "/" + imgDefaultName,
imgData,
"binary",
function (err) {
add();
if (err) {
return console.log(err);
}
console.log(
`${saveLoaclPath.slice(2)}/${imgDefaultName} save success !!!`
);
}
);
});
});
});
imgRequest.end();
}
/**
* 从文档中解析img路径
*/
function getImgUrlFormDocument(document, imgProps = "src") {
const img = document.match(/(<img.*?)>/g);
if (imgProps === "img") {
return img;
}
const imgUrlProps = new RegExp(`(${imgProps}=['"].*?)['"]`, "g");
return img.map(
(item) => item.match(imgUrlProps) && item.match(imgUrlProps)[0]
);
}
/**
* 创建请求
*/
Array.from({ length: 2 }).forEach((item, index) => {
queryByPage(index + 1);
});
process.on("uncaughtException", function (err) {
console.log("Caught exception: " + err);
});
3 回复
创建两个请求都成功了,但是写入文件时写到一半时就停住了,也没有报错,麻烦大佬们指导指导
不要用 callback 的方式,用 async 方式,学习 Promise:
http.get
换为 https://github.com/node-modules/urllibmkdir
啥的换为 mkdirp 等 promise 方式的,可以直接用 https://www.npmjs.com/package/mz
fs.writeFile 这种方式也容易因为文件太大造成内存溢出等问题,建议看看 stream 和直接 read/write 的区别