look,是原生,不是类库
用 for 遍历,然后一次拿2个,或者多个出来 再 Promise.all 之后 再 await 呗。
你就按照并发数量 push 到数组里面,然后用 await Promise.all
同意楼上的观点,建议楼主去看看bluebird中Promise.map源码,bluebird可以做到控制并发数量。
来自酷炫的 CNodeMD
await Promise.all([…]);
@Bingmang 好,我看看
@zhang962976642 能不能贴个代码,小白表示蒙蔽
@Sunshine168 能不能贴个代码,小白表示蒙蔽
@MiYogurt 能不能贴个代码,小白表示蒙蔽
// Dependencies and shared state for the list-page crawler below.
// The typographic quotes left by the forum renderer are replaced with real
// string delimiters; with the curly quotes this snippet does not parse.
var cheerio = require("cheerio");
var superagent = require("superagent");
var fs = require('fs');
const charset = require('superagent-charset');
// superagent-charset is applied to superagent here; the .charset('utf8') call
// made when requesting a page relies on it.
charset(superagent);

var pages = [];
// Base address of the paginated list; the page number is appended to it.
var basicUrl = 'http://www.u148.net/list/';
/**
 * Visits list pages 1 through 1317 in order, awaiting the promise returned by
 * SpiderPage for one page before moving on to the next.
 *
 * @returns {Promise<void>} Fulfils after the last page has been awaited.
 */
async function fetch() {
  let page = 1;
  while (page < 1318) {
    const pageUrl = basicUrl + page;
    await SpiderPage(pageUrl);
    page += 1;
  }
}
/**
 * Downloads one list page and hands the response to mkdir().
 *
 * The returned promise settles only once the HTTP request has finished, so
 * `await SpiderPage(url)` really waits for the page. Previously resolve() was
 * called right after the request was issued, which made the await a no-op and
 * let every request start at once.
 *
 * @param {string} urllll - URL of the list page to request.
 * @returns {Promise<void>} Fulfils after mkdir() has been called with the
 *   response; rejects with the request error, or with whatever mkdir() throws.
 */
async function SpiderPage(urllll) {
  console.log("urllll::" + urllll);
  return new Promise((resolve, reject) => {
    superagent.get(urllll)
      .charset('utf8')
      .end(function(err, sres) {
        if (err) {
          // The original called an undefined next(); reject so the caller
          // actually sees the failure.
          reject(err);
          return;
        }
        try {
          mkdir(sres);
          resolve();
        } catch (handlingError) {
          // A throw inside this callback would otherwise escape the promise.
          reject(handlingError);
        }
      });
  });
}
/**
 * Extracts every `.list-content` entry from one list page and appends the
 * entries, each serialised as JSON, to u1483.txt.
 *
 * @param {{text: string}} sres - Response object; only `text` (the page HTML)
 *   is read.
 */
function mkdir(sres) {
  const $ = cheerio.load(sres.text);
  const singleContext = [];

  $('.list-content').each(function() {
    const entry = $(this);
    const titleLink = entry.find('h1 a');
    singleContext.push(JSON.stringify({
      "eachUrl": 'http://www.u148.net' + titleLink.attr('href'),
      "eachTitle": titleLink.text(),
      "eachWriter": entry.find('.index-time a').text(),
      "eachSummary": entry.find(".summary").text(),
      "eachData": entry.find('.data-text').text().replace('推荐于:', ''),
      "eachLook": entry.find(".data-right").text()
    }));
  });

  // fs.appendFile needs string/Buffer data and a callback; passing the array
  // itself and no callback is rejected by current Node versions. join(',')
  // yields the same text the old implicit array-to-string coercion produced.
  // NOTE(review): nothing separates one page's records from the next in the
  // file — confirm whether a trailing delimiter is wanted.
  fs.appendFile('u1483.txt', singleContext.join(','), 'utf8', function(err) {
    if (err) {
      console.error('Failed to append to u1483.txt:', err);
    }
  });
}
// Start the crawl; report a rejection instead of leaving the promise floating.
fetch().catch(function(err) {
  console.error(err);
  process.exitCode = 1;
});
可以参考下callback实现的
var fs = require('fs');
var path = require('path');
var slog = require('single-line-log').stdout;
var EventEmitter = require("events").EventEmitter;
var util = require("util");
var cheerio = require("cheerio");
var superagent = require("superagent");
const charset = require("superagent-charset");
charset(superagent);
/**
 * Extracts every `.list-content` entry from one list page, appends the
 * entries (each serialised as JSON) to u1483.txt, then invokes `callback`.
 *
 * @param {{text: string}} sres - Response object; only `text` (the page HTML)
 *   is read.
 * @param {function(?Error=): void} callback - Called once the append has
 *   finished. Receives the write error, if any; callers that take no
 *   arguments keep working unchanged.
 */
function mkdir(sres, callback) {
  const $ = cheerio.load(sres.text);
  const singleContext = [];

  $('.list-content').each(function() {
    const entry = $(this);
    const titleLink = entry.find('h1 a');
    singleContext.push(JSON.stringify({
      "eachUrl": 'http://www.u148.net' + titleLink.attr('href'),
      "eachTitle": titleLink.text(),
      "eachWriter": entry.find('.index-time a').text(),
      "eachSummary": entry.find(".summary").text(),
      "eachData": entry.find('.data-text').text().replace('推荐于:', ''),
      "eachLook": entry.find(".data-right").text()
    }));
  });

  // fs.appendFile accepts a string/Buffer, not an array; join(',') yields the
  // same text the old implicit array-to-string coercion produced.
  fs.appendFile('u1483.txt', singleContext.join(','), 'utf8', function(err) {
    if (err) {
      // Previously dropped without a trace.
      console.error('Failed to append to u1483.txt:', err);
    }
    callback(err);
  });
}
/**
 * Text progress bar written through the `slog` single-line writer.
 *
 * @param {string} [description] - Label printed before the percentage;
 *   falls back to 'Progress' when falsy.
 * @param {number} [bar_length] - Bar width in cells; falls back to 25 when falsy.
 */
function ProgressBar(description, bar_length) {
  this.description = description || 'Progress';
  this.length = bar_length || 25;

  /**
   * Draws the bar for the given state.
   *
   * @param {{completed: number, total: number}} opts - Current counters.
   */
  this.render = function(opts) {
    // toFixed() returns a string; the arithmetic below coerces it back.
    var ratio = (opts.completed / opts.total).toFixed(4);
    var filledCount = Math.floor(ratio * this.length);
    var filled = '█'.repeat(Math.max(0, filledCount));
    // ceil + clamp matches the iteration count of a `for (i = 0; i < x; i++)` loop.
    var blank = '░'.repeat(Math.max(0, Math.ceil(this.length - filledCount)));
    var percentLabel = (100 * ratio).toFixed(2) + '% ';
    var counters = ' ' + opts.completed + '/' + opts.total;
    slog(this.description + ': ' + percentLabel + filled + blank + counters);
  };
}
/**
 * Runs a list of task values through `getTask`, keeping at most `limit` of
 * them in flight. Emits 'progress' with `{ completed, total }` after every
 * finished task and 'complete' once all tasks are done. Can be called with or
 * without `new`.
 *
 * @param {Array} [tasks] - Task values; copied, so the caller's array is left intact.
 * @param {function(*, function): void} [getTask] - Worker invoked as
 *   `getTask(value, done)`; it must call `done` exactly once. Defaults to a
 *   worker that finishes immediately.
 * @param {number} [limit] - Maximum number of tasks in flight; falls back to 2 when falsy.
 */
function TaskManger(tasks, getTask, limit) {
  if (!(this instanceof TaskManger))
    return new TaskManger(tasks, getTask, limit);
  EventEmitter.call(this);
  // Private copy: runTask() consumes the queue with shift().
  this.tasks = Array.from(tasks || []);
  this.limit = limit || 2;
  // `sum` is the fixed number of tasks, `total` the number not yet finished.
  // Read the length from the guarded copy so a missing `tasks` does not throw.
  this.total = this.sum = this.tasks.length;
  this.getTask = getTask || function(value, callback) {
    callback();
  };
}
// Set up inheritance before the methods are attached, so it works even with
// util.inherits implementations that replace the prototype object.
util.inherits(TaskManger, EventEmitter);

/** Emits 'complete'. */
TaskManger.prototype.complete = function() {
  this.emit('complete');
};

/** Emits 'progress' with the number of finished tasks and the overall count. */
TaskManger.prototype.progress = function() {
  this.emit('progress', {
    // `total` counts down, so finished = sum - total. Reporting `total`
    // itself made the bar run backwards from 100% to 0%.
    completed: this.sum - this.total,
    total: this.sum
  });
};

/**
 * Starts up to `limit` tasks.
 * @returns {TaskManger} this
 */
TaskManger.prototype.start = function() {
  if (this.sum === 0) {
    // No task will ever call finish(), so signal completion here.
    this.complete();
    return this;
  }
  var limit = Math.min(this.limit, this.tasks.length);
  for (var i = 0; i < limit; i++) this.runTask();
  return this;
};

/**
 * Completion callback handed to each task: records the finished task, then
 * either signals completion or starts the next queued task.
 * @param {*} [err] - Accepted for callback compatibility; not inspected.
 */
TaskManger.prototype.finish = function(err) {
  this.total--;
  this.progress();
  if (this.total === 0)
    return this.complete();
  this.runTask();
  return this;
};

/**
 * Takes the next queued task, if any, and passes it to `getTask`.
 * @returns {TaskManger} this
 */
TaskManger.prototype.runTask = function() {
  // Test the queue length rather than the value: a falsy task such as 0 or ''
  // used to be skipped and the run then never completed.
  if (this.tasks.length === 0) return this;
  var task = this.tasks.shift();
  this.getTask(task, this.finish.bind(this));
  return this;
};
/**
 * Array#taskstart(getTask, limit): runs the array's elements as tasks through
 * a TaskManger, drawing a progress bar on every 'progress' event and logging
 * "finish" on 'complete'.
 *
 * Defined as a non-enumerable property so it does not show up in `for...in`
 * loops over arrays, which a plain assignment to Array.prototype would cause.
 *
 * @param {function(*, function): void} getTask - Worker called as `getTask(value, done)`.
 * @param {number} [limit] - Maximum number of tasks in flight.
 * @returns {TaskManger} The started manager.
 */
Object.defineProperty(Array.prototype, 'taskstart', {
  configurable: true,
  writable: true,
  enumerable: false,
  value: function taskstart(getTask, limit) {
    var self = this;
    var bar = new ProgressBar('爬取进度', 50);
    return TaskManger(self, getTask, limit).on("progress", function(progress) {
      bar.render(progress);
    }).on("complete", function() {
      console.log("finish");
    }).start();
  }
});
/**
 * Builds the arithmetic sequence start, start + step, ... that stops before `end`.
 *
 * A positive step counts up while the value is below `end`; a negative step
 * counts down while the value is above `end`. Previously a negative step with
 * start < end never terminated.
 *
 * @param {number} start - First value (inclusive).
 * @param {number} end - Bound (exclusive).
 * @param {number} [step] - Increment; falls back to 1 when falsy (0, NaN, omitted).
 * @returns {number[]} The generated values; empty when the range is empty.
 */
Array.range = function(start, end, step) {
  var stride = step || 1;
  var result = [];
  if (stride > 0) {
    for (let i = start; i < end; i += stride) result.push(i);
  } else {
    for (let i = start; i > end; i += stride) result.push(i);
  }
  return result;
};
// Crawl list pages 1..1317, passing 20 as the concurrency limit to taskstart.
Array.range(1, 1318).taskstart(function(value, callback) {
  superagent.get('http://www.u148.net/list/' + value)
    .charset('utf8')
    .end(function(err, sres) {
      if (err) {
        // Report the failed page instead of dropping it silently, but still
        // signal completion so the crawl moves on to the next page.
        console.error('\nFailed to fetch list page ' + value + ':', err);
        callback(err);
        return;
      }
      mkdir(sres, callback);
    });
}, 20);
用这个 promise.map 包
// Usage sketch: `pmap` is presumably the export of the promise.map package
// mentioned in this reply (verify); the mapper body is left as a placeholder.
const promise = pmap(arr, async item => {
//
}, 10) // the last argument is the concurrency
其实和 async.parallelLimit 一样 可以使用 bluebird Promise.map, 不喜欢和 bluebird 绑死的用我写的这个 promise.map包即可
@MiYogurt @Sunshine168 function getPage(singeUrl) { return new Promise((resolve,reject)=>{ superagent.get(singeUrl) .set({‘Content-Type’: ‘application/json’, ‘User-Agent’: ‘Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36’}) .charset(‘utf8’) .end(function (err, sres) { if (err) throw err; utils.singleRequest(sres,db) }); resolve(‘resolve==’+singeUrl) }) } let flag = true; pages.forEach((item) => { if(flag){ flag = false; let promises = item.map(function (url) { return getPage(url); }); Promise.all(promises).then(res => { flag = true; }) } }) 就按照您说的这种办法,每次取五个url,如果都成功,flag=true,但事实却是flag永远等于false;是我哪里写错了么?
// Demo fixture: six promises that fulfil with 20..70 after 1-5 seconds.
// Each executor runs as soon as its `new Promise(...)` expression is
// evaluated, so all six timers are already running once this array exists,
// regardless of how the promises are consumed afterwards.
const stream = [
new Promise((yes)=> setTimeout(() => yes(20), 2000)),
new Promise((yes)=> setTimeout(() => yes(30), 1000)),
new Promise((yes)=> setTimeout(() => yes(40), 2000)),
new Promise((yes)=> setTimeout(() => yes(50), 2000)),
new Promise((yes)=> setTimeout(() => yes(60), 2000)),
new Promise((yes)=> setTimeout(() => yes(70), 5000))
]
/**
 * Consumes `stream` in consecutive batches: waits for every promise of one
 * batch, logs the batch's values and its start index, then moves on.
 *
 * @param {number} [batchSize=2] - Number of entries awaited together.
 * @returns {Promise<void>} Fulfils after the last batch has been logged
 *   (immediately for an empty stream).
 * @throws {RangeError} If `batchSize` is not at least 1.
 */
function run(batchSize = 2) {
  if (!(batchSize >= 1)) {
    throw new RangeError('batchSize must be at least 1');
  }
  function dispatch(i) {
    // No more `index = i`: that created an implicit global and throws a
    // ReferenceError in strict mode / ES modules.
    if (i >= stream.length) {
      return Promise.resolve();
    }
    // slice() keeps the final batch free of `undefined` padding when the
    // stream length is not a multiple of the batch size.
    return Promise.all(stream.slice(i, i + batchSize)).then((arr) => {
      console.log(arr)
      console.log(i)
      return dispatch(i + batchSize)
    })
  }
  return dispatch(0)
}
// Kick off the batched run; report failures instead of leaving the promise floating.
Promise.resolve(run()).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
// Demo fixture: six promises that fulfil with 20..70 after 2-6 seconds.
// Each executor runs as soon as its `new Promise(...)` expression is
// evaluated, so all six timers are already running once this array exists,
// regardless of how the promises are consumed afterwards.
const stream = [
new Promise((yes)=> setTimeout(() => yes(20), 5000)),
new Promise((yes)=> setTimeout(() => yes(30), 6000)),
new Promise((yes)=> setTimeout(() => yes(40), 2000)),
new Promise((yes)=> setTimeout(() => yes(50), 2000)),
new Promise((yes)=> setTimeout(() => yes(60), 4000)),
new Promise((yes)=> setTimeout(() => yes(70), 4000))
]
/**
 * Walks `stream` two entries at a time: logs the pair's start index, awaits
 * both entries together, then logs the resolved values.
 *
 * @returns {Promise<void>} Fulfils after the last pair has been logged.
 */
async function run() {
  let offset = 0;
  while (offset < stream.length) {
    console.log(offset)
    const pair = [stream[offset], stream[offset + 1]];
    const arr = await Promise.all(pair);
    console.log(arr)
    offset += 2;
  }
}
// Kick off the batched run; report failures instead of leaving the promise floating.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
通过事件分发也不错。
连个 markdown 都格式化不好,哎,算了。
@MiYogurt 谢谢小哥,,解决了
既然不想用类库, 那就自己写个队列控制并发
不想用库,那就把 https://github.com/sindresorhus/promise-fun 对应的方法,CTRL+C 过来呗 -.-!!