# 记一次 Node 爬取古诗文网数据
- 搭建环境
- 使用的是node的koa框架,使用koa的脚手架工具在本地运行起来
<pre>
const Koa = require("koa");
const app = new Koa();
const static = require("koa-static");
const path = require("path");
const cors = require("koa-cors");
const bodyParser = require('koa-bodyparser');
var history = require('connect-history-api-fallback');
const fs = require('fs');
var request = require('request');
const session = require('koa-session');
app.keys = [‘some secret hurr’];
//mongoose数据库
const mongoose = require(“mongoose”); mongoose.connect(‘mongodb://...:27017/koa-mongo’); var db = mongoose.connection; db.on(‘error’, console.error.bind(console, ‘connection error:’)); db.once(‘open’, function() { console.log(“connnected!!!”); });
// app.use(enforceHttps()); var options = { key: fs.readFileSync(‘guhan.site.key’), cert: fs.readFileSync(‘guhan.site.crt’) };
//静态目录 const staticPath = “./static”;
app.use(static( path.join(__dirname,staticPath) )); app.use(session(CONFIG, app)); app.use(bodyParser());//数据JSON类型
//跨域资源共享 app.use(cors({ origin:function(ctx){ // if(ctx.header.origin.indexOf(“localhost”)>-1){ // return ctx.header.origin // }else{ // return '’ // } return "" }, exposeHeaders: [‘WWW-Authenticate’, ‘Server-Authorization’, ‘Date’], maxAge: 100, credentials: true, allowMethods: [‘GET’, ‘POST’, ‘OPTIONS’], allowHeaders: [‘Content-Type’, ‘Authorization’, ‘Accept’, ‘X-Custom-Header’, ‘anonymous’], }));
// Recursive route registration: start from the ./router directory next to this file.
const rootRoute = path.join(__dirname, '/router/');

/**
 * Recursively walk a directory and mount every file found in it on `app`.
 *
 * Sub-directories are descended into. Every other entry is loaded with
 * require() and must export an object exposing `routes()` and
 * `allowedMethods()` (presumably a koa-router instance — confirm against the
 * files under ./router).
 *
 * @param {string} rootRoute - Directory to scan for router modules.
 */
function utilRouter(rootRoute) {
  const entries = fs.readdirSync(rootRoute);
  for (const item of entries) {
    const routerPath = path.join(rootRoute, item);
    if (fs.statSync(routerPath).isDirectory()) {
      utilRouter(routerPath);
    } else {
      // Load the module once and reuse it for both middleware registrations.
      const router = require(routerPath);
      app.use(router.routes()).use(router.allowedMethods());
    }
  }
}

utilRouter(rootRoute);

app.listen(80);
</pre>
- 下载需要的npm包
- superagent 和 cheerio

  `npm install cheerio superagent --save-dev`
- superagent 是 Node.js 里一个非常方便的客户端 HTTP 请求模块(类似 Python 中的 requests 库)。当你需要发起 GET、POST、PUT、DELETE、HEAD 等请求时,就可以使用它。详细用法可以查看 superagent 的官方文档;
<pre>
const superagent = require('superagent');

// callback
superagent
  .post('/api/pet')
  .send({ name: 'Manny', species: 'cat' }) // sends a JSON post body
  .set('X-API-Key', 'foobar')
  .set('accept', 'json')
  .end((err, res) => {
    // Calling the end function will send the request
  });

// promise with then/catch
superagent.post('/api/pet').then(console.log).catch(console.error);

// promise with async/await
(async () => {
  try {
    const res = await superagent.post('/api/pet');
    console.log(res);
  } catch (err) {
    console.error(err);
  }
})();
</pre>
- cheerio 是一个 API 类似 jQuery 的服务端 HTML 解析库,更多用法可查看 cheerio 的官方文档。
let $ = cheerio.load('<ul id="fruits">...</ul>');
移动地址:演示