目录结构
文件内容
安装依赖
cnpm install cheerio --save
service/spider.js
'use strict';
const Service = require('egg').Service;
/**
 * Egg service exposing a single helper for fetching a remote URL.
 */
class SpiderService extends Service {
  /**
   * Request `url` through `ctx.curl` and hand the result back untouched.
   *
   * @param {string} url - Address to request.
   * @returns {Promise<*>} Whatever `this.ctx.curl(url)` resolves to.
   */
  async requestUrl(url) {
    const { ctx } = this;
    return await ctx.curl(url);
  }
}

module.exports = SpiderService;
schedule/watchdomain.js
var cheerio = require('cheerio')
module.exports = (app) => {
return {
schedule:{
interval:'10s',
type:'all'
},
async task(ctx){
var url = 'https://news.baidu.com/';
var result = await ctx.service.spider.requestUrl(url);
//buff数据转为utf8
var htmlData = result.data.toString();
// 乱码转为utf8
const $ = cheerio.load(htmlData,{decodeEntities:false})
// 拿到网站标题
var title = $('title').html();
if(title != '百度新闻——全球最大的中文新闻平台'){
console.log("网站被修改了")
}else{
console.log("正常")
}
//根据class拿到数据
$('.hotnews a').each(function(){
console.log($(this).html())
})
}
}
}