主题
Node + Cheerio + Request 的简单爬虫。这是很早之前写的代码;如果你会了 Node 爬虫,那么 Python 爬虫会更加简单。😃
javascript
const http = require('http')
const request = require('request')
const cheerio = require('cheerio')
const proxyList = require('./proxylist')
// Shorthand logger: console.info bound to console so it can be called as a bare function.
const LOG = console.info.bind(console)
// Length of each time unit in milliseconds, each spelled out as a product
// of the smaller units it is made of.
const GLOBAL = {
  second: 1000,
  minute: 60 * 1000,
  hour: 60 * 60 * 1000,
  day: 24 * 60 * 60 * 1000,
  week: 7 * 24 * 60 * 60 * 1000
}
// Helpers that scale a count of time units by the matching GLOBAL entry,
// e.g. timer.minute(2) yields GLOBAL.minute * 2.
const timer = {
  second: (count) => GLOBAL.second * count,
  minute: (count) => GLOBAL.minute * count,
  hour: (count) => GLOBAL.hour * count,
  day: (count) => GLOBAL.day * count
}
/**
 * Fetches a page with `http.get`, parses the body with cheerio and logs the
 * `title` attribute of every element matched by the `.title a` selector.
 * Request errors are logged through LOG as well.
 *
 * @param {object|string} options - Passed through unchanged to `http.get`.
 */
const _requestStart = (options) => {
  http.get(options, (resp) => {
    // Decode at the stream level so a multi-byte character that straddles
    // two chunks is not mangled by per-chunk Buffer-to-string conversion.
    resp.setEncoding('utf8')
    let html = ''
    resp.on('data', (chunk) => { html += chunk })
    resp.on('end', () => {
      const $ = cheerio.load(html)
      // LOG (upper case) is the logger defined in this file; a lower-case
      // `log` does not exist and would throw a ReferenceError here.
      $('.title a').each((i, e) => { LOG($(e).attr('title')) })
    })
  }).on('error', (error) => {
    LOG(error)
  })
}
// Origin (scheme + host) of the site being crawled.
const targetHost = 'http://www.wic.edu.cn'
// http.get's `hostname` option must be a bare host name: with the "http://"
// scheme left in, the DNS lookup fails and every request errors out.
const targetHostname = targetHost.replace(/^https?:\/\//, '')
const options = {
  hostname: targetHostname,
  // NOTE(review): `proxy` is an option of the `request` library, not one of
  // Node's documented http.get options — confirm whether it is honoured here.
  // It is also evaluated once at start-up, not once per request.
  proxy: proxyList.GetProxy(),
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'
  }
}
// Fire one crawl request per second for as long as the process runs.
setInterval(() => { _requestStart(options) }, timer.second(1))