Skip to content

Node+Cheerio+Request的简单爬虫。很早之前写的代码,如果你会了Node爬虫,那么Python爬虫会更加简单。😃

javascript
const http = require('http')
const request = require('request')
const cheerio = require('cheerio')
const proxyList = require('./proxylist')
 
// Shorthand logger: forwards every argument to console.info.
const LOG = (...messages) => console.info(...messages)

// Lengths of common time units in milliseconds. Each unit is derived from
// the previous one so the relationships between them are explicit.
const GLOBAL = (() => {
  const second = 1000
  const minute = 60 * second
  const hour = 60 * minute
  const day = 24 * hour
  const week = 7 * day
  return { second, minute, hour, day, week }
})()
 
// Converters from a number of time units to milliseconds.
// Each helper multiplies the matching GLOBAL unit length by the given count.
const timer = {
  // `count` seconds, in milliseconds.
  second: (count) => GLOBAL.second * count,
  // `count` minutes, in milliseconds.
  minute: (count) => GLOBAL.minute * count,
  // `count` hours, in milliseconds.
  hour: (count) => GLOBAL.hour * count,
  // `count` days, in milliseconds.
  day: (count) => GLOBAL.day * count
}


/**
 * Issues one HTTP GET request and, once the full response body has been
 * received, logs the `title` attribute of every element matching the
 * `.title a` selector.
 *
 * Request errors are logged rather than thrown.
 *
 * @param {object} options - Passed unchanged as the first argument of `http.get`.
 */
const _requestStart = (options) => {
  http.get(options, (resp) => {
    // Let the stream decode UTF-8 itself: appending raw Buffer chunks to a
    // string converts each chunk separately and can corrupt a multi-byte
    // character that straddles a chunk boundary.
    resp.setEncoding('utf8')

    let html = ''
    resp.on('data', (chunk) => { html += chunk })
    resp.on('end', () => {
      const $ = cheerio.load(html)
      const titles = $('.title a')
      // Use the file's LOG helper; the previous `log` identifier was never
      // defined and threw a ReferenceError for every matched title.
      titles.each((i, e) => { LOG($(e).attr('title')) })
    })
  }).on('error', (error) => {
    LOG(error)
  })
}

// Base URL of the site being crawled.
const targetHost = 'http://www.wic.edu.cn'

// Request options handed to http.get on every tick.
const options = {
  // `hostname` must be a bare host name; passing the full URL (scheme
  // included) makes the lookup target the literal string
  // 'http://www.wic.edu.cn', so extract just the host part.
  hostname: new URL(targetHost).hostname,
  // NOTE(review): `proxy` looks like an option of the `request` library;
  // it is not among the options documented for Node's http.get, so it is
  // presumably ignored here. Confirm the intended client.
  proxy: proxyList.GetProxy(),
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'
  }
}

// Fire one request per second, indefinitely.
setInterval(() => { _requestStart(options) }, timer.second(1))

Powered by VitePress.