仅做了必要的注释,我太懒了。目前只是一个雏形,只实现了基本的需求。有时间会修改得更完善一些。
/*
 * @Author: jiahaiLiu
 * @Date: 2017-07-17 10:44:03
 * @Last Modified by: jiahaiLiu
 * @Last Modified time: 2017-07-17 18:53:48
 * @Usage: node crawler [100]
 *
 * Collects image URLs from the image.so.com JSON search endpoint, page by
 * page, until the requested amount is reached (or the source runs dry), then
 * downloads them one after another into ./collect_pic/ as 0.jpg, 1.jpg, ...
 */
'use strict';

/*
 * Request is designed to be the simplest way possible to make http calls.
 * It supports HTTPS and follows redirects by default.
 */
const request = require('request');
// cheerio is a fast, flexible and lean implementation of core jQuery built
// for the server. The JSON-based flow below does not call it; it was only
// referenced by an earlier, commented-out HTML-scraping variant.
const cheerio = require('cheerio');
/*
 * Async is a utility module which provides straight-forward,
 * powerful functions for working with asynchronous JavaScript.
 */
const async = require('async');
const path = require('path');
const fs = require('fs');
const url = require('url');

// Number of pictures to download when no (valid) amount is given on the CLI.
const DEFAULT_TARGET_AMOUNT = 100;
// Pause before each download starts, in milliseconds.
const DOWNLOAD_DELAY_MS = 400;
// Give up on a single picture if the server stays silent this long, so one
// dead host cannot stall the whole (serial) download queue.
const DOWNLOAD_TIMEOUT_MS = 30000;

// Custom number of pictures to download: `node crawler 250`.
// Anything that is not a positive integer falls back to the default.
const requestedAmount = Number.parseInt(process.argv[2], 10);
const targetAmount = requestedAmount > 0 ? requestedAmount : DEFAULT_TARGET_AMOUNT;

// Name of the folder the pictures are stored in.
const collect_pic_dir = './collect_pic/';
// Collected picture URLs.
const imgList = [];
// Index used to build the next output file name.
let start = 0;
// Start timestamp, used to report the total elapsed time.
const t1 = Date.now();

// Search request description; `sn` (start offset) and `sid` are updated
// from each response before the next page is requested.
const urlObj = {
  protocol: 'http:',
  slashes: true,
  auth: null,
  host: 'image.so.com',
  port: null,
  hostname: 'image.so.com',
  hash: null,
  query: {
    q: '美女',
    src: 'srp',
    correct: '美女',
    sn: '0',
    pn: '60',
    sid: '7e73fad3c0eb8367ede610dcf2784c0e',
    ran: '0',
    ras: '0'
  },
  pathname: '/j',
};

if (!fs.existsSync(collect_pic_dir)) {
  fs.mkdirSync(collect_pic_dir);
  console.log('The ' + collect_pic_dir + ' folder has been created!');
}

loop(asyncDownload);

/**
 * Stops collecting and hands over whatever has been gathered so far.
 * Nothing is started when no picture URL was collected at all.
 *
 * @param {Function} cb - invoked with no arguments when there is work to do.
 */
function finishEarly(cb) {
  if (imgList.length > 0) {
    cb();
  }
}

/**
 * Fetches result pages until `targetAmount` picture URLs are collected,
 * then calls `cb`. If the source ends, stops making progress, or a request
 * fails, `cb` is still called with the partial list (when it is non-empty).
 *
 * Fields read from each response (abridged example):
 *   {
 *     end: false,
 *     sid: "6b57a007f19740b44d562f6e0ec6e050",
 *     lastindex: 121,
 *     list: [{ img: "http://img165.poco.cn/.../..._010.jpg", ... }]
 *   }
 *
 * @param {Function} cb - invoked with no arguments once collecting is over.
 */
function loop(cb) {
  const urlLink = url.format(urlObj);
  console.log(urlLink);

  request(urlLink, function (err, res, body) {
    if (err || res.statusCode !== 200) {
      console.log('request failed:', err || ('HTTP ' + res.statusCode));
      finishEarly(cb);
      return;
    }

    let resObj;
    try {
      resObj = JSON.parse(body);
    } catch (parseErr) {
      console.log('invalid JSON from source url:', parseErr.message);
      finishEarly(cb);
      return;
    }

    const list = resObj && Array.isArray(resObj.list) ? resObj.list : [];
    const countBefore = imgList.length;
    list.forEach(function (item) {
      // Skip entries without a URL; they cannot be downloaded.
      if (item && item.img) {
        imgList.push(item.img);
      }
    });

    if (imgList.length >= targetAmount) {
      cb();
      return;
    }

    // A page that added nothing would otherwise be requested again forever.
    const madeProgress = imgList.length > countBefore;
    if (!resObj || resObj.end || !madeProgress) {
      console.log('no more datas from source url');
      finishEarly(cb);
      return;
    }

    urlObj.query.sn = resObj.lastindex + 1;
    urlObj.query.sid = resObj.sid;
    loop(cb);
  });
}

/**
 * Downloads the collected pictures one at a time (at most `targetAmount`),
 * waiting DOWNLOAD_DELAY_MS before each one, and prints the total elapsed
 * time once every download has finished or failed.
 */
function asyncDownload() {
  // The last page usually overshoots the target; keep only what was asked for.
  const targets = imgList.slice(0, targetAmount);
  console.log('图片总数:', targets.length);

  async.mapSeries(targets, function (item, callback) {
    setTimeout(function () {
      const index = start;
      start++;
      // A failed picture is logged by downloadPic and must not abort the
      // rest of the queue, so the series callback never receives an error.
      downloadPic(item, collect_pic_dir + index + '.jpg', function () {
        callback(null, item);
      });
    }, DOWNLOAD_DELAY_MS);
  }, function () {
    const t2 = Date.now();
    console.log('全部完成,总耗时:', (t2 - t1) + 'ms');
  });
}

/**
 * Streams one picture from `src` into the file `dest`.
 *
 * @param {string} src - picture URL.
 * @param {string} dest - path of the file to write.
 * @param {Function} [done] - optional; called exactly once with `null` after
 *   the file has been flushed, or with the error if the download failed.
 */
function downloadPic(src, dest, done) {
  let settled = false;
  const finish = function (err) {
    if (settled) {
      return;
    }
    settled = true;
    if (err) {
      console.log(err);
    }
    if (typeof done === 'function') {
      done(err || null);
    }
  };

  const file = fs.createWriteStream(dest);
  file.on('finish', function () {
    finish();
  });
  file.on('error', finish);

  let download;
  try {
    // NOTE(review): request appears to throw synchronously for a missing or
    // unusable URL; guard so a single bad entry cannot crash the run.
    download = request.get({ url: src, timeout: DOWNLOAD_TIMEOUT_MS });
  } catch (err) {
    file.end();
    finish(err);
    return;
  }

  download
    .on('response', function (response) {
      if (response.statusCode !== 200) {
        console.log('unexpected status ' + response.statusCode + ' for ' + src);
      }
    })
    .on('error', function (err) {
      // pipe() does not end the destination when the source errors.
      file.end();
      finish(err);
    })
    .pipe(file);
}