编写爬虫示例:
// Example crawler: fetch the index page of a cnblogs user's blog and
// print every post grouped under its day heading.
var http = require('http');
var cheerio = require('cheerio');

var url = 'http://www.cnblogs.com/tianxintian22/';

/**
 * Parse the blog index HTML into a list of per-day records.
 *
 * Output shape:
 * [{
 *   dayTitle: '<day heading text>',
 *   dayCont: [{ postId: '', postTitle: '', postCont: '' }, ...]
 * }]
 *
 * @param {string} html - raw HTML of the blog index page
 * @returns {Array<{dayTitle: string, dayCont: Array<Object>}>}
 */
function filterblogs(html) {
    var $ = cheerio.load(html);
    var blogDatas = [];

    $('.day').each(function () {
        var day = $(this);
        var blogData = {
            dayTitle: day.find('.dayTitle a').text(),
            dayCont: []
        };

        // A single `.day` section may contain several posts. Walk the
        // title/description pairs by index instead of querying the day
        // once: the original code let `.text()` concatenate ALL titles
        // of the day into one string and always took the first post's
        // href, garbling multi-post days.
        // NOTE(review): assumes .postTitle and .postCon entries appear
        // in matching order within a day — holds for cnblogs markup.
        var titles = day.find('.postTitle a');
        var descs = day.find('.postCon .c_b_p_desc');

        titles.each(function (i) {
            var desc = descs.eq(i);
            // hrefs look like ".../p/<postId>.html". Guard against a
            // missing href so one malformed entry cannot throw and
            // abort the whole parse.
            var href = desc.find('a').attr('href') || '';
            var idPart = href.split('p/')[1];
            var postId = idPart ? idPart.replace('.html', '') : '';

            blogData.dayCont.push({
                postId: postId,
                postTitle: $(this).text(),
                postCont: desc.text()
            });
        });

        blogDatas.push(blogData);
    });

    return blogDatas;
}

/**
 * Pretty-print the parsed day records to the console.
 *
 * @param {Array<{dayTitle: string, dayCont: Array<Object>}>} blogDatas
 */
function printBlogInfo(blogDatas) {
    blogDatas.forEach(function (item) {
        console.log(item.dayTitle + ' ');
        item.dayCont.forEach(function (blog) {
            console.log(' 【' + blog.postId + '】' + blog.postTitle + ' ');
            console.log(' ' + blog.postCont + ' ');
        });
    });
}

http.get(url, function (res) {
    // Decode chunks as UTF-8 up front: without this, `html += data`
    // coerces each Buffer separately, corrupting any multi-byte
    // character that straddles a chunk boundary.
    res.setEncoding('utf8');

    var html = '';
    res.on('data', function (data) {
        html += data;
    });
    res.on('end', function () {
        printBlogInfo(filterblogs(html));
    });
}).on('error', function (err) {
    // Surface the actual failure instead of discarding the error object.
    console.log('获取博客数据出错', err.message);
});