• nodeJS


    ]

    var http = require('http');
    var url=require('url');
    var request = require('request');
    var fs = require('fs');
    var jsdom = require('jsdom');
    
    /**
     * Fetch a fixed Baidu search-results page, load the markup into jsdom,
     * inject jQuery through a <script> element and append '1' to result.txt
     * once for every <table> element found.
     *
     * @param {number} nPage - Page number; only used in the progress log line.
     * @param {Object} opt - Request options; only `opt.path` is read, and only
     *     for the error log message. May be null/undefined.
     * @param {Function} fnSpiderData - Currently unused: the call that would
     *     hand it the page data is commented out at the end of the 'end' handler.
     */
    function spiderUrl(nPage, opt, fnSpiderData)
    {   // Fetch the page contents with http.get
        var req = http.get( url.parse('http://www.baidu.com/s?tn=baiduhome_pg&ie=utf-8&bs=%E7%BD%91%E9%A1%B5%E4%B8%AD%E9%80%89%E4%B8%AD+%E4%BA%AE%E5%BA%A6&f=8&rsv_bp=1&rsv_spt=1&wd=%E7%BD%91%E9%A1%B5%E4%B8%AD%E9%80%89%E4%B8%AD+%E9%80%89%E6%8B%A9%E9%A2%9C%E8%89%B2&rsv_sug3=5&rsv_sug=1&rsv_sug1=4&rsv_sug4=78&inputT=4594'), function(res)
        {

            var g_data="";

            // Decode at the stream level: concatenating raw Buffer chunks would
            // stringify each chunk separately and corrupt any multi-byte UTF-8
            // character that happens to straddle a chunk boundary.
            res.setEncoding('utf8');

            res.on('data', function (chunk)
            {
                g_data+=chunk;
            });

            res.on('end', function()
            {
                console.log("do page " + nPage);
                /*
                Debug aid: dump the raw page to disk.
                fs.appendFile('nodeBaidu.html', g_data, function (err) {
                    if (err) throw err;
                    console.log('The "data to append" was appended to file!');
                });
                */
                var document = jsdom.jsdom(g_data);
                var script = document.createElement("script");
                var window = document.createWindow();
                script.src = 'http://code.jquery.com/jquery-1.4.2.js';
                // Once the injected script has loaded, jQuery's selectors and each() are usable
                script.onload = function() {
                    window.jQuery('table').each(function(e){

                        // Save to a local file; pick whatever file name you like
                        fs.appendFile('result.txt', '1',function(r){
                            if(r)
                            {
                                console.log('error');
                                // Stop here so a failed write is not also reported as a success.
                                return;
                            }
                            console.log('appendFile success');
                        });
                    });
                };
                document.head.appendChild(script);
                //fnSpiderData(g_data, nPage);
            });
        });

        req.on('error', function(e)
        {
            // Guard against a missing opt so the error handler itself cannot throw.
            var path = opt ? opt.path : undefined;
            console.log('problem with request ' + path + ' : ' + e.message);
        });

        req.end();
    };
    // Start the crawl for page 1 with empty options; the callback prints whatever data it is handed.
    spiderUrl(1, {}, function (body, pageNo) {
        console.log(body);
    });

    //抓取博客园的内容

    var http = require('http');
    var fs = require('fs');
    var url = require('url');
    var jsdom = require('jsdom');
    
    /**
     * Download the resource at `u` over HTTP and hand its complete body to `cb`.
     *
     * @param {string} u - Absolute http:// URL to fetch.
     * @param {Function} cb - Called once, with the whole response body as a
     *     string, after the response has ended. Not called if the request fails;
     *     the failure is logged instead.
     */
    function spider(u,cb){
        var req = http.get( url.parse(u), function(res){
            var d = '';
            // Decode at the stream level: appending raw Buffer chunks would
            // stringify each chunk on its own and corrupt any multi-byte UTF-8
            // character that straddles a chunk boundary.
            res.setEncoding('utf8');
            res.on('data',function(chunk){
                d += chunk;
            });
            res.on('end',function(){
                console.log('spider_end && do cb');
                cb(d);
            });
        });
        // Without a listener, a failed request (DNS failure, refused connection, ...)
        // is an unhandled 'error' event, which throws and takes the process down.
        req.on('error', function(e){
            console.log('problem with request ' + u + ' : ' + e.message);
        });
    };
    // Crawl a cnblogs category page and append every post summary's markup to blogscn.html.
    spider('http://www.cnblogs.com/cate/108703/', function (pageHtml) {
        // Build a DOM from the fetched markup; jQuery is brought in through an injected <script>.
        var doc = jsdom.jsdom( pageHtml );
        var win = doc.createWindow();
        var jqueryTag = doc.createElement('script');

        // Appends one summary element's inner markup, followed by a line break, to the output file.
        function saveSummary(index, node) {
            var fragment = node.innerHTML + '<br>';
            fs.appendFile('blogscn.html', fragment, function (writeError) {
                if (writeError) {
                    throw writeError;
                }
                console.log('done');
            });
        }

        jqueryTag.src = 'http://code.jquery.com/jquery-1.4.2.js';
        // Once the script has loaded, jQuery's selectors and each() are usable on this window.
        jqueryTag.onload = function () {
            var summaries = win.jQuery('.post_item_summary');
            summaries.each(saveSummary);
        };

        doc.head.appendChild( jqueryTag );
    });

    //也可以直接 npm install jquery,然后使用下面的代码抓取,这样更快

    var $ = require('jquery');
    var fs = require('fs');
    // Fetch the qiushibaike "8hr" page and save the text of each .block element's .content.
    $.get('http://www.qiushibaike.com/8hr', function (pageHtml) {
        var blocks = $(pageHtml).find('.block');
        blocks.each(function (index, block) {
            var text = $(block).find('.content').text();
            writeToFile( text );
        });
    });
    /**
     * Append `data` to qiubai.txt.
     *
     * Logs 'done' after a successful write; a write error is rethrown from
     * the append callback.
     *
     * @param {string} data - Text to append.
     */
    function writeToFile(data){
        // Completion handler for the append: rethrow on failure, otherwise report success.
        function onAppended(failure){
            if(failure){
                throw failure;
            }
            console.log('done');
        }

        fs.appendFile('qiubai.txt', data, onAppended);
    }
  • 相关阅读:
    NGINX location 在配置中的优先级
    CentOS 系统启动流程
    微软输入法正则bug
    uniapp改变页面背景色
    路由Router
    vue-cli3替换默认的title和图标(区别脚手架vue-cli2版本)
    浏览器报错 Refused to apply style from 'http://******' because its MIME type ('text/html') is not a supported stylesheet MIME type, and strict MIME checking is enabled.
    禁止浏览器后退
    前端构建工具(webpack-gulp-grunt-rollup...)
    鼠标经过小箭头(状态伪类需求)
  • 原文地址:https://www.cnblogs.com/diligenceday/p/3477167.html
Copyright © 2020-2023  润新知