1:使用 Node.js 内置的 http 模块(执行命令:node app.js "http://www.baidu.com")
运行下面代码
// app.js
// Fetches a web page with Node's built-in "http" module and prints its body.
var http = require('http');
var url = require('url');

/**
 * Downloads the resource at `u` and hands the complete body to `cb`.
 *
 * @param {string} u - Absolute http:// URL to fetch.
 * @param {function(string)} cb - Called once with the full response body.
 * @param {function(Error)} [onError] - Optional handler for request/response
 *   errors; when omitted the error message is written to stderr.
 */
function spider(u, cb, onError) {
    var fail = onError || function (err) {
        console.error('spider_error: ' + err.message);
    };

    http.get(url.parse(u), function (res) {
        var d = '';
        // Decode inside the stream so a multi-byte character that is split
        // across two chunks is not corrupted by the string concatenation below.
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            d += chunk;
        });
        res.on('error', fail);
        res.on('end', function () {
            console.log('spider_end && do cb');
            cb(d);
        });
    }).on('error', fail); // an unhandled 'error' event would crash the process
}

var u = "";
if (require.main === module) {
    u = process.argv[2] || "";
    if (!u) {
        console.error('usage: node app.js "http://www.baidu.com"');
    }
}

// Only issue the request when there is actually a URL to fetch.
if (u) {
    spider(u, function (data) {
        // `data` is the content of the fetched page.
        console.log(data);
    });
}
2:使用第三方模块 nodegrass(执行命令:node app.js "http://www.baidu.com")
运行下面代码
// app.js
// Fetches a page with the third-party "nodegrass" module and prints its content.
var url = "http://www.cnblogs.com/xiaochao12345/archive/2014/10/23/4044950.html";
var ng = require('nodegrass');

if (require.main === module) {
    console.log(process.argv);
    // Keep the default URL above when no URL is given on the command line;
    // assigning process.argv[2] unconditionally would replace it with undefined.
    url = process.argv[2] || url;
}

ng.get(url, function (data) {
    // `data` is the content of the fetched page.
    console.log(data);
}, 'utf8');
3:使用第三方模块 superagent(执行命令:node app.js "http://www.baidu.com")
运行下面代码
// app.js
// Fetches a page with the third-party "superagent" module and prints the response.
var url = "http://www.cnblogs.com/xiaochao12345/archive/2014/10/23/4044950.html";
var superagent = require("superagent");

if (require.main === module) {
    console.log(process.argv);
    // Fall back to the default URL above when no URL is passed on the command line.
    url = process.argv[2] || url;
}

superagent.get(url).end(function (err, res) {
    // Report a failed request instead of announcing success unconditionally.
    if (err) {
        console.error('fetch failed', err);
        return;
    }
    console.log('fetch successful');
    console.log(res);
});
4:使用第三方模块 curl(执行命令:node app.js "http://www.baidu.com")
运行下面代码
// app.js
// Requests a URL with the third-party "curl" module and dumps everything the
// callback receives.
var curl = require("curl");

// The target URL is only read from the command line when this file is run directly.
var u = (require.main === module) ? process.argv[2] : "";

// Prints the raw arguments object so the callback's full signature can be inspected.
function dumpArguments(data) {
    console.log(arguments);
}

curl.get(u, dumpArguments);
______________________________________________________________________________________________________________
Node.js 中处理 DOM 节点的方式(接口都与 jQuery 保持一致):
1:引用cheerio
运行下面代码
// Parses an HTML string with cheerio and queries it through its jQuery-style API.
var cheerio = require("cheerio");
var html = "<html><body><div id=\"div1\">text</div></body></html>";
var $ = cheerio.load(html);

// Prints a label line followed by the value it describes.
function show(label, value) {
    console.log(label);
    console.log(value);
}

var $div1 = $("#div1");

show("html", $.html());
show("#div1————〉html", $div1.html());
show("#div1----〉text", $div1.text());
2:引用jquery
运行下面代码
// Wraps an HTML string with jQuery and prints the text found under #div1.
var $ = require("jquery");

var markup = "<html><body><div id=\"div1\">text</div></body></html>";
var $dom = $(markup);
var div1Text = $dom.find("#div1").text();

console.log(div1Text);
3:引用jsdom
运行下面代码
// Downloads a page with "curl", loads it into jsdom and runs jQuery inside
// that simulated page.
// NOTE(review): jsdom.jsdom() and document.createWindow() are kept exactly as
// in the original snippet; they presumably belong to an old jsdom release —
// confirm against the installed version.
var jsdom = require('jsdom');
var curl = require("curl");

var u = "https://github.com";
if (require.main === module) {
    // Keep the default URL above when no URL is passed on the command line.
    u = process.argv[2] || u;
}

curl.get(u, function (arg0, html) {
    // html.body is dereferenced below, so stop early when no response object
    // was delivered. arg0 is presumably the request error — TODO confirm.
    if (!html) {
        console.error('request failed', arg0);
        return;
    }

    // jsdom acts like an opened page in which scripts can be executed.
    var document = jsdom.jsdom(html.body);

    // List the property names available on the response object.
    for (var a in html) console.log(a);

    var window = document.createWindow();
    var script = document.createElement('script');

    // Alternative remote source: http://code.jquery.com/jquery-1.4.2.js
    script.src = "http://127.0.0.1:81/js/jquery.min.js";

    // Once jQuery has loaded inside the simulated page, use it to read the body text.
    script.onload = function () {
        console.log(1);
        console.log(window.jQuery("body").text());
    };

    document.head.appendChild(script);
});
_________________________________________________________________________________________________________________
保存文件时,直接使用 Node.js 内置的 fs 模块即可:
运行下面代码
// Appends a piece of text to a file with Node's built-in fs module.
var fs = require("fs");

// Completion callback: rethrow a failure, otherwise report that the write finished.
function onAppended(err) {
    if (err) {
        throw err;
    }
    console.log('done');
}

fs.appendFile('file-name', "text_text_text_text", onAppended);