C#版本的网络爬虫感觉还是很好用的。
// Fetches a cnblogs article and prints the text of every <p> element found
// directly under the post-body container.

// Download the raw HTML of the page via HttpHelper.
HttpHelper http = new HttpHelper();
HttpItem item = new HttpItem() { URL = "http://www.cnblogs.com/danielWise/archive/2011/02/28/1966808.html" };
string resultHtml = http.GetHtml(item).Html;

// Parse the downloaded markup with HtmlAgilityPack.
HtmlDocument html = new HtmlDocument();
html.LoadHtml(resultHtml);
HtmlAgilityPack.HtmlNode htmlNode = html.DocumentNode;

// XPath of the element to extract. Single quotes are used inside the XPath so
// the expression does not terminate the surrounding C# string literal.
HtmlAgilityPack.HtmlNode div = htmlNode.SelectSingleNode(".//*[@id='cnblogs_post_body']");

if (div == null)
{
    // Guard against a missing container (e.g. the request failed or the page
    // layout changed) instead of dereferencing a null node below.
    Console.WriteLine("Element with id 'cnblogs_post_body' was not found.");
}
else
{
    // Print the text content of each direct <p> child of the post body.
    foreach (var paragraph in div.Elements("p"))
    {
        Console.WriteLine(paragraph.InnerText);
    }
}

// Keep the console window open until a key is pressed.
Console.ReadKey();