using AnfleCrawler.Common;
using HtmlClient;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer that drives a scripted browser session against map.baidu.com, runs a fixed
    /// POI search, and appends one "title,content,point" line per result row to "out.txt".
    /// </summary>
    internal class BMap : AnalyzerBase
    {
        /// <summary>Pause after submitting the search before the first read of the page.</summary>
        private const int SearchSettleDelayMs = 8000;

        /// <summary>Pause after clicking the pager before the next read of the page.</summary>
        private const int PageSettleDelayMs = 4000;

        /// <summary>Pause before re-reading the page after an expected node was missing.</summary>
        private const int RetryDelayMs = 2000;

        /// <summary>
        /// Number of consecutive passes that may yield no new record (missing node, empty
        /// result table, or a page that was already scraped) before the crawl stops.
        /// </summary>
        private const int MaxStalledPasses = 3;

        /// <summary>
        /// Initializes the analyzer and seeds the crawler with the map home page at depth 0.
        /// </summary>
        /// <param name="crawler">Crawler that owns this analyzer.</param>
        public override void Init(PageCrawler crawler)
        {
            base.Init(crawler);
            crawler.PushUrl(new Uri("http://map.baidu.com/"), 0);
        }

        /// <summary>
        /// Opens <paramref name="current"/> in a scripted client, submits the search, and walks
        /// the result pages, appending every record not seen before to "out.txt".
        /// </summary>
        /// <remarks>
        /// The loop ends once <see cref="MaxStalledPasses"/> consecutive passes produce no new
        /// record, so the method returns (and the client and writer are disposed) when the pager
        /// stops advancing or the page never renders the expected nodes.
        /// </remarks>
        /// <param name="current">Landing page whose <c>Url</c> is navigated to.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            using (var client = new CrossClient())
            {
                var content = new System.Net.HttpRequestContent();
                content.Headers[System.Net.HttpRequestHeader.UserAgent] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36";
                client.Navigate(current.Url, content);

                // Fill the search box with the fixed query and submit it through the page's own button.
                client.CurrentInvoke(@" (function () { $('#PoiSearch').val('青浦 企业'); $('#poiSearchBtn').click(); })();");
                System.Threading.Thread.Sleep(SearchSettleDelayMs);

                using (var writer = new System.IO.StreamWriter(App.CombinePath("out.txt"), true, Encoding.UTF8))
                {
                    // Raw JSON payloads already written; prevents duplicate rows when a page is
                    // re-read after a failure or when the pager does not advance.
                    var seenRecords = new HashSet<string>();
                    int stalledPasses = 0;

                    while (stalledPasses < MaxStalledPasses)
                    {
                        string html = client.CurrentGetHtml();
                        var dom = new HtmlAgilityPack.HtmlDocument();
                        dom.LoadHtml(html);

                        int written;
                        try
                        {
                            written = WriteNewRecords(dom, writer, seenRecords);
                        }
                        catch (HtmlNodeMissingException ex)
                        {
                            App.LogInfo("HtmlNodeMissing:{0} {1}", ex.Selector, html);
                            Crawler.OutWrite("HtmlNodeMissing:{0}", ex.Selector);

                            // Give the page time to finish rendering instead of spinning, and
                            // count the attempt so a permanently missing node cannot loop forever.
                            stalledPasses++;
                            System.Threading.Thread.Sleep(RetryDelayMs);
                            continue;
                        }

                        if (written > 0)
                        {
                            stalledPasses = 0;
                        }
                        else if (++stalledPasses >= MaxStalledPasses)
                        {
                            break;
                        }

                        // Advance by clicking the last anchor inside the pager container.
                        client.CurrentInvoke(@" (function () { $('#result_page_c a:last').click(); })();");
                        System.Threading.Thread.Sleep(PageSettleDelayMs);
                    }

                    App.LogInfo("BMap: stopped after {0} consecutive passes without new records", stalledPasses);
                }
            }
        }

        /// <summary>
        /// Scans the result rows of <paramref name="dom"/> and writes each record whose JSON
        /// payload is not yet in <paramref name="seenRecords"/>.
        /// </summary>
        /// <param name="dom">Parsed snapshot of the current page.</param>
        /// <param name="writer">Destination for the "title,content,point" lines.</param>
        /// <param name="seenRecords">JSON payloads written so far; updated in place.</param>
        /// <returns>Number of records written during this pass.</returns>
        private int WriteNewRecords(HtmlAgilityPack.HtmlDocument dom, System.IO.StreamWriter writer, HashSet<string> seenRecords)
        {
            int written = 0;
            foreach (var tr in QueryNodes(dom.DocumentNode, "#POI_TABLE_LST tr[id]"))
            {
                // The values of these two lookups are not used below. They are kept because the
                // original code performed them for every row; presumably they fail when a row
                // is not fully rendered - TODO confirm against QueryTexts.
                string title = QueryTexts(tr, ".poiTitleW").First();
                string addr = QueryTexts(tr, ".n_p_lineheight").First();

                // The onclick attribute is split on ';' and the piece at index 2 is expected to
                // hold the record as an object literal. Sample of that piece:
                // SyncMgr.goFav({'point':'13494514.14|3640372','uid':'ca38818c8f551ef049fd3f9b','cityCode':'289','title':'<name>','content':'<address><br/>','panoGuid':''})
                string attrVal = QueryAttrs(tr, ".p_left a:first-child", "onclick").First();
                attrVal = attrVal.Split(';')[2];
                int s = attrVal.IndexOf("{"), e = attrVal.LastIndexOf("}");
                string sJson = attrVal.Substring(s, e - s + 1).Replace("<br/>", "|");

                if (!seenRecords.Add(sJson))
                {
                    continue;
                }

                var json = Newtonsoft.Json.Linq.JObject.Parse(sJson);
                writer.Write("{0},{1},{2}", json.Value<string>("title"), json.Value<string>("content"), json.Value<string>("point"));
                writer.WriteLine();
                // Flush per record so rows already scraped survive an abrupt stop.
                writer.Flush();
                Crawler.OutWrite(json.Value<string>("title"));
                written++;
            }
            return written;
        }
    }
}