using AnfleCrawler.Common;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer that reads a JSON page containing a "rows" array, sends each row's
    /// CONTENT to the BMLF "Match" endpoint, flattens the returned match groups into
    /// a single comma-separated value and posts the row back to the BMLF "Save" endpoint.
    /// </summary>
    internal class GFT_News : AnalyzerBase
    {
        /// <summary>
        /// Index of the region group inside the match result; entries of this group
        /// are prefixed with "CITY_NAME-" when the entry carries a CITY_NAME.
        /// </summary>
        private const int RegionGroupIndex = 1;

        /// <summary>
        /// Positional description of the match groups: Key is the JSON property read
        /// from every entry of the group, Value is the label appended after "|".
        /// Labels (in order): city, region, company, project.
        /// Built once instead of once per processed row.
        /// </summary>
        private static readonly KeyValuePair<string, string>[] MatchGroups =
        {
            new KeyValuePair<string, string>("CITY_NAME", "城市"),
            new KeyValuePair<string, string>("STANDARD_REGION", "区域"),
            new KeyValuePair<string, string>("SHORT_NAME_CHS", "企业"),
            new KeyValuePair<string, string>("XIANGMUMINGCHENG", "项目"),
        };

        /// <summary>
        /// Initializes the analyzer by delegating to the base implementation.
        /// </summary>
        /// <param name="crawler">Crawler passed through to <c>base.Init</c>.</param>
        public override void Init(PageCrawler crawler)
        {
            // NOTE(review): the disabled login flow below embeds a literal user name and
            // password; consider removing it from source control or moving it to configuration.
            // var pHandler = new PageContentHandler()
            // {
            //     Url = new Uri("http://admin.gofangtong.com/Login.aspx"),
            //     CrossLoad = (arg, xDom) =>
            //     {
            //         if (arg.IsRedirect)
            //         {
            //             arg.IsRedirect = false;
            //             return;
            //         }
            //         arg.IsRedirect = true;
            //         var input = xDom.GetElementById("txtusername");
            //         input.SetAttribute("value", "admin");
            //         input = xDom.GetElementById("txtuserpass");
            //         input.SetAttribute("value", "123456");
            //         var btn = xDom.GetElementById("btnlogin");
            //         btn.InvokeMember("click");
            //     }
            // };
            // Crawler.Lander.GetDocument(pHandler);
            //crawler.PushUrl(new System.Net.StringPatternGenerator("http://webapi.anfle.com/BMLF/BmlfList?page=[1-75]&rows=50&sort=PublishDate&order=desc"), 0);
            base.Init(crawler);
        }

        /// <summary>
        /// Loads the page for <paramref name="current"/>, parses its text as JSON and
        /// processes every element of the "rows" array. A failure while processing one
        /// row is written to the crawler output, logged, and does not stop the remaining rows.
        /// </summary>
        /// <param name="current">Page entity used to create the content handler.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);
            var dom = lander.GetDocument(pHandler);
            string text = dom.DocumentNode.InnerText;
            //App.LogInfo("GFT: {0}", text);
            var json = JObject.Parse(text);

            // One client instance is reused for every Match/Save request of this page.
            var client = new HttpClient();
            client.SendReceiveTimeout = int.MaxValue;

            foreach (var item in json["rows"])
            {
                try
                {
                    string content = System.Web.HttpUtility.UrlEncode(item.Value<string>("CONTENT"));
                    client.SetRequest(new Uri("http://webapi.anfle.com/BMLF/Match"));
                    client.Form["content"] = content;
                    var matchtext = client.GetResponse().GetResponseText();
                    var mjson = JObject.Parse(matchtext);
                    string matchValue = BuildMatchValue((JArray)mjson["Message"]);

                    string rowid = item.Value<string>("ROWID");
                    if (matchValue.Length == 0)
                    {
                        Crawler.OutWrite("Skip Empty {0}", rowid);
                        continue;
                    }

                    client.SetRequest(new Uri("http://webapi.anfle.com/BMLF/Save"));
                    client.Form["isModify"] = "true";
                    client.Form["rowId"] = rowid;
                    client.Form["val"] = matchValue;
                    client.Form["kind"] = item.Value<string>("KIND");
                    client.Form["title"] = item.Value<string>("TITLE");
                    client.Form["source"] = item.Value<string>("SOURCE");
                    client.Form["date"] = item.Value<string>("PUBLISHDATE");
                    //client.Form["content"] = content;
                    // STATE is parsed as a boolean and sent as "1" (true) or "0" (false).
                    client.Form["state"] = Convert.ToInt16(Convert.ToBoolean(item.Value<string>("STATE"))).ToString();
                    string ret = client.GetResponse().GetResponseText();
                    Crawler.OutWrite("Match OK {0} {1}", rowid, ret);
                }
                catch (Exception ex)
                {
                    Crawler.OutWrite("Error {0}", ex.Message);
                    App.LogError(ex, "GFT");
                }
            }
        }

        /// <summary>
        /// Flattens the match groups into the value sent to the Save endpoint.
        /// Every non-empty group contributes ",entry,entry,..." where each entry is
        /// "[CITY_NAME-]value|label"; the result therefore starts with a comma when
        /// anything matched and is empty when nothing matched.
        /// </summary>
        /// <param name="matchSets">The "Message" array of the Match response.</param>
        /// <returns>The flattened value, or an empty string when no group has entries.</returns>
        private static string BuildMatchValue(JArray matchSets)
        {
            var value = new StringBuilder();

            // Only the groups described by MatchGroups are known; extra groups returned by
            // the service are ignored instead of failing the row with an index error.
            int groupCount = Math.Min(matchSets.Count, MatchGroups.Length);
            for (int i = 0; i < groupCount; i++)
            {
                JToken group = matchSets[i];
                if (group == null)
                {
                    continue;
                }

                KeyValuePair<string, string> config = MatchGroups[i];
                // Copied to a local so the lambda does not capture the loop variable.
                bool prefixCity = i == RegionGroupIndex;

                // Materialized once: the projection is needed both for the emptiness
                // check and for the join.
                List<string> entries = group.Select(p =>
                {
                    string ext = string.Empty;
                    if (prefixCity && p["CITY_NAME"] != null)
                    {
                        ext = p.Value<string>("CITY_NAME") + "-";
                    }
                    return ext + p.Value<string>(config.Key) + "|" + config.Value;
                }).ToList();

                if (entries.Count == 0)
                {
                    continue;
                }

                value.Append(",").Append(string.Join(",", entries));
            }

            return value.ToString();
        }
    }
}