• realestate.cei.gov.cn


    using AnfleCrawler.Common;
    using System;
    using System.Collections.Concurrent;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace AnfleCrawler.DataAnalyzer
    {
        /// <summary>
        /// Analyzer that seeds one crawl URL per date option found on the
        /// realestate.cei.gov.cn "br2.aspx" page, extracts the table rows under
        /// <c>#str1</c> from every depth-1 page, and dumps all collected rows to a
        /// comma-separated text file when the lander raises its <c>Idle</c> event.
        /// </summary>
        public class ManualAnalyzer : AnalyzerBase
        {
            // URL template of the crawled page; {0} is the value of the "rq" query parameter.
            private const string PageUrlFormat = "http://www.realestate.cei.gov.cn/traden/br2.aspx?rq={0}&lx=w6&r1=20140830";

            // "rq" value of the seed page that is loaded to discover the available date options.
            private const string SeedRq = "20140601";

            // Format of the "rq" query parameter.
            private const string RqDateFormat = "yyyyMMdd";

            // Marker inserted in front of every "<tr" when repairing the row markup.
            private const string RowCloseTag = "</tr>";

            // Collected rows; each entry is one output line (date followed by the cell texts).
            private readonly ConcurrentQueue<string[]> _dict = new ConcurrentQueue<string[]>();

            /// <summary>
            /// Subscribes to the lander's Idle event, initializes the base analyzer, then
            /// loads the seed page and pushes one depth-1 URL per <c>#qrq option</c> value.
            /// </summary>
            /// <param name="crawler">The crawler this analyzer is attached to.</param>
            public override void Init(PageCrawler crawler)
            {
                crawler.Lander.Idle += Lander_Idle;
                base.Init(crawler);

                var seedUrl = new Uri(string.Format(PageUrlFormat, SeedRq));
                var dom = Crawler.Lander.GetDocument(new PageContentHandler() { Url = seedUrl });
                foreach (var node in QueryNodes(dom.DocumentNode, "#qrq option"))
                {
                    string val = node.GetAttributeValue("value", string.Empty);
                    if (string.IsNullOrWhiteSpace(val))
                    {
                        // An option without a value would yield "rq=" and can never be
                        // parsed as a date in AnalyzeInternal, so do not queue it.
                        continue;
                    }
                    Crawler.PushUrl(new Uri(string.Format(PageUrlFormat, val)), 1);
                }
            }

            /// <summary>
            /// Idle handler: writes every collected row to the output file, one
            /// comma-joined line per row, overwriting any existing file.
            /// </summary>
            private void Lander_Idle(object sender, EventArgs e)
            {
                Crawler.OutWrite("Start step2...");
                App.LogInfo("Start step2...");
                // NOTE(review): "D:outdict.txt" is a drive-relative path (no backslash after
                // the colon); it looks like path separators may have been lost - confirm the
                // intended output location.
                using (var writer = new System.IO.StreamWriter(@"D:outdict.txt", false, Encoding.UTF8))
                {
                    foreach (var row in _dict)
                    {
                        writer.WriteLine(string.Join(",", row));
                    }
                }
            }

            /// <summary>
            /// Processes a landed page. Only depth-1 pages are handled: the date is read
            /// from the "rq" query parameter, the rows under <c>#str1</c> are extracted and
            /// each row is queued as [date, cell texts...]. Pages with a missing/invalid
            /// "rq" value or without a usable <c>#str1</c> node are skipped.
            /// </summary>
            /// <param name="current">The landed page to analyze.</param>
            protected override void AnalyzeInternal(PageLandEntity current)
            {
                Crawler.OutWrite("*Start step1...");
                var lander = Crawler.Lander;
                var pHandler = CreateContentHandler(current);
                switch (current.Depth)
                {
                    case 1:
                        {
                            var query = System.Web.HttpUtility.ParseQueryString(current.Url.Query);
                            DateTime dt;
                            // TryParseExact returns false for a null/malformed value instead of
                            // throwing; the invariant culture keeps parsing independent of the
                            // machine's regional settings.
                            if (!DateTime.TryParseExact(query["rq"], RqDateFormat,
                                System.Globalization.CultureInfo.InvariantCulture,
                                System.Globalization.DateTimeStyles.None, out dt))
                            {
                                App.LogInfo("Skip page with missing or invalid rq: {0}", current.Url);
                                return;
                            }
                            var dom = lander.GetDocument(pHandler);

                            var checkNode = QueryNode(dom.DocumentNode, "#str1");
                            // NOTE(review): presumably QueryNode yields null when nothing
                            // matches - the null guard is defensive; confirm against AnalyzerBase.
                            if (checkNode == null || string.IsNullOrWhiteSpace(checkNode.InnerText))
                            {
                                return;
                            }
                            checkNode.InnerHtml = CloseRowTags(checkNode.InnerHtml);
                            App.LogInfo("WTF CN:{0}", checkNode.InnerHtml);

                            // Count while iterating so the query result is enumerated only once.
                            int rowCount = 0;
                            foreach (var node in QueryNodes(checkNode, "tr"))
                            {
                                var cells = new List<string>();
                                cells.Add(dt.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture));
                                cells.AddRange(QueryTexts(node, "td"));
                                _dict.Enqueue(cells.ToArray());
                                rowCount++;
                            }
                            // Separator entry: produces blank output between the rows of two pages.
                            _dict.Enqueue(new string[] { Environment.NewLine });
                            Crawler.OutWrite("#Stop step1 {0} {1}", dt.ToShortDateString(), rowCount);
                        }
                        break;
                }
            }

            // Inserts "</tr>" in front of every "<tr" and then drops the first inserted
            // marker (the one preceding the first row, which closes nothing).
            private static string CloseRowTags(string html)
            {
                string patched = html.Replace("<tr", RowCloseTag + "<tr");
                // The first "</tr><tr" is always the first inserted marker, wherever the
                // first row starts; removing by index (rather than a fixed Substring(5))
                // stays correct when the markup has leading whitespace/text or no rows.
                int firstInserted = patched.IndexOf(RowCloseTag + "<tr", StringComparison.Ordinal);
                if (firstInserted >= 0)
                {
                    patched = patched.Remove(firstInserted, RowCloseTag.Length);
                }
                return patched;
            }
        }
    }
  • 相关阅读:
    pytest临时文件
    djangoclassmeta说明
    python方法
    字符串统计个数2
    python之fixture作用域
    djangomodel在已有model同步添加新的字段
    将 Access 数据库 转换到Sql Server 中
    .net 实现条码
    条码打印异步调用
    日期控件datepicker 只能选指定段日期案例
  • 原文地址:https://www.cnblogs.com/Googler/p/4110974.html
Copyright © 2020-2023  润新知