• xpath 参考


    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Text.RegularExpressions;
    using System.Configuration;
    
    /// <summary>
    /// Shared constants: argument names, result-column indexes, date/time formats,
    /// User-Agent strings, configuration keys and, per search engine, the element ids
    /// and XPath expressions used to locate ad entries in a result page.
    /// </summary>
    /// <remarks>
    /// Most <c>XPATH_*</c> members are arrays of alternative expressions.
    /// NOTE(review): the array index presumably selects a page-layout variant; confirm
    /// against the caller. The arrays are mutable; callers must not modify them.
    /// </remarks>
    public static class SearchConst
    {
        // ---- argument names ----
        public static readonly string ARG_CLIENT = "client";
        public static readonly string ARG_WORD = "word";

        // ---- result row layout: number of columns and the index of each column ----
        public static readonly int DataColumnCount = 4;
        public static readonly int ColumnOfUrl = 0;
        public static readonly int ColumnOfTitle = 1;
        public static readonly int ColumnOfInfo = 2;
        public static readonly int ColumnOfAdUrl = 3;

        // ---- date / time format strings ----
        public static readonly string FMT_Date = "yyyy/MM/dd";
        public static readonly string FMT_TIME = "HH:mm:ss";

        // ---- User-Agent strings (desktop Firefox / iPhone Safari) ----
        public static readonly string UserAgentPC = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0";
        public static readonly string UserAgentMobile = "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25";

        // Placeholder token; presumably replaced by the search keyword in a URL template (confirm in caller).
        public static readonly string SearchKeyWordPlace = "#{q}";
        public static readonly string DefaultEncode = "UTF-8";
        public static readonly string AttributeHref = "href";

        // ---- file names and configuration keys ----
        public static readonly string FILEEXT_ZIP = ".zip";
        public static readonly string FILE_TXT = "source.txt";
        public static readonly string FILE_KEY = "SavePath";
        public static readonly string BATCH_PARALLES_KEY = "BatchParalles";

        public static readonly string FLG_ENABLED = "1";
        public static readonly string CLIENT_MONITOR = "BJMOR";

        // NOTE(review): "fff" looks like a placeholder message text; confirm the intended wording.
        public static readonly string MSG_E_PAGE_STYLE_CHANGE = "fff";

        /// <summary>Selectors for Google, requested with the desktop User-Agent.</summary>
        public static class Google
        {
            public static readonly string UserAgent = UserAgentPC;

            // side block (element id 'mbEnd')
            public static readonly string[] XPATH_ROOT = { "mbEnd", "mbEnd" };
            // url
            public static readonly string[] XPATH_CITE = { "//div[@id='mbEnd']//ol/li//cite", "//div[@id='mbEnd']//ol/li//cite" };
            // title
            public static readonly string[] XPATH_H3 = { "//div[@id='mbEnd']//ol/li//h3", "//div[@id='mbEnd']//ol/li/h3" };
            public static readonly string[] XPATH_ADURL = { "//div[@id='mbEnd']//ol/li//h3//a[1]", "//div[@id='mbEnd']//ol/li/h3//a[1]" };
            public static readonly string[] XPATH_INFO = { "//div[@id='mbEnd']//ol/li//div[@class='ac ads-creative']", "//div[@id='mbEnd']//ol/li//div[@class='ads-creative']" };

            // top info
            public static readonly string[] XPATH_ROOT_TOP = { "taw", "taw" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']//ol/li//cite", "//div[@id='tads']//ol/li//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']//ol/li//h3", "//div[@id='tads']//ol/li/h3" };
            public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']//ol/li//h3//a[1]", "//div[@id='tads']//ol/li/h3//a[1]" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']//ol/li//div[@class='ac ads-creative']", "//div[@id='tads']//ol/li//div[@class='ads-creative']" };

            // Group 1 captures the URL that follows "adurl=" up to the end of the input.
            // \S* (run of non-whitespace) replaces the former [S]*, which only matched
            // literal capital 'S' characters and so could never capture a real URL.
            public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http\S*$)");

            public static readonly string BAITAI_ID = "001";
        }

        /// <summary>Selectors for Google, requested with the mobile User-Agent.</summary>
        public static class GoogleM
        {
            public static readonly string UserAgent = UserAgentMobile;

            // info
            public static readonly string[] XPATH_ROOT = { "bottomads", "bottomads" };
            public static readonly string[] XPATH_CITE = { "//div[@id='tadsb']/ol/li//cite", "//div[@id='tadsb']/ol/li//cite" };
            public static readonly string[] XPATH_H3 = { "//div[@id='tadsb']/ol/li//h3", "//div[@id='tadsb']/ol/li//h3" };
            public static readonly string[] XPATH_ADURL = { "//div[@id='tadsb']/ol/li//h3//a", "//div[@id='tadsb']/ol/li//h3//a" };
            public static readonly string[] XPATH_INFO = { "//div[@id='tadsb']/ol/li//div[@class='ac ads-creative']", "//div[@id='tadsb']/ol/li//div[@class='ads-creative']" };

            // top info
            public static readonly string[] XPATH_ROOT_TOP = { "tads", "tads" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']/ol/li//cite", "//div[@id='tads']/ol/li//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']/ol/li//h3", "//div[@id='tads']/ol/li//h3" };
            public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']/ol/li//h3//a", "//div[@id='tads']/ol/li//h3//a" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']/ol/li//div[@class='ac ads-creative']", "//div[@id='tads']/ol/li//div[@class='ads-creative']" };

            // Group 1 captures the URL that follows "adurl=" (\S* instead of the former [S]*).
            public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http\S*$)");

            public static readonly string BAITAI_ID = "005";
        }

        /// <summary>Selectors for MSN, requested with the desktop User-Agent.</summary>
        public static class MSN
        {
            public static readonly string UserAgent = UserAgentPC;

            // b_context/b_ad
            public static readonly string[] XPATH_ROOT = { "sidebar", "b_context" };
            public static readonly string[] XPATH_CITE = { "//div[@class='sb_adsNv2']//li//cite", "//ol[@id='b_context']//li[@class='b_ad']//li//cite" };
            public static readonly string[] XPATH_H3 = { "//div[@class='sb_adsNv2']//li//h3", "//ol[@id='b_context']//li[@class='b_ad']//li//h2" };
            public static readonly string[] XPATH_ADURL = { "//div[@class='sb_adsNv2']//li//a", "//ol[@id='b_context']//li[@class='b_ad']//li//a" };
            public static readonly string[] XPATH_INFO = { "//div[@class='sb_adsNv2']//li//p", "//ol[@id='b_context']//li[@class='b_ad']//li//p" };

            // b_results/b_ad
            public static readonly string[] XPATH_ROOT_TOP = { "results_container", "b_results" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[@class='sb_adsWv2']//li//cite", "//ol[@id='b_results']//li[@class='b_ad']//li//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//div[@class='sb_adsWv2']//li//h3", "//ol[@id='b_results']//li[@class='b_ad']//li//h2" };
            public static readonly string[] XPATH_ADURL_TOP = { "//div[@class='sb_adsWv2']//li//a", "//ol[@id='b_results']//li[@class='b_ad']//li//a" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@class='sb_adsWv2']//li//p", "//ol[@id='b_results']//li[@class='b_ad']//li//p" };

            // Group 1 captures the URL that follows a literal "**" up to the end of the input.
            // The asterisks are escaped: an unescaped leading "**" is an invalid pattern and
            // made the Regex constructor throw while this class was being initialized.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");

            public static readonly string BAITAI_ID = "003";
        }

        /// <summary>Selectors for Yahoo, requested with the desktop User-Agent.</summary>
        public static class Yahoo
        {
            public static readonly string UserAgent = UserAgentPC;

            public static readonly string XPATH_ROOT = "sIn";
            public static readonly string XPATH_CITE1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
            public static readonly string XPATH_H31 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3";
            public static readonly string XPATH_ADURL1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3/a";
            public static readonly string XPATH_INFO1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/p";

            // top block (element id 'So1')
            public static readonly string XPATH_ROOT_TOP = "So1";
            public static readonly string XPATH_CITE_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
            public static readonly string XPATH_H3_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3";
            public static readonly string XPATH_ADURL_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3/a";
            public static readonly string XPATH_INFO_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/p";

            // Group 1 captures the URL that follows a literal "**" (escaped; see MSN.RegexAdUrl
            // for the same pattern). Unescaped, the pattern was invalid and threw on construction.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");
            public static readonly string NullUrl = "&gt;";

            public static readonly string BAITAI_ID = "002";
        }

        /// <summary>Second set of Yahoo selectors (top block only), desktop User-Agent.</summary>
        public static class Yahoo2
        {
            public static readonly string UserAgent = UserAgentPC;

            public static readonly string XPATH_ROOT_TOP = "contents";
            public static readonly string XPATH_CITE_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/cite";
            public static readonly string XPATH_H3_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
            public static readonly string XPATH_ADURL_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
            public static readonly string XPATH_INFO_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/p[@class='smr']";

            // Group 1 captures the URL that follows a literal "**"; the asterisks are escaped
            // because the unescaped pattern was invalid and threw on construction.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");
            public static readonly string NullUrl = "&gt;";

            public static readonly string BAITAI_ID = "004";
        }

        /// <summary>Selectors for Yahoo, requested with the mobile User-Agent.</summary>
        public static class YahooM
        {
            public static readonly string UserAgent = UserAgentMobile;

            public static readonly string XPATH_ROOT = "contentsInner";
            public static readonly string XPATH_CITE = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/cite";
            public static readonly string XPATH_H3 = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3";
            public static readonly string XPATH_ADURL = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3/a";
            public static readonly string XPATH_INFO = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/p[@class='dtl']";

            public static readonly string XPATH_ROOT_TOP = "contentsInner";
            public static readonly string XPATH_CITE_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/cite";
            public static readonly string XPATH_H3_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3";
            public static readonly string XPATH_ADURL_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3/a";
            public static readonly string XPATH_INFO_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/p[@class='dtl']";

            // Group 1 captures the URL that follows a literal "**"; the asterisks are escaped
            // because the unescaped pattern was invalid and threw on construction.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");
            public static readonly string NullUrl = "&gt;";

            public static readonly string BAITAI_ID = "006";
        }

        /// <summary>Selectors for Baidu; uses its own (IE 11 style) User-Agent string.</summary>
        public static class BaiDu
        {
            public static readonly string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";

            // first case
            public static readonly string[] XPATH_ROOT = { "ec_im_container", "ec_im_container" };
            public static readonly string[] XPATH_CITE = { "//a/font[@size='-1' and @class]", "//a/font[@size='-1' and @class]" };
            public static readonly string[] XPATH_H3 = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
            public static readonly string[] XPATH_ADURL = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
            public static readonly string[] XPATH_INFO = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]" };

            // top info; original note on these arrays: first entry = lower, second entry = upper
            public static readonly string[] XPATH_ROOT_TOP = { "content_left", "content_left" };
            public static readonly string[] XPATH_CITE_TOP = { "//table[@data-click]/tbody/tr/td//a[not(@data-is-main-url) and not(contains(@href,'tool'))]/span", "//div[@class and @style]/div/div/a/span[1]|//div/table/tbody/tr/td[2]/div//a/span[1]" };
            public static readonly string[] XPATH_H3_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//div[@class and @style]/div/div/h3" };
            public static readonly string[] XPATH_ADURL_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//table/tbody/tr/td/a[ @data-is-main-url]" };
            public static readonly string[] XPATH_INFO_TOP = { "//table[@data-click and @class]/tbody/tr[3]/td/a[not(./span)]|//table[@data-click and @class]/tbody/tr/td/table/tbody/tr/td/div/font/a", "//div[@class and @style]/div/div[not(./span)]/a|//div/table/tbody/tr/td/div/font/a[not(./span)]" };

            // No RegexAdUrl is defined for this engine (it was commented out in the original):
            //public static readonly Regex RegexAdUrl = new Regex(@"http[S]*$");

            public static readonly string BAITAI_ID = "007";
        }

        /// <summary>Selectors for cn.bing, requested with the desktop User-Agent.</summary>
        public static class CnBing
        {
            public static readonly string UserAgent = UserAgentPC;

            public static readonly string[] XPATH_ROOT = { "b_context", "b_context" };
            public static readonly string[] XPATH_CITE = { "//div[@class='sb_add sb_adTA']//cite", "//div[@class='sb_add sb_adTA']//cite" };
            // first case
            public static readonly string[] XPATH_H3 = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
            public static readonly string[] XPATH_ADURL = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
            public static readonly string[] XPATH_INFO = { "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p", "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p" };

            // top info: the XPath entries are empty strings for this engine
            public static readonly string[] XPATH_ROOT_TOP = { "gg", "gg" };
            public static readonly string[] XPATH_CITE_TOP = { "", "" };
            public static readonly string[] XPATH_H3_TOP = { "", "" };
            public static readonly string[] XPATH_ADURL_TOP = { "", "" };
            public static readonly string[] XPATH_INFO_TOP = { "", "" };

            // Group 1 captures the URL that follows "rturl=" (\S* instead of the former [S]*,
            // which only matched literal capital 'S' characters).
            public static readonly Regex RegexAdUrl = new Regex(@"rturl=(http\S*$)");

            public static readonly string BAITAI_ID = "008";
        }

        /// <summary>Selectors for HaoSou, requested with the desktop User-Agent.</summary>
        public static class HaoSou
        {
            public static readonly string UserAgent = UserAgentPC;

            // ads on the right-hand side; XPATH_ROOT is the scope to search in
            public static readonly string[] XPATH_ROOT = { "side", "side" };
            public static readonly string[] XPATH_CITE = { "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite", "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite" };
            public static readonly string[] XPATH_H3 = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a" };
            public static readonly string[] XPATH_ADURL = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a" };
            public static readonly string[] XPATH_INFO = { "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]", "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]" };

            // top info: the XPath entries are empty strings for this engine
            public static readonly string[] XPATH_ROOT_TOP = { "ss", "sss" };
            public static readonly string[] XPATH_CITE_TOP = { "", "" };
            public static readonly string[] XPATH_H3_TOP = { "", "" };
            public static readonly string[] XPATH_ADURL_TOP = { "", "" };
            public static readonly string[] XPATH_INFO_TOP = { "", "" };

            // No RegexAdUrl is defined for this engine (it was commented out in the original):
            //public static readonly Regex RegexAdUrl = new Regex(@"http[S]*$");

            public static readonly string BAITAI_ID = "009";
        }

        /// <summary>Selectors for Sogou (single-entry arrays), desktop User-Agent.</summary>
        public static class Sogou
        {
            public static readonly string UserAgent = UserAgentPC;

            // right part
            public static readonly string[] XPATH_ROOT = { "right" };
            // the green url
            public static readonly string[] XPATH_CITE = { "//div[@class='bizr_fb']" };
            // #ad_leftresult_0 > h3:nth-child(1)
            public static readonly string[] XPATH_H3 = { "//h3[@class='bizr_title']" };
            // url of the h3
            public static readonly string[] XPATH_ADURL = { "//h3[@class='bizr_title']/a" };
            public static readonly string[] XPATH_INFO = { "//div[@class='bizr_ft']" };

            // top part
            // *[@id="promotion_adv_container"]/div/div
            public static readonly string[] XPATH_ROOT_TOP = { "promotion_adv_container" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[contains(@class,'biz_rb')and @id]/div//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//h3[@class='biz_title']" };
            public static readonly string[] XPATH_ADURL_TOP = { "//h3[@class='biz_title']/a" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@class='crown_info_box' or @class='biz_ft']|//div[contains(@id,'box_id')]/table" };

            // No RegexAdUrl is defined for this engine (it was commented out in the original):
            //public static readonly Regex RegexAdUrl = new Regex(@"**(http[S]*$)");

            public static readonly string BAITAI_ID = "010";
            public static readonly string NullUrl = "&gt;";
        }
    }

    using System;using System.Collections.Generic;using System.Linq;using System.Web;using System.Text.RegularExpressions;using System.Configuration;
    /// <summary>
    /// Shared constants: argument names, result-column indexes, date/time formats,
    /// User-Agent strings, configuration keys and, per search engine, the element ids
    /// and XPath expressions used to locate ad entries in a result page.
    /// </summary>
    /// <remarks>
    /// Most <c>XPATH_*</c> members are arrays of alternative expressions.
    /// NOTE(review): the array index presumably selects a page-layout variant; confirm
    /// against the caller. The arrays are mutable; callers must not modify them.
    /// </remarks>
    public static class SearchConst
    {
        // ---- argument names ----
        public static readonly string ARG_CLIENT = "client";
        public static readonly string ARG_WORD = "word";

        // ---- result row layout: number of columns and the index of each column ----
        public static readonly int DataColumnCount = 4;
        public static readonly int ColumnOfUrl = 0;
        public static readonly int ColumnOfTitle = 1;
        public static readonly int ColumnOfInfo = 2;
        public static readonly int ColumnOfAdUrl = 3;

        // ---- date / time format strings ----
        public static readonly string FMT_Date = "yyyy/MM/dd";
        public static readonly string FMT_TIME = "HH:mm:ss";

        // ---- User-Agent strings (desktop Firefox / iPhone Safari) ----
        public static readonly string UserAgentPC = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0";
        public static readonly string UserAgentMobile = "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25";

        // Placeholder token; presumably replaced by the search keyword in a URL template (confirm in caller).
        public static readonly string SearchKeyWordPlace = "#{q}";
        public static readonly string DefaultEncode = "UTF-8";
        public static readonly string AttributeHref = "href";

        // ---- file names and configuration keys ----
        public static readonly string FILEEXT_ZIP = ".zip";
        public static readonly string FILE_TXT = "source.txt";
        public static readonly string FILE_KEY = "SavePath";
        public static readonly string BATCH_PARALLES_KEY = "BatchParalles";

        public static readonly string FLG_ENABLED = "1";
        public static readonly string CLIENT_MONITOR = "BJMOR";

        // Message text (Japanese): "sponsor check: search media layout changed".
        public static readonly string MSG_E_PAGE_STYLE_CHANGE = "スポンサーチェックの検索媒体レイアウト変更";

        /// <summary>Selectors for Google, requested with the desktop User-Agent.</summary>
        public static class Google
        {
            public static readonly string UserAgent = UserAgentPC;

            // side block (element id 'mbEnd')
            public static readonly string[] XPATH_ROOT = { "mbEnd", "mbEnd" };
            // url
            public static readonly string[] XPATH_CITE = { "//div[@id='mbEnd']//ol/li//cite", "//div[@id='mbEnd']//ol/li//cite" };
            // title
            public static readonly string[] XPATH_H3 = { "//div[@id='mbEnd']//ol/li//h3", "//div[@id='mbEnd']//ol/li/h3" };
            public static readonly string[] XPATH_ADURL = { "//div[@id='mbEnd']//ol/li//h3//a[1]", "//div[@id='mbEnd']//ol/li/h3//a[1]" };
            public static readonly string[] XPATH_INFO = { "//div[@id='mbEnd']//ol/li//div[@class='ac ads-creative']", "//div[@id='mbEnd']//ol/li//div[@class='ads-creative']" };

            // top info
            public static readonly string[] XPATH_ROOT_TOP = { "taw", "taw" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']//ol/li//cite", "//div[@id='tads']//ol/li//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']//ol/li//h3", "//div[@id='tads']//ol/li/h3" };
            public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']//ol/li//h3//a[1]", "//div[@id='tads']//ol/li/h3//a[1]" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']//ol/li//div[@class='ac ads-creative']", "//div[@id='tads']//ol/li//div[@class='ads-creative']" };

            // Group 1 captures the URL that follows "adurl=" up to the end of the input.
            // \S* (run of non-whitespace) replaces the former [S]*, which only matched
            // literal capital 'S' characters and so could never capture a real URL.
            public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http\S*$)");

            public static readonly string BAITAI_ID = "001";
        }

        /// <summary>Selectors for Google, requested with the mobile User-Agent.</summary>
        public static class GoogleM
        {
            public static readonly string UserAgent = UserAgentMobile;

            // info
            public static readonly string[] XPATH_ROOT = { "bottomads", "bottomads" };
            public static readonly string[] XPATH_CITE = { "//div[@id='tadsb']/ol/li//cite", "//div[@id='tadsb']/ol/li//cite" };
            public static readonly string[] XPATH_H3 = { "//div[@id='tadsb']/ol/li//h3", "//div[@id='tadsb']/ol/li//h3" };
            public static readonly string[] XPATH_ADURL = { "//div[@id='tadsb']/ol/li//h3//a", "//div[@id='tadsb']/ol/li//h3//a" };
            public static readonly string[] XPATH_INFO = { "//div[@id='tadsb']/ol/li//div[@class='ac ads-creative']", "//div[@id='tadsb']/ol/li//div[@class='ads-creative']" };

            // top info
            public static readonly string[] XPATH_ROOT_TOP = { "tads", "tads" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']/ol/li//cite", "//div[@id='tads']/ol/li//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']/ol/li//h3", "//div[@id='tads']/ol/li//h3" };
            public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']/ol/li//h3//a", "//div[@id='tads']/ol/li//h3//a" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']/ol/li//div[@class='ac ads-creative']", "//div[@id='tads']/ol/li//div[@class='ads-creative']" };

            // Group 1 captures the URL that follows "adurl=" (\S* instead of the former [S]*).
            public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http\S*$)");

            public static readonly string BAITAI_ID = "005";
        }

        /// <summary>Selectors for MSN, requested with the desktop User-Agent.</summary>
        public static class MSN
        {
            public static readonly string UserAgent = UserAgentPC;

            // b_context/b_ad
            public static readonly string[] XPATH_ROOT = { "sidebar", "b_context" };
            public static readonly string[] XPATH_CITE = { "//div[@class='sb_adsNv2']//li//cite", "//ol[@id='b_context']//li[@class='b_ad']//li//cite" };
            public static readonly string[] XPATH_H3 = { "//div[@class='sb_adsNv2']//li//h3", "//ol[@id='b_context']//li[@class='b_ad']//li//h2" };
            public static readonly string[] XPATH_ADURL = { "//div[@class='sb_adsNv2']//li//a", "//ol[@id='b_context']//li[@class='b_ad']//li//a" };
            public static readonly string[] XPATH_INFO = { "//div[@class='sb_adsNv2']//li//p", "//ol[@id='b_context']//li[@class='b_ad']//li//p" };

            // b_results/b_ad
            public static readonly string[] XPATH_ROOT_TOP = { "results_container", "b_results" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[@class='sb_adsWv2']//li//cite", "//ol[@id='b_results']//li[@class='b_ad']//li//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//div[@class='sb_adsWv2']//li//h3", "//ol[@id='b_results']//li[@class='b_ad']//li//h2" };
            public static readonly string[] XPATH_ADURL_TOP = { "//div[@class='sb_adsWv2']//li//a", "//ol[@id='b_results']//li[@class='b_ad']//li//a" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@class='sb_adsWv2']//li//p", "//ol[@id='b_results']//li[@class='b_ad']//li//p" };

            // Group 1 captures the URL that follows a literal "**" up to the end of the input.
            // The asterisks are escaped: an unescaped leading "**" is an invalid pattern and
            // made the Regex constructor throw while this class was being initialized.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");

            public static readonly string BAITAI_ID = "003";
        }

        /// <summary>Selectors for Yahoo, requested with the desktop User-Agent.</summary>
        public static class Yahoo
        {
            public static readonly string UserAgent = UserAgentPC;

            public static readonly string XPATH_ROOT = "sIn";
            public static readonly string XPATH_CITE1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
            public static readonly string XPATH_H31 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3";
            public static readonly string XPATH_ADURL1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3/a";
            public static readonly string XPATH_INFO1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/p";

            // top block (element id 'So1')
            public static readonly string XPATH_ROOT_TOP = "So1";
            public static readonly string XPATH_CITE_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
            public static readonly string XPATH_H3_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3";
            public static readonly string XPATH_ADURL_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3/a";
            public static readonly string XPATH_INFO_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/p";

            // Group 1 captures the URL that follows a literal "**" (escaped; see MSN.RegexAdUrl
            // for the same pattern). Unescaped, the pattern was invalid and threw on construction.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");
            public static readonly string NullUrl = "&gt;";

            public static readonly string BAITAI_ID = "002";
        }

        /// <summary>Second set of Yahoo selectors (top block only), desktop User-Agent.</summary>
        public static class Yahoo2
        {
            public static readonly string UserAgent = UserAgentPC;

            public static readonly string XPATH_ROOT_TOP = "contents";
            public static readonly string XPATH_CITE_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/cite";
            public static readonly string XPATH_H3_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
            public static readonly string XPATH_ADURL_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
            public static readonly string XPATH_INFO_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/p[@class='smr']";

            // Group 1 captures the URL that follows a literal "**"; the asterisks are escaped
            // because the unescaped pattern was invalid and threw on construction.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");
            public static readonly string NullUrl = "&gt;";

            public static readonly string BAITAI_ID = "004";
        }

        /// <summary>Selectors for Yahoo, requested with the mobile User-Agent.</summary>
        public static class YahooM
        {
            public static readonly string UserAgent = UserAgentMobile;

            public static readonly string XPATH_ROOT = "contentsInner";
            public static readonly string XPATH_CITE = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/cite";
            public static readonly string XPATH_H3 = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3";
            public static readonly string XPATH_ADURL = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3/a";
            public static readonly string XPATH_INFO = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/p[@class='dtl']";

            public static readonly string XPATH_ROOT_TOP = "contentsInner";
            public static readonly string XPATH_CITE_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/cite";
            public static readonly string XPATH_H3_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3";
            public static readonly string XPATH_ADURL_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3/a";
            public static readonly string XPATH_INFO_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/p[@class='dtl']";

            // Group 1 captures the URL that follows a literal "**"; the asterisks are escaped
            // because the unescaped pattern was invalid and threw on construction.
            public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http\S*$)");
            public static readonly string NullUrl = "&gt;";

            public static readonly string BAITAI_ID = "006";
        }

        /// <summary>Selectors for Baidu; uses its own (IE 11 style) User-Agent string.</summary>
        public static class BaiDu
        {
            public static readonly string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";

            // first case (original note: there seems to be only one case)
            public static readonly string[] XPATH_ROOT = { "ec_im_container", "ec_im_container" };
            public static readonly string[] XPATH_CITE = { "//a/font[@size='-1' and @class]", "//a/font[@size='-1' and @class]" };
            public static readonly string[] XPATH_H3 = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
            public static readonly string[] XPATH_ADURL = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
            public static readonly string[] XPATH_INFO = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]" };

            // top info; original note on these arrays: first entry = lower, second entry = upper
            public static readonly string[] XPATH_ROOT_TOP = { "content_left", "content_left" };
            public static readonly string[] XPATH_CITE_TOP = { "//table[@data-click]/tbody/tr/td//a[not(@data-is-main-url) and not(contains(@href,'tool'))]/span", "//div[@class and @style]/div/div/a/span[1]|//div/table/tbody/tr/td[2]/div//a/span[1]" };
            public static readonly string[] XPATH_H3_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//div[@class and @style]/div/div/h3" };
            public static readonly string[] XPATH_ADURL_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//table/tbody/tr/td/a[ @data-is-main-url]" };
            // original note: first entry = lower part of the ads, second entry = upper
            public static readonly string[] XPATH_INFO_TOP = { "//table[@data-click and @class]/tbody/tr[3]/td/a[not(./span)]|//table[@data-click and @class]/tbody/tr/td/table/tbody/tr/td/div/font/a", "//div[@class and @style]/div/div[not(./span)]/a|//div/table/tbody/tr/td/div/font/a[not(./span)]" };

            // No RegexAdUrl is defined for this engine (it was commented out in the original):
            //public static readonly Regex RegexAdUrl = new Regex(@"http[S]*$");

            public static readonly string BAITAI_ID = "007";
        }

        /// <summary>Selectors for cn.bing, requested with the desktop User-Agent.</summary>
        public static class CnBing
        {
            public static readonly string UserAgent = UserAgentPC;

            public static readonly string[] XPATH_ROOT = { "b_context", "b_context" };
            public static readonly string[] XPATH_CITE = { "//div[@class='sb_add sb_adTA']//cite", "//div[@class='sb_add sb_adTA']//cite" };
            // first case
            public static readonly string[] XPATH_H3 = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
            public static readonly string[] XPATH_ADURL = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
            public static readonly string[] XPATH_INFO = { "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p", "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p" };

            // top info: root is "なし" (Japanese for "none") and the XPath entries are empty strings
            public static readonly string[] XPATH_ROOT_TOP = { "なし", "なし" };
            public static readonly string[] XPATH_CITE_TOP = { "", "" };
            public static readonly string[] XPATH_H3_TOP = { "", "" };
            public static readonly string[] XPATH_ADURL_TOP = { "", "" };
            public static readonly string[] XPATH_INFO_TOP = { "", "" };

            // Group 1 captures the URL that follows "rturl=" (\S* instead of the former [S]*,
            // which only matched literal capital 'S' characters).
            public static readonly Regex RegexAdUrl = new Regex(@"rturl=(http\S*$)");

            public static readonly string BAITAI_ID = "008";
        }

        /// <summary>Selectors for HaoSou, requested with the desktop User-Agent.</summary>
        public static class HaoSou
        {
            public static readonly string UserAgent = UserAgentPC;

            // ads on the right-hand side; XPATH_ROOT is the scope to search in
            public static readonly string[] XPATH_ROOT = { "side", "side" };
            public static readonly string[] XPATH_CITE = { "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite", "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite" };
            public static readonly string[] XPATH_H3 = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a" };
            public static readonly string[] XPATH_ADURL = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a" };
            public static readonly string[] XPATH_INFO = { "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]", "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]" };

            // top info: root is "なし" (Japanese for "none") and the XPath entries are empty strings
            public static readonly string[] XPATH_ROOT_TOP = { "なし", "なし" };
            public static readonly string[] XPATH_CITE_TOP = { "", "" };
            public static readonly string[] XPATH_H3_TOP = { "", "" };
            public static readonly string[] XPATH_ADURL_TOP = { "", "" };
            public static readonly string[] XPATH_INFO_TOP = { "", "" };

            // No RegexAdUrl is defined for this engine (it was commented out in the original):
            //public static readonly Regex RegexAdUrl = new Regex(@"http[S]*$");

            public static readonly string BAITAI_ID = "009";
        }

        /// <summary>Selectors for Sogou (single-entry arrays), desktop User-Agent.</summary>
        public static class Sogou
        {
            public static readonly string UserAgent = UserAgentPC;

            // right part
            public static readonly string[] XPATH_ROOT = { "right" };
            // the green url
            public static readonly string[] XPATH_CITE = { "//div[@class='bizr_fb']" };
            // #ad_leftresult_0 > h3:nth-child(1)
            public static readonly string[] XPATH_H3 = { "//h3[@class='bizr_title']" };
            // url of the h3
            public static readonly string[] XPATH_ADURL = { "//h3[@class='bizr_title']/a" };
            public static readonly string[] XPATH_INFO = { "//div[@class='bizr_ft']" };

            // top part
            // *[@id="promotion_adv_container"]/div/div
            public static readonly string[] XPATH_ROOT_TOP = { "promotion_adv_container" };
            public static readonly string[] XPATH_CITE_TOP = { "//div[contains(@class,'biz_rb')and @id]/div//cite" };
            public static readonly string[] XPATH_H3_TOP = { "//h3[@class='biz_title']" };
            public static readonly string[] XPATH_ADURL_TOP = { "//h3[@class='biz_title']/a" };
            public static readonly string[] XPATH_INFO_TOP = { "//div[@class='crown_info_box' or @class='biz_ft']|//div[contains(@id,'box_id')]/table" };

            // No RegexAdUrl is defined for this engine (it was commented out in the original):
            //public static readonly Regex RegexAdUrl = new Regex(@"**(http[S]*$)");

            public static readonly string BAITAI_ID = "010";
            public static readonly string NullUrl = "&gt;";
        }
    }

  • 相关阅读:
    英语词汇辨异 —— 形近字、近义词
    英文构词法 —— circum- 前缀
    英文构词法 —— circum- 前缀
    MySQL Cluster-备份恢复初步测试
    MySQL root密码重置报错:mysqladmin: connect to server at 'localhost' failed的解决方案!
    [置顶] High Performance Canvas Game for Android
    [移动网关]2G环境下资源下载有一定概率失败,客户端日志显示收到403错误
    工作两年,新起点,新征程
    CloudStack 物理网络架构
    数学之路(3)-机器学习(3)-机器学习算法-欧氏距离(1)
  • 原文地址:https://www.cnblogs.com/xdot/p/4785317.html
Copyright © 2020-2023  润新知