• 解析网页源码方式


    解析HTML源码

    1,正则获取:

     // Extract flight segments from a block of text with a single named-group regex.
     // The pattern is a verbatim string (@"...") so that \s reaches the regex engine;
     // in a regular string literal "\s" is an unrecognized escape and does not compile.
     string str_pattern = @"(?<FlightNo>[A-Z]{2}[0-9]{4})\s*(?<Cabin>[A-Z0-9]{1,2})\s*(?<FlghtDate>[A-Z]{2}[0-9]{2}[A-Z]{3})\s*(?<FromTo>[A-Z]{6})\s*(?<Statu>[A-Za-z]{2}[0-9]{1})\s*(?<FromDt>[0-9]{4})\s*(?<ToDt>[0-9]{4})";
     Regex regex = new Regex(str_pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);

     // Matches() yields an empty collection when nothing matches, so a separate
     // IsMatch() pre-check would only scan the input a second time.
     foreach (Match match in regex.Matches(str_html_part2))
     {
         // One set of values per matched flight segment.
         string FlightNo = match.Groups["FlightNo"].Value.Trim();
         string Cabin = match.Groups["Cabin"].Value.Trim();
         string FlghtDate = match.Groups["FlghtDate"].Value.Trim();
         string FromTo = match.Groups["FromTo"].Value.Trim();
         string Statu = match.Groups["Statu"].Value.Trim();
         string FromDt = match.Groups["FromDt"].Value.Trim();
         string ToDt = match.Groups["ToDt"].Value.Trim();
         // Use the values here, e.g. pass them to a lookup such as:
         //public bool IsExistFlight(string allcout,string cabin,string FromDt,string ToDt, string sp_code, string fromcity, string tocity, string fromdate)
     }
    1  if (!Regex.IsMatch(match.Groups["Result"].Value.ToString(), @"^[0-9]*$"))
    View Code


    // Generic template: run strPattern over htmlContent and visit every match.
    Regex pattern = new Regex(strPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
    foreach (Match hit in pattern.Matches(htmlContent))
    {
        // The full text captured by this match.
        string value = hit.Value;
    }

    测试解析PNR航班 

    rt编码信息 如: 

    RTAAAAAA                                                                       
     1.MENG/HONG MS 2.ZHANG/DEPING MR 3.ZHANG/MUHAN MS AAAAAA                       
     4.  NH964  W   TU18JUL  PEKHND HK3   0825 1250      SEAME  3 I                 
     5.  NH963  V   MO24JUL  HNDPEK HK3   1720 2010      SEAME  I 3                 
     6.SZX/T SZX/T 0755-82819601/SHENZHEN TIANTAI AIR INTERNATIONAL TRAVEL AGENCY   
        CO.,LTD ABCDEFG                                                             
     7.18912790711                                                                  
     8.18912790711                                                                  
     9.TL/0625/18JUL/SZX000                                                         
    10.SSR ADTK 1E TO NH BY 30JUN 1200 OTHERWISE WILL BE XLD                        
    11.SSR DOCS NH HK1 P/CN/G42027462/CN/13OCT68/F/27APR20/MENG/HONG/P1             
    12.SSR DOCS NH HK1 P/CN/G40834536/CN/08SEP66/M/25MAY20/ZHANG/DEPING/P2          
    13.SSR DOCS NH HK1 P/CN/E81525458/CN/07MAY99/F/19JUL26/ZHANG/MUHAN/P3          +
                                                                                   
                                                                                    
                                                                                    
    PN                                                                             
    14.SSR CTCM NH HK1 18912790729/P3                                              -
    15.OSI NH CTCT18912790729                                                       
    16.RMK TJ AUTH SZV000/T                                                         
    17.RMK 备注信息                                                                 
    18.RMK 1A/M42ROX                                                                
    19.SZX000 
    

      

    解析方法

      /// <summary>
      /// Parses the text of an RT (retrieve PNR) response into an <c>OrderView</c>:
      /// the record locator, the flight segments and the passengers, enriching
      /// passengers with passport data from "SSR DOCS" lines when present.
      /// </summary>
      /// <param name="pnrtxt">Raw RT text as returned by the host system.</param>
      /// <param name="msg">Receives a user-facing error message when parsing fails.</param>
      /// <returns>
      /// The populated order on success. When no PNR, flight or passenger can be matched
      /// the (partially filled) order is returned with <paramref name="msg"/> set; when a
      /// matched segment or passenger cannot be converted, <c>null</c> is returned.
      /// </returns>
      public OrderView GetOrderViewByRtPNRTxt(string pnrtxt, ref string msg)
      {
          ILogHandle handle = new ILogHandle(userid, "1E", "SELF", "解析RTPNR");
          DateTime _now = DateTime.Now;
          int restime = 0;

          OrderView result = new OrderView();
          // Treat a null input like an empty one so the regex calls below cannot throw.
          if (pnrtxt == null)
          {
              pnrtxt = "";
          }
          string strreq = pnrtxt;
          string strrsp = "";
          const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Multiline;

          // The record locator is the six alphanumerics following the last passenger title.
          // Longer titles come first in the alternation so "MRS" is not consumed as "MR".
          // Verbatim strings are required: "\s" is not a valid escape in a regular literal.
          Regex rex = new Regex(@"\s*(MRS|MISS|MR|MS)\s*(?<PNR>[a-zA-Z0-9]{6})", options);
          Match pnrMatch = rex.Match(pnrtxt);
          if (!pnrMatch.Success)
          {
              msg = "未解析到PNR";
              restime = GetRtPnrElapsedMs(_now);
              handle.Error(strreq, strrsp, "未解析到PNR", restime);
              return result;
          }
          string pnr = pnrMatch.Groups["PNR"].Value.Trim();

          // ---- flight segments -------------------------------------------------
          int seq = 1;
          try
          {
              string str_pattern = @"(?<FlightNo>[0-9A-Z]{2}[0-9]{1,4})\s*(?<Cabin>[A-Z0-9]{1,2})\s*(?<FlghtDate>[A-Z]{2}[0-9]{2}[A-Z]{3})\s*(?<FromTo>[A-Z]{6})\s*(?<Statu>[A-Za-z]{2}[0-9]{1})\s*(?<FromDt>[0-9]{4}[+]?[1-9]?)\s*(?<ToDt>[0-9]{4}[+]?[1-9]?)";
              Regex regex = new Regex(str_pattern, options);
              MatchCollection matchCollection = regex.Matches(pnrtxt);
              if (matchCollection.Count == 0)
              {
                  msg = "未匹配到任何航班信息";
                  restime = GetRtPnrElapsedMs(_now);
                  handle.Error(strreq, strrsp, "未匹配到任何航班信息", restime);
                  return result;
              }

              result.flights = new List<FlightModel>();
              // Departure date of the first segment; later segments are compared against it.
              DateTime dtfdate = DateTime.Now;

              foreach (Match match in matchCollection)
              {
                  // FlghtDate looks like "TU18JUL": weekday(2) + day(2) + month(3).
                  string flightDate = match.Groups["FlghtDate"].Value.Trim();
                  string fromTo = match.Groups["FromTo"].Value.Trim();

                  DateTime dttrgar = Convert.ToDateTime(DateTime.Now.Year.ToString() + "-" + MakePnrText.GetMonth(flightDate.Substring(4)) + "-" + flightDate.Substring(2, 2));
                  // The text carries no year: a date before today is taken to be next year.
                  // Compare against Today (midnight) so a flight departing today is not
                  // pushed a year ahead.
                  if (dttrgar < DateTime.Today)
                  {
                      dttrgar = dttrgar.AddYears(1);
                  }

                  string fdate = dttrgar.ToString("yyyy-MM-dd");
                  if (seq == 1)
                  {
                      dtfdate = Convert.ToDateTime(fdate);
                  }

                  FlightModel f = new FlightModel();
                  f.flightno = match.Groups["FlightNo"].Value.Trim();
                  f.seat = match.Groups["Cabin"].Value.Trim().Substring(0, 1);
                  f.carrier = f.flightno.Substring(0, 2);
                  f.dept = fromTo.Substring(0, 3);
                  f.depttime = GetDatetime(fdate, match.Groups["FromDt"].Value.Trim());
                  f.arr = fromTo.Substring(3);
                  f.arrtime = GetDatetime(fdate, match.Groups["ToDt"].Value.Trim());
                  f.sailtype = seq;
                  // Two or more days after the first departure => triptype 2, otherwise 1.
                  f.triptype = (f.depttime.Value - dtfdate).TotalDays >= 2 ? 2 : 1;
                  f.optcarrier = f.carrier;
                  f.optflightno = f.flightno;
                  f.state = "Y";
                  result.flights.Add(f);
                  seq++;
              }
          }
          catch (Exception)
          {
              // seq is the 1-based number of the segment that failed to convert.
              msg = "" + seq + "段航班信息有误,请核实RT编码文本信息";
              restime = GetRtPnrElapsedMs(_now);
              handle.Error(strreq, strrsp, msg, restime);
              return null;
          }

          // ---- passengers ------------------------------------------------------
          result.passes = new List<PassengerModel>();
          seq = 1;
          DateTime dtnow = DateTime.Now;
          // One generator for the whole call: creating a new Random per value can
          // yield identical seeds and therefore identical "random" numbers.
          Random rnd = new Random();
          try
          {
              string str_pass = @"[.]?(?<name>[A-Z]{2,}/[A-Z]{1,}\s?[A-Z]{0,})\s*(?<sex>MRS|MISS|MR|MS)\s*(" + Regex.Escape(pnr) + ")?";
              rex = new Regex(str_pass, options);
              MatchCollection matches = rex.Matches(pnrtxt);
              if (matches.Count == 0)
              {
                  msg = "未匹配到任何乘客信息";
                  restime = GetRtPnrElapsedMs(_now);
                  handle.Error(strreq, strrsp, "未匹配到任何乘客信息", restime);
                  return result;
              }

              foreach (Match match in matches)
              {
                  string name = match.Groups["name"].Value.Trim();
                  string sex = match.Groups["sex"].Value.Trim();
                  if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(sex))
                  {
                      msg = "" + seq + "位乘客信息有误,请核实RT编码文本信息";
                      restime = GetRtPnrElapsedMs(_now);
                      handle.Error(strreq, strrsp, msg, restime);
                      return null;
                  }

                  // Placeholder document data; overwritten below when SSR DOCS is available.
                  PassengerModel p = new PassengerModel();
                  p.name = name;
                  // The regex is case-insensitive, so compare the title the same way.
                  p.sex = string.Equals(sex, "MR", StringComparison.OrdinalIgnoreCase) ? "M" : "F";
                  p.phone = "";
                  p.nationality = "CN";
                  p.birthday = dtnow.AddYears(rnd.Next(-20, -13));
                  p.cardaddress = "CN";
                  p.cardexpire = dtnow.AddYears(rnd.Next(2, 10));
                  p.cardno = "P88888";
                  p.cardtype = "PP";
                  p.ptype = 1;
                  result.passes.Add(p);
                  if (seq == 1)
                  {
                      result.needpassinfo = "1";
                  }
                  seq++;
              }

              //SSR DOCS NH HK1 P/CN/E81525458/CN/07MAY99/F/19JUL26/ZHANG/MUHAN/P3
              string str_pinfo = @"(.SSR DOCS [0-9A-Z]{2} [0-9A-Z]{2}1 P/)(?<cardaddress>[A-Z]{2})/(?<cardno>[0-9A-Z]{2,})/(?<nationality>[A-Z]{2})/(?<birthday>[0-9A-Z]{7})/(?<sex>[A-Z]{1})/(?<cardexpire>[0-9A-Z]{7})/(?<name>[A-Z]{2,}(/)[A-Z]{1,}\s?[A-Z]{0,})(/H)?/P[0-9]{1,}\s*";
              rex = new Regex(str_pinfo, options);
              matches = rex.Matches(pnrtxt);

              // Two-digit pivot for birth years: yy <= current yy => 20yy, otherwise 19yy.
              int nowsyear = DateTime.Now.Year % 100;
              // DOCS lines are paired with passengers by position; seq stays 1-based so
              // the error message in the catch block names the right passenger.
              seq = 1;
              foreach (Match match in matches)
              {
                  if (seq > result.passes.Count)
                  {
                      // More DOCS lines than parsed passengers: nothing left to pair with.
                      break;
                  }

                  PassengerModel pass = result.passes[seq - 1];
                  if (pass.name == match.Groups["name"].Value.Trim())
                  {
                      // Dates look like "07MAY99": day(2) + month(3) + year(2).
                      string birthday = match.Groups["birthday"].Value.Trim();
                      string cardexpire = match.Groups["cardexpire"].Value.Trim();
                      // Decided per passenger so one 20xx birthday does not leak into the next.
                      string yearpart = int.Parse(birthday.Substring(5)) <= nowsyear ? "20" : "19";

                      pass.cardaddress = match.Groups["cardaddress"].Value.Trim();
                      pass.cardno = match.Groups["cardno"].Value.Trim();
                      pass.nationality = match.Groups["nationality"].Value.Trim();
                      pass.sex = match.Groups["sex"].Value.Trim();
                      pass.birthday = Convert.ToDateTime(yearpart + birthday.Substring(5) + "-" + MakePnrText.GetMonth(birthday.Substring(2, 3)) + "-" + birthday.Substring(0, 2));
                      pass.cardexpire = Convert.ToDateTime("20" + cardexpire.Substring(5) + "-" + MakePnrText.GetMonth(cardexpire.Substring(2, 3)) + "-" + cardexpire.Substring(0, 2));
                  }
                  seq++;
              }
          }
          catch (Exception)
          {
              msg = "" + seq + "位乘客信息有误,请核实RT编码文本";
              restime = GetRtPnrElapsedMs(_now);
              handle.Error(strreq, strrsp, msg, restime);
              return null;
          }

          result.extemmsg = pnr;
          restime = GetRtPnrElapsedMs(_now);
          handle.Succes(strreq, strrsp, restime);
          return result;
      }

      /// <summary>Whole milliseconds elapsed since <paramref name="start"/>, for the log entries.</summary>
      private static int GetRtPnrElapsedMs(DateTime start)
      {
          return Convert.ToInt32((DateTime.Now - start).TotalMilliseconds);
      }
    View Code

    2,HtmlAgilityPack 之 HtmlNode类 (主要是XPath语法解析,firebug插件可以查看对应XPath)

    // Parse the raw HTML into a DOM and pick rows out of it with an XPath query.
    string detailContext = "html 源码";
    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(detailContext);
    // Rows with class 'line' and height 40 inside the table with class 'tab_result'.
    // NOTE(review): check trlist for null before iterating — confirm what SelectNodes
    // returns when the XPath matches nothing.
    HtmlNodeCollection trlist = doc.DocumentNode.SelectNodes("//table[@class='tab_result']//tr[@class='line'][@height='40']");

    3,Newtonsoft.Json序列化和反序列化

    这里下载:http://www.newtonsoft.com/products/json/ 
     
    // Deserialize the JSON text held in `context` straight into a typed list.
    // `Models.实体类` appears to be a placeholder for your own entity class.
    var list = Newtonsoft.Json.JsonConvert.DeserializeObject<List<Models.实体类>>(context);
    View Code
     // Read a JSON document from `stream`, then download every image listed in its
     // "imgs" array, reporting progress and errors to the UI controls.
     using (StreamReader reader = new StreamReader(stream))
     {
         string jsonData = reader.ReadToEnd();
         // Parse the JSON; both casts yield null when the shape is not what we expect,
         // so guard before dereferencing.
         JObject objectRoot = JsonConvert.DeserializeObject(jsonData) as JObject;
         JArray imgsArray = objectRoot == null ? null : objectRoot["imgs"] as JArray;
         if (imgsArray != null)
         {
             for (int i = 0; i < imgsArray.Count; i++)
             {
                 JObject img = imgsArray[i] as JObject;
                 if (img == null)
                 {
                     // Not a JSON object: nothing to download for this entry.
                     continue;
                 }

                 string objUrl = (string)img["objURL"];
                 // Snapshot the loop counter. BeginInvoke runs the delegate later on the
                 // UI thread, and a lambda capturing `i` itself would read whatever value
                 // the loop has advanced to by then.
                 int index = i;
                 //txtLogs.AppendText(objUrl + Environment.NewLine); // debug: print the image URL
                 try
                 {
                     // Download this one image.
                     DownloadImage(objUrl);
                     // Update the progress bar.
                     progressBar.BeginInvoke(new Action(() =>
                         {
                             progressBar.Value = index * 100 / sumCount;
                         }));
                     // Append to the log text box.
                     txtLogs.BeginInvoke(new Action(() =>
                         {
                             txtLogs.AppendText("已下载:" + objUrl + Environment.NewLine);
                         }));
                 }
                 catch (Exception ex)
                 {
                     // Marshal the error message onto the UI thread that owns txtLogs.
                     txtLogs.BeginInvoke(new Action(() =>
                         {
                             txtLogs.AppendText("【异常:" + ex.Message + "" + Environment.NewLine);
                         }));
                 }
             }
         }
     }
    View Code
    // Extract the baggage piece count: the digits immediately before "件".
    // \d is required here; the character class [d] would only match the letter 'd'.
    Regex _rexPC = new Regex(@"(\d{1,})件");
    string str="成都-昆明,1件,每件23KG,长宽高100*60*40CM;昆明-万象,1件,每件23KG,长宽高100*60*40CM.万象-昆明,1件,每件23KG,长宽高100*60*40CM;昆明-成都,1件,每件23KG,长宽高100*60*40CM";
    // Match once and test Success instead of scanning twice with IsMatch + Match.
    var _mch = _rexPC.Match(str);
    if (_mch.Success)
    {
        // Only the first occurrence is used; group 1 holds the digits.
        int adtpc = StringHelper.StrToInt(_mch.Groups[1].Value, 0);
    }
    View Code
    有时候获取json数据要解析时需手动写实体类,之前一直手写,感觉太浪费时间了,后面找到了一款工具,可以实现转换功能。
     
  • 相关阅读:
    macbook 无声音解决方案
    webapck dev server代理请求 json截断问题
    百度卫星地图开启
    服务器 nginx配置 防止其他域名绑定自己的服务器
    记一次nginx php配置的心路历程
    遇到npm报错read ECONNRESET怎么办
    运行svn tortoiseSvn cleanup 命令失败的解决办法
    svn add 命令 递归目录下所有文件
    m4出现Please port gnulib freadahead.c to your platform! Look at the definition of fflush, fread, ungetc on your system, then report this to bug-gnulib."
    Ubuntu下安装GCC,mpc、mpfr、gmp
  • 原文地址:https://www.cnblogs.com/systemkk/p/4227004.html
Copyright © 2020-2023  润新知