• C# HtmlDocument和HtmlNode的使用以及节点的模糊查询


    C# 中 HtmlAgilityPack.HtmlDocument 和 HtmlAgilityPack.HtmlNode 的使用

      // Demonstrates HtmlAgilityPack.HtmlDocument and HtmlAgilityPack.HtmlNode:
      // download a page, parse it, then walk a list of <li> entries via XPath.
      // NOTE(review): wc, url, k and number are not declared in this snippet;
      // presumably a WebClient, the page address and two int counters - confirm.
      HtmlAgilityPack.HtmlDocument response = null;
      HtmlAgilityPack.HtmlNode responseNew = null;

      // Fully qualified, like the declarations above, so the name cannot be
      // confused with another HtmlDocument type that may be in scope.
      HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
      wc.Encoding = Encoding.UTF8;
      string html = wc.DownloadString(url);
      doc.LoadHtml(html);

      // Look up a node by XPath, much like XmlNode.SelectSingleNode.
      responseNew = doc.DocumentNode.SelectSingleNode("/html/body");

      // HtmlAgilityPack returns null (not an empty collection) when an XPath
      // matches nothing, so both lookups are checked before the loop runs.
      HtmlNodeCollection categoryNodeList = null;
      if (responseNew != null)
      {
          categoryNodeList = responseNew.SelectNodes("div[3]/div[1]/div[1]/div[1]/ul[1]/li");
      }

      if (categoryNodeList != null)
      {
          foreach (HtmlNode item in categoryNodeList)
          {
              var xpath = item.XPath;

              // On every odd k, rewrite a "/#text[n]" step into "/li".
              // NOTE(review): this only has an effect when iterating ChildNodes
              // (as an earlier variant of this loop did), where text nodes sit
              // between the <li> elements; with the "/li" query above the
              // Replace call looks like a no-op - confirm before removing.
              if (k % 2 != 0)
              {
                  number = number + 1;
                  xpath = xpath.Replace("/#text[" + number + "]", "/li"); // replace the substring
              }

              k = k + 1;
              string titleName, infourl, company, city, date, salary, salary_em, source;

              // Resolve the link once and reuse it for both the text and the URL.
              HtmlNode anchor = item.SelectSingleNode(xpath + "/div/div/span/a");
              if (anchor == null)
              {
                  // This entry has no title link; skip it instead of throwing.
                  continue;
              }

              titleName = anchor.InnerText;
              // GetAttributeValue falls back to the default when href is missing.
              infourl = anchor.GetAttributeValue("href", string.Empty); // url
          }
      }

    XPath 节点的模糊查询:contains(@属性, '模糊查询的值'),匹配属性值中包含该字符串的节点

     // Fuzzy (substring) match: selects the <div> children of the element whose
     // id is 'resultList' and whose class attribute contains the text 'el'.
     // NOTE(review): ulS and response are declared outside this snippet.
     ulS = response.DocumentNode.SelectNodes("//*[@id='resultList']/div[contains(@class,'el')]");

    XPath 节点的等值查询:@属性='值',只匹配属性值与该字符串完全相等的节点

    // Exact match: selects the <div> children of the element whose id is
    // 'resultList' and whose class attribute is exactly 'el'.
    // NOTE(review): ulS and response are declared outside this snippet.
    ulS = response.DocumentNode.SelectNodes("//*[@id='resultList']/div[@class='el']");

    // SelectNodes returns null (not an empty collection) when nothing matches.
    if (ulS != null)
    {
        // NOTE(review): the loop starts at index 2, so the first two matches are
        // skipped - presumably header rows; confirm against the target page.
        for (int i = 2; i < ulS.Count; i++)
        {
            var item = ulS[i];
            string titleName;

            // A relative XPath evaluated from the current node replaces
            // rebuilding an absolute path from item.XPath.
            var titleNode = item.SelectSingleNode("p/span/a");
            if (titleNode == null)
            {
                // No title link in this row; skip it instead of throwing.
                continue;
            }

            titleName = titleNode.InnerText;
        }
    }
  • 相关阅读:
    将01字符串转换成数字的办法
    Codeforces Round #180 (Div. 2) AB
    CPU制作过程『转』
    向VECTOR的头部添加元素
    母版页中js操作问题
    操作粘贴板
    XML和关系数据使用XML和数据集类
    XML和关系数据用XML加载数据集
    XPath和XSL转换向XML应用XSL转换
    XML和关系数据从XSD架构创建数据集映射
  • 原文地址:https://www.cnblogs.com/yangjinwang/p/6424552.html
Copyright © 2020-2023  润新知