• C# 获取网页信息


    • 获取网页源码
    // Approach 1: fetch the page with HttpWebRequest / HttpWebResponse
    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and returns its body as text.
    /// </summary>
    /// <remarks>
    /// If the response declares the charset "gbk" or "gb2312" (case-insensitive), the
    /// body is decoded with the "gb2312" encoding; any other (or missing) charset is
    /// decoded as UTF-8.
    /// </remarks>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The decoded response body.</returns>
    public string GetUrlHtml(string url)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // Dispose the response so the underlying connection is released even
                // when reading the body throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    // CharacterSet may be null or empty when the server sends no usable
                    // Content-Type header, so never dereference it directly.
                    string charset = response.CharacterSet ?? string.Empty;

                    // Ordinal, case-insensitive comparison avoids culture-dependent
                    // ToLower() results (e.g. the Turkish dotless i).
                    bool isSimplifiedChinese =
                        string.Equals(charset, "gbk", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(charset, "gb2312", StringComparison.OrdinalIgnoreCase);

                    Encoding encoding = isSimplifiedChinese
                        ? Encoding.GetEncoding("gb2312")
                        : Encoding.UTF8;

                    using (Stream respStream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(respStream, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
    // Approach 2: fetch the page with WebClient
      /// <summary>
      /// Downloads the resource at <paramref name="url"/> with a WebClient and decodes
      /// the raw bytes using <see cref="System.Text.Encoding.Default"/>.
      /// </summary>
      /// <param name="url">Address of the page to download.</param>
      /// <returns>The downloaded bytes decoded as a string.</returns>
      private static string htmlcontent(string url)
            {
                // WebClient implements IDisposable; release it as soon as the download is done.
                using (System.Net.WebClient wc = new System.Net.WebClient())
                {
                    byte[] pageData = wc.DownloadData(url);

                    // Encoding.Default depends on the runtime/platform. If Chinese text
                    // comes out garbled, decode with System.Text.Encoding.UTF8.GetString(pageData)
                    // instead.
                    return System.Text.Encoding.Default.GetString(pageData);
                }
            }
    • 获取网页元素(HtmlAgilityPack)
    using HtmlAgilityPack;
    // HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath);        // single element; null when nothing matches
    // HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(xpath);  // element collection; null when nothing matches
    //
    
    /// <summary>
    /// For every child of <paramref name="menu1"/>, downloads the child's page, selects
    /// the nodes matching <paramref name="Xpath"/> and appends one new <c>Menu</c>
    /// (with <c>LevelMenu</c> set to 3) per matched node to that child's
    /// <c>ChildMenus</c>.
    /// </summary>
    /// <param name="menu1">Menu whose children are populated in place.</param>
    /// <param name="Xpath">XPath expression evaluated against each child's page.</param>
    /// <returns>The same <paramref name="menu1"/> instance that was passed in.</returns>
    private static Menu Level_chidren_Menu(Menu menu1,string Xpath)
            {
                foreach (var item in menu1.ChildMenus)
                {
                    // Parse the HTML source of the child's page.
                    HtmlDocument doc1 = new HtmlDocument();
                    doc1.LoadHtml(HttpHelper.GetUrlHtml(item.MenuUrl));

                    // HtmlAgilityPack returns null (not an empty collection) when the
                    // XPath matches nothing; skip such pages instead of crashing.
                    HtmlNodeCollection nodes = doc1.DocumentNode.SelectNodes(Xpath);
                    if (nodes == null)
                    {
                        continue;
                    }

                    foreach (var page in nodes)
                    {
                        // NOTE(review): assumes every matched node contains an <a> with an
                        // href attribute; otherwise this lookup chain throws — confirm
                        // against HttpHelper.GetHtmlAttribute.
                        item.ChildMenus.Add(new Menu()
                        {
                            LevelMenu = 3,
                            MenuName = page.InnerText,
                            MenuUrl = HttpHelper.baseUrl + HttpHelper.GetHtmlAttribute(page.InnerHtml, "a").Attributes["href"].Value
                        });
                    }
                }
                return menu1;
            }
    

      

  • 相关阅读:
    集合类
    ajax技术
    Java中的Runnable、Callable、Future、FutureTask的区别与示例
    python 检测文件编码等
    android发送/接收Json包含中文的处理
    android 获取 imei号码 及相关信息
    RelativeLayout常用属性介绍
    Android之读取 AndroidManifest.xml 中的数据
    Java中int与Integer
    Handler sendMessage 与 obtainMessage (sendToTarget)
  • 原文地址:https://www.cnblogs.com/Zingu/p/14541846.html
Copyright © 2020-2023  润新知