• c#抓取网页数据


    写了一个简单的抓取网页数据的小例子,代码如下:

     /// <summary>
     /// Downloads the page at <paramref name="Url"/> and returns its HTML source,
     /// decoding the response body as GB2312.
     /// </summary>
     /// <param name="Url">Absolute http/https address of the page to fetch.</param>
     /// <returns>
     /// The page source, or an empty string when the address is missing, is not an
     /// HTTP address, or the download fails.
     /// </returns>
     private string GetWebContent(string Url)
     {
         if (string.IsNullOrEmpty(Url))
         {
             return string.Empty;
         }

         try
         {
             // WebRequest.Create picks the request type from the URI scheme; anything
             // other than http/https (file://, ftp://) is not an HttpWebRequest.
             HttpWebRequest request = WebRequest.Create(Url) as HttpWebRequest;
             if (request == null)
             {
                 return string.Empty;
             }

             // Timeout is in milliseconds and bounds the wait inside GetResponse().
             request.Timeout = 30000;
             request.Headers.Set("Pragma", "no-cache");

             // Dispose the response, stream and reader deterministically so the
             // underlying connection is released even when reading fails.
             using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
             using (Stream streamReceive = response.GetResponseStream())
             {
                 if (streamReceive == null)
                 {
                     return string.Empty;
                 }

                 // The target site serves GB2312-encoded pages, so decode with that code page.
                 Encoding encoding = Encoding.GetEncoding("GB2312");
                 using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                 {
                     return streamReader.ReadToEnd();
                 }
             }
         }
         catch (WebException ex)
         {
             // DNS failure, timeout, or an HTTP error status.
             TraceFetchFailure(Url, ex);
         }
         catch (IOException ex)
         {
             // Connection dropped while the body was being read.
             TraceFetchFailure(Url, ex);
         }
         catch (UriFormatException ex)
         {
             // Url is not a well-formed URI.
             TraceFetchFailure(Url, ex);
         }
         catch (NotSupportedException ex)
         {
             // Url uses a scheme that has no registered WebRequest handler.
             TraceFetchFailure(Url, ex);
         }

         // Best-effort contract: callers receive an empty string on any fetch failure.
         return string.Empty;
     }

     /// <summary>
     /// Records a failed download in the trace output so failures are not silently lost.
     /// </summary>
     private static void TraceFetchFailure(string url, Exception error)
     {
         System.Diagnostics.Trace.TraceWarning("GetWebContent failed for {0}: {1}", url, error.Message);
     }
    24 //为了使用HttpWebRequest和HttpWebResponse,需添加命名空间引用(见文末“引用”部分) 
    25 
    26 //以下是程序具体实现过程: 
    /// <summary>
    /// Button click handler: downloads the awtrip.com home page, isolates the list that
    /// follows the "热门目的地旅游" heading, and writes the text of the first link of each
    /// list item to the response, separated by '$'.
    /// </summary>
    /// <param name="sender">The control that raised the event (unused).</param>
    /// <param name="e">Event data (unused).</param>
    /// <remarks>
    /// Writes nothing when the download fails or the expected markup is not found.
    /// NOTE(review): WebBrowser wraps an ActiveX control and therefore needs an STA
    /// thread; confirm the hosting page is configured accordingly.
    /// </remarks>
    protected void btn_Click(object sender, EventArgs e)
    {
        // Address of the page to scrape.
        string Url = "http://www.awtrip.com/";

        // GetWebContent returns an empty string when the download fails.
        string strWebContent = GetWebContent(Url);

        // Cut out only the <ul>...</ul> fragment that holds the data.
        string strWeb = ExtractDestinationList(strWebContent);
        if (strWeb == null)
        {
            return;
        }

        StringBuilder strlist = new StringBuilder();

        // WebBrowser is IDisposable; release the underlying control when done.
        using (WebBrowser webb = new WebBrowser())
        {
            // Load a blank page so there is a document to write the fragment into.
            webb.Navigate("about:blank");
            if (webb.Document == null)
            {
                return;
            }

            HtmlDocument htmldoc = webb.Document.OpenNew(true);
            htmldoc.Write(strWeb);

            bool first = true;
            foreach (HtmlElement item in htmldoc.GetElementsByTagName("li"))
            {
                HtmlElementCollection links = item.GetElementsByTagName("a");
                if (links.Count == 0)
                {
                    // List item without a link: nothing to report for it.
                    continue;
                }

                if (!first)
                {
                    strlist.Append('$');
                }
                first = false;

                // Append, not AppendFormat: the scraped text must never be treated
                // as a composite format string.
                strlist.Append(links[0].InnerText);
            }
        }

        // The text comes from a remote site, so encode it before emitting it as HTML.
        Response.Write(Server.HtmlEncode(strlist.ToString()));

        // TODO: finally, insert the extracted names into the database.
    }

    /// <summary>
    /// Returns the first &lt;ul&gt;...&lt;/ul&gt; fragment that follows the "热门目的地旅游"
    /// heading inside the page body, or null when any of the markers is missing.
    /// </summary>
    private static string ExtractDestinationList(string html)
    {
        const string ListEndTag = "</ul>";

        if (string.IsNullOrEmpty(html))
        {
            return null;
        }

        // Each search starts where the previous marker was found; -1 means the page
        // layout is not what this scraper expects.
        int iBodyStart = html.IndexOf("<body", 0, StringComparison.Ordinal);
        if (iBodyStart < 0)
        {
            return null;
        }

        int iStart = html.IndexOf("热门目的地旅游", iBodyStart, StringComparison.Ordinal);
        if (iStart < 0)
        {
            return null;
        }

        int iTableStart = html.IndexOf("<ul", iStart, StringComparison.Ordinal);
        if (iTableStart < 0)
        {
            return null;
        }

        int iTableEnd = html.IndexOf(ListEndTag, iTableStart, StringComparison.Ordinal);
        if (iTableEnd < 0)
        {
            return null;
        }

        // Include the closing tag itself in the fragment.
        return html.Substring(iTableStart, iTableEnd - iTableStart + ListEndTag.Length);
    }

    引用:

    using System.Net;
    using System.IO;
    using System.Text;
    using System.Windows.Forms;
    View Code

    运行时可能会遇到“当前线程不在单线程单元中,因此无法实例化 ActiveX 控件”的问题,把aspx页面顶部的AutoEventWireup设置为true就可以了(如果仍然报错,可在@Page指令中加上AspCompat="true",让页面在单线程单元(STA)线程中执行)

  • 相关阅读:
    Socket的应用案例
    利用XStream实现对象XML话
    策略模式
    深入理解Java引用类型
    java 消息机制 ActiveMQ入门实例
    activity工作流表结构分析
    Spring MVC 之 Hello World
    如何发布Web项目到互联网
    ionic开发ios app
    ionic开发android app步骤
  • 原文地址:https://www.cnblogs.com/lanmoxiaozhu/p/3305096.html
Copyright © 2020-2023  润新知