<%@ Page language="c#" Codebehind="Gethttpcode.aspx.cs" AutoEventWireup="false" Inherits="coll_net.GetPageHtml" %>
<%-- Front-end page (Gethttpcode.aspx): a URL box, three fetch buttons and a multi-line box that receives the downloaded content. --%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<HTML>
	<HEAD>
		<title>采集例子</title>
		<meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
		<meta name="CODE_LANGUAGE" Content="C#">
		<meta name="vs_defaultClientScript" content="JavaScript">
		<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
	</HEAD>
	<body MS_POSITIONING="GridLayout">
		<form id="aspNetBuffer" method="post" runat="server">
			<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
			<%-- The inner text of this TextBox is its default value; keep it on one line. --%>
			<asp:TextBox id="UrlText" runat="server" Width="400px">http://www.0579.info/ </asp:TextBox>
			<br>
			<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到"></asp:Button>
			<asp:Button id="GetText" style="Z-INDEX: 101; LEFT: 208px; POSITION: absolute; TOP: 72px" runat="server" Text="GetText"></asp:Button>
			<br>
			<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到"></asp:Button>
			<br>
			<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine"></asp:TextBox>
		</form>
	</body>
</HTML>
// Code-behind source (Gethttpcode.aspx.cs):
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace coll_net
{
    /// <summary>
    /// Code-behind for the page-scraping sample. Downloads the page whose address
    /// is typed into <c>UrlText</c> and shows either its raw source or its
    /// tag-stripped text in <c>ContentHtml</c>.
    /// </summary>
    public class GetPageHtml : System.Web.UI.Page
    {
        protected System.Web.UI.WebControls.TextBox UrlText;
        protected System.Web.UI.WebControls.Button WebClientButton;
        protected System.Web.UI.WebControls.Button WebRequestButton;
        protected System.Web.UI.WebControls.TextBox ContentHtml;
        protected System.Web.UI.WebControls.Button GetText;
        protected System.Web.UI.WebControls.Button Button1;

        // Address validated by TryReadTargetUrl for the request being handled.
        private string PageUrl = "";

        /// <summary>Page load hook; this sample needs no per-request initialisation.</summary>
        private void Page_Load(object sender, System.EventArgs e)
        {
            // Put user code to initialise the page here.
        }

        /// <summary>
        /// Fetches the page with <see cref="WebClient"/> and shows its source.
        /// </summary>
        private void WebClientButton_Click(object sender, System.EventArgs e)
        {
            if (!TryReadTargetUrl())
            {
                return;
            }

            // WebClient is disposable; the using block releases it even when the download throws.
            using (WebClient wc = new WebClient())
            {
                // NOTE(review): this offers the server account's default credentials to a
                // user-supplied address - confirm that is really wanted outside an intranet.
                wc.Credentials = CredentialCache.DefaultCredentials;

                // Method 1: download the whole body as bytes and decode it.
                byte[] pageData = wc.DownloadData(PageUrl);
                ContentHtml.Text = Encoding.Default.GetString(pageData);

                // Method 2 (alternative): read the body through a stream instead.
                //   using (Stream resStream = wc.OpenRead(PageUrl))
                //   using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
                //   {
                //       ContentHtml.Text = sr.ReadToEnd();
                //   }
            }
        }

        /// <summary>
        /// Fetches the page with <see cref="WebRequest"/> and shows its source.
        /// </summary>
        private void WebRequestButton_Click(object sender, System.EventArgs e)
        {
            if (!TryReadTargetUrl())
            {
                return;
            }

            ContentHtml.Text = DownloadWithWebRequest(PageUrl);
        }

        /// <summary>
        /// Fetches the page with <see cref="WebRequest"/>, strips everything that looks
        /// like a tag and collapses runs of whitespace, then shows the remaining text.
        /// </summary>
        private void GetText_Click(object sender, System.EventArgs e)
        {
            if (!TryReadTargetUrl())
            {
                return;
            }

            string html = DownloadWithWebRequest(PageUrl);
            string withoutTags = Regex.Replace(html, "<[^>]*>", "");
            // Collapse whitespace runs into single spaces.
            ContentHtml.Text = Regex.Replace(withoutTags, "\\s+", " ");
        }

        /// <summary>
        /// Reads the address from <c>UrlText</c> into <c>PageUrl</c> when it is an
        /// absolute http/https URL; otherwise writes a hint into <c>ContentHtml</c>.
        /// </summary>
        /// <returns><c>true</c> when <c>PageUrl</c> now holds a usable address.</returns>
        private bool TryReadTargetUrl()
        {
            string candidate = UrlText.Text.Trim();
            try
            {
                Uri target = new Uri(candidate);
                // WebRequest.Create also understands file:// and other schemes; only web
                // addresses are allowed so the page cannot be used to read local files.
                if (target.Scheme == Uri.UriSchemeHttp || target.Scheme == Uri.UriSchemeHttps)
                {
                    PageUrl = candidate;
                    return true;
                }
            }
            catch (UriFormatException)
            {
                // Not an absolute URI - reported to the user below.
            }

            ContentHtml.Text = "请输入以 http:// 或 https:// 开头的完整网址。";
            return false;
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with <see cref="WebRequest"/> and returns the
        /// body decoded with the server's default ANSI encoding.
        /// </summary>
        /// <param name="url">Absolute address of the page to download.</param>
        /// <returns>The response body as text.</returns>
        private string DownloadWithWebRequest(string url)
        {
            WebRequest request = WebRequest.Create(url);

            // The response, its stream and the reader are all released even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
            {
                return sr.ReadToEnd();
            }
        }

        #region Web Form Designer generated code
        override protected void OnInit(EventArgs e)
        {
            //
            // CODEGEN: This call is required by the ASP.NET Web Form Designer.
            //
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>
        /// Required method for Designer support - wires the control and page events.
        /// </summary>
        private void InitializeComponent()
        {
            this.WebClientButton.Click += new System.EventHandler(this.WebClientButton_Click);
            this.WebRequestButton.Click += new System.EventHandler(this.WebRequestButton_Click);
            // The page sets AutoEventWireup="false", so GetText_Click never runs unless subscribed here.
            this.GetText.Click += new System.EventHandler(this.GetText_Click);
            this.Load += new System.EventHandler(this.Page_Load);
        }
        #endregion
    }
}
// Quoted snippet:
// For servers that reach the Internet through an ISA Server proxy, change the
// WebRequest approach as follows.
PageUrl = UrlText.Text;
WebRequest request = WebRequest.Create(PageUrl);

// Build a fresh WebProxy rather than casting request.Proxy: that property is typed
// IWebProxy and its default value is not guaranteed to be a WebProxy instance, so the
// cast can throw InvalidCastException.
WebProxy myProxy = new WebProxy();

// The literals below are placeholders: substitute the real proxy host:port, user name,
// password and domain before running.
myProxy.Address = new Uri("http://代理服务器:端口");
myProxy.Credentials = new NetworkCredential("用户名", "密码", "域名");
request.Proxy = myProxy;

WebResponse response = request.GetResponse();
以下是引用片段:
另一个实例 〔转〕
/// <summary>
/// Downloads a file through the MSXML2 XMLHTTP COM component, saves it into the
/// application's physical root folder and writes a link to the saved copy to the
/// response; when the download did not succeed the HTTP status text is written instead.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Page_Load(object sender, System.EventArgs e)
{
    // NOTE(review): this sample value is site-relative; XMLHTTP running on the server
    // presumably needs an absolute URL - confirm before reuse.
    string Url = "/blog/upload/20051126115210282.gif";
    string StringFileName = Url.Substring(Url.LastIndexOf("/") + 1);

    // Path.Combine inserts the right separator whether or not the folder already ends with one.
    string StringFullPath = System.IO.Path.Combine(Request.PhysicalApplicationPath, StringFileName);

    MSXML2.XMLHTTP _xmlhttp = new MSXML2.XMLHTTPClass();
    _xmlhttp.open("GET", Url, false, null, null);
    _xmlhttp.send("");

    // readyState 4 only says the exchange finished; require HTTP 200 as well so an
    // error page is never saved under the target file name.
    if (_xmlhttp.readyState == 4 && _xmlhttp.status == 200)
    {
        byte[] body = (byte[])_xmlhttp.responseBody;

        // FileMode.Create overwrites an existing file in one step; the using block
        // closes the stream even if the write throws.
        using (System.IO.FileStream fs = new System.IO.FileStream(StringFullPath, System.IO.FileMode.Create))
        {
            if (body != null)
            {
                fs.Write(body, 0, body.Length);
            }
        }

        // ApplicationPath has no trailing slash unless the application is the site root.
        string appPath = Request.ApplicationPath;
        if (!appPath.EndsWith("/"))
        {
            appPath += "/";
        }

        // Plain double quotes delimit the attributes (the typographic quotes used before
        // are not valid delimiters); HtmlEncode keeps the values from breaking out of them.
        Response.Write("文件已经得到。<br><a href=\"" + Server.HtmlEncode(appPath + StringFileName) + "\" target=\"_blank\">");
        Response.Write("查看" + Server.HtmlEncode(StringFileName) + "</a>");
    }
    else
    {
        // The status text comes from the remote server, so encode it before echoing.
        Response.Write(Server.HtmlEncode(_xmlhttp.statusText));
    }

    Response.End();
}