用Asp.Net c#写的采集小例子
前台页面:
<%@ Page language="c#" Codebehind="Gethttpcode.aspx.cs" AutoEventWireup="false" Inherits="coll_net.GetPageHtml" %>
<%--
  Form with a URL box, three submit buttons and a multi-line box for the result.
  AutoEventWireup is off, so the button handlers must be attached in the
  code-behind class named by the Inherits attribute.
--%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<HTML>
<HEAD>
<title>采集例子</title>
<meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
<meta name="CODE_LANGUAGE" Content="C#">
<meta name="vs_defaultClientScript" content="JavaScript">
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
</HEAD>
<body MS_POSITIONING="GridLayout">
<form id="aspNetBuffer" method="post" runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<%-- The default URL is given through the Text attribute so that no line break ends up inside the value. --%>
<asp:TextBox id="UrlText" runat="server" Width="400px" Text="http://www.0579.info/"></asp:TextBox>
<br>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到"></asp:Button>
<asp:Button id="GetText" style="Z-INDEX: 101; LEFT: 208px; POSITION: absolute; TOP: 72px" runat="server"
Text="GetText"></asp:Button>
<br>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到"></asp:Button>
<br>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine"></asp:TextBox>
</form>
</body>
</HTML>
后台源代码:
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace coll_net
{
    /// <summary>
    /// Code-behind for the scraping sample page. Each button downloads the page
    /// whose address is typed into <c>UrlText</c> and puts the result into
    /// <c>ContentHtml</c>.
    /// </summary>
    public class GetPageHtml : System.Web.UI.Page
    {
        protected System.Web.UI.WebControls.TextBox UrlText;
        protected System.Web.UI.WebControls.Button WebClientButton;
        protected System.Web.UI.WebControls.Button WebRequestButton;
        protected System.Web.UI.WebControls.TextBox ContentHtml;
        protected System.Web.UI.WebControls.Button GetText;
        // NOTE(review): no handler in this class uses Button1; kept so the class layout is unchanged.
        protected System.Web.UI.WebControls.Button Button1;

        // Address accepted by the last successful call to TryReadPageUrl.
        private string PageUrl = "";

        /// <summary>
        /// Page load hook; the sample needs no per-request initialisation.
        /// </summary>
        private void Page_Load(object sender, System.EventArgs e)
        {
            // Put user code to initialize the page here.
        }

        /// <summary>
        /// Reads the address from <c>UrlText</c> into <c>PageUrl</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the text is an absolute http or https URL; otherwise
        /// <c>false</c>, after writing an explanation into <c>ContentHtml</c>.
        /// </returns>
        private bool TryReadPageUrl()
        {
            // Trim so stray whitespace or line breaks around the address do not reach the request.
            string text = UrlText.Text.Trim();
            bool accepted;
            try
            {
                Uri target = new Uri(text);
                // WebRequest/WebClient also understand file:// addresses; restricting the
                // scheme stops this page from being used to read files on the server.
                accepted = target.Scheme == Uri.UriSchemeHttp || target.Scheme == Uri.UriSchemeHttps;
            }
            catch (UriFormatException)
            {
                // Not an absolute URI; reported to the user below.
                accepted = false;
            }

            if (!accepted)
            {
                ContentHtml.Text = "仅支持以 http:// 或 https:// 开头的完整网址。";
                return false;
            }

            PageUrl = text;
            return true;
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with <see cref="WebRequest"/> and returns
        /// the body decoded with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="url">Absolute address to fetch.</param>
        /// <returns>The response body as text.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        private string DownloadWithWebRequest(string url)
        {
            WebRequest request = WebRequest.Create(url);
            // The using blocks release the response, stream and reader even when reading throws.
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, Encoding.Default))
            {
                // NOTE(review): Encoding.Default is the server's ANSI code page; pages
                // served in a different charset will not decode correctly.
                return sr.ReadToEnd();
            }
        }

        /// <summary>
        /// Click handler of <c>WebClientButton</c>: fetches the page with
        /// <see cref="WebClient"/> and shows its source.
        /// </summary>
        private void WebClientButton_Click(object sender, System.EventArgs e)
        {
            if (!TryReadPageUrl())
            {
                return;
            }

            try
            {
                using (WebClient wc = new WebClient())
                {
                    // NOTE(review): this offers the server process's own credentials to
                    // whatever site is requested - confirm that is intended.
                    wc.Credentials = CredentialCache.DefaultCredentials;

                    // Method 1: download the raw bytes and decode them.
                    byte[] pageData = wc.DownloadData(PageUrl);
                    ContentHtml.Text = Encoding.Default.GetString(pageData);

                    // Method 2 (alternative): read wc.OpenRead(PageUrl) through a
                    // StreamReader created with Encoding.Default and call ReadToEnd().
                }
            }
            catch (WebException ex)
            {
                // Show the failure in the result box instead of an unhandled error page.
                ContentHtml.Text = ex.Message;
            }
        }

        /// <summary>
        /// Click handler of <c>WebRequestButton</c>: fetches the page with
        /// <see cref="WebRequest"/> and shows its source.
        /// </summary>
        private void WebRequestButton_Click(object sender, System.EventArgs e)
        {
            if (!TryReadPageUrl())
            {
                return;
            }

            try
            {
                ContentHtml.Text = DownloadWithWebRequest(PageUrl);
            }
            catch (WebException ex)
            {
                ContentHtml.Text = ex.Message;
            }
        }

        /// <summary>
        /// Click handler of <c>GetText</c>: fetches the page, removes everything that
        /// looks like a tag and collapses runs of whitespace into single spaces.
        /// </summary>
        private void GetText_Click(object sender, System.EventArgs e)
        {
            if (!TryReadPageUrl())
            {
                return;
            }

            try
            {
                string html = DownloadWithWebRequest(PageUrl);
                string withoutTags = Regex.Replace(html, "<[^>]*>", "");
                // Collapse whitespace runs.
                ContentHtml.Text = Regex.Replace(withoutTags, "\\s+", " ");
            }
            catch (WebException ex)
            {
                ContentHtml.Text = ex.Message;
            }
        }

        #region Web Form Designer generated code
        /// <summary>
        /// Attaches the event handlers before the base class initialises the page.
        /// </summary>
        override protected void OnInit(EventArgs e)
        {
            //
            // CODEGEN: This call is required by the ASP.NET Web Form Designer.
            //
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>
        /// Required method for Designer support: wires the button and page events.
        /// </summary>
        private void InitializeComponent()
        {
            this.WebClientButton.Click += new System.EventHandler(this.WebClientButton_Click);
            this.WebRequestButton.Click += new System.EventHandler(this.WebRequestButton_Click);
            // Previously missing: without this subscription GetText_Click never ran.
            this.GetText.Click += new System.EventHandler(this.GetText_Click);
            this.Load += new System.EventHandler(this.Page_Load);
        }
        #endregion
    }
}
以下是引用片段:
以下写法适用于通过 ISA Server 代理上网的环境。
修改下 WebRequest 方法:
// Send the request through an authenticating proxy (replace the placeholders).
PageUrl = UrlText.Text;
WebRequest request = WebRequest.Create(PageUrl);
// Build a dedicated proxy object instead of casting request.Proxy: the default
// proxy is not guaranteed to be a WebProxy, and changing it in place would
// affect every other request that shares it.
WebProxy myProxy = new WebProxy();
myProxy.Address = new Uri("http://代理服务器:端口");
myProxy.Credentials = new NetworkCredential("用户名", "密码", "域名");
request.Proxy = myProxy;
WebResponse response = request.GetResponse();
以下是引用片段:
另一个实例 〔转〕
/// <summary>
/// Downloads the file at <c>Url</c> through MSXML2.XMLHTTP, saves it into the
/// application's physical root folder and writes a link to the saved copy into
/// the response; on failure the HTTP status text is written instead.
/// </summary>
private void Page_Load(object sender, System.EventArgs e)
{
    // NOTE(review): this address has no scheme/host; presumably an absolute URL is
    // needed for a server-side XMLHTTP request - confirm before use.
    string Url = "/blog/upload/20051126115210282.gif";
    string StringFileName = Url.Substring(Url.LastIndexOf("/") + 1);
    // Path.Combine copes with a root path that does or does not end in a separator.
    string savePath = System.IO.Path.Combine(Request.PhysicalApplicationPath, StringFileName);

    MSXML2.XMLHTTP _xmlhttp = new MSXML2.XMLHTTPClass();
    _xmlhttp.open("GET", Url, false, null, null);
    _xmlhttp.send("");

    // The call is synchronous, so readyState alone does not tell whether the server
    // actually returned the file; also require HTTP 200 so error pages are not saved.
    if (_xmlhttp.readyState == 4 && _xmlhttp.status == 200)
    {
        byte[] body = (byte[])_xmlhttp.responseBody;
        // FileMode.Create replaces an existing file; using closes the stream even if the write fails.
        using (System.IO.FileStream fs = new System.IO.FileStream(savePath, System.IO.FileMode.Create))
        {
            fs.Write(body, 0, body.Length);
        }

        // ApplicationPath has no trailing slash unless it is the site root.
        string appPath = Request.ApplicationPath;
        if (!appPath.EndsWith("/"))
        {
            appPath += "/";
        }

        // Plain ASCII quotes: typographic quotes are not valid HTML attribute delimiters.
        Response.Write("文件已经得到。<br><a href='" + appPath + StringFileName + "' target='_blank'>");
        Response.Write("查看" + StringFileName + "</a>");
    }
    else
    {
        Response.Write(_xmlhttp.statusText);
    }
    Response.End();
}
前台页面:
<%@ Page language="c#" Codebehind="Gethttpcode.aspx.cs" AutoEventWireup="false" Inherits="coll_net.GetPageHtml" %>
<%--
  Form with a URL box, three submit buttons and a multi-line box for the result.
  AutoEventWireup is off, so the button handlers must be attached in the
  code-behind class named by the Inherits attribute.
--%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<HTML>
<HEAD>
<title>采集例子</title>
<meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
<meta name="CODE_LANGUAGE" Content="C#">
<meta name="vs_defaultClientScript" content="JavaScript">
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
</HEAD>
<body MS_POSITIONING="GridLayout">
<form id="aspNetBuffer" method="post" runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<%-- The default URL is given through the Text attribute so that no line break ends up inside the value. --%>
<asp:TextBox id="UrlText" runat="server" Width="400px" Text="http://www.0579.info/"></asp:TextBox>
<br>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到"></asp:Button>
<asp:Button id="GetText" style="Z-INDEX: 101; LEFT: 208px; POSITION: absolute; TOP: 72px" runat="server"
Text="GetText"></asp:Button>
<br>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到"></asp:Button>
<br>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine"></asp:TextBox>
</form>
</body>
</HTML>
后台源代码:
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace coll_net
{
    /// <summary>
    /// Code-behind for the scraping sample page. Each button downloads the page
    /// whose address is typed into <c>UrlText</c> and puts the result into
    /// <c>ContentHtml</c>.
    /// </summary>
    public class GetPageHtml : System.Web.UI.Page
    {
        protected System.Web.UI.WebControls.TextBox UrlText;
        protected System.Web.UI.WebControls.Button WebClientButton;
        protected System.Web.UI.WebControls.Button WebRequestButton;
        protected System.Web.UI.WebControls.TextBox ContentHtml;
        protected System.Web.UI.WebControls.Button GetText;
        // NOTE(review): no handler in this class uses Button1; kept so the class layout is unchanged.
        protected System.Web.UI.WebControls.Button Button1;

        // Address accepted by the last successful call to TryReadPageUrl.
        private string PageUrl = "";

        /// <summary>
        /// Page load hook; the sample needs no per-request initialisation.
        /// </summary>
        private void Page_Load(object sender, System.EventArgs e)
        {
            // Put user code to initialize the page here.
        }

        /// <summary>
        /// Reads the address from <c>UrlText</c> into <c>PageUrl</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the text is an absolute http or https URL; otherwise
        /// <c>false</c>, after writing an explanation into <c>ContentHtml</c>.
        /// </returns>
        private bool TryReadPageUrl()
        {
            // Trim so stray whitespace or line breaks around the address do not reach the request.
            string text = UrlText.Text.Trim();
            bool accepted;
            try
            {
                Uri target = new Uri(text);
                // WebRequest/WebClient also understand file:// addresses; restricting the
                // scheme stops this page from being used to read files on the server.
                accepted = target.Scheme == Uri.UriSchemeHttp || target.Scheme == Uri.UriSchemeHttps;
            }
            catch (UriFormatException)
            {
                // Not an absolute URI; reported to the user below.
                accepted = false;
            }

            if (!accepted)
            {
                ContentHtml.Text = "仅支持以 http:// 或 https:// 开头的完整网址。";
                return false;
            }

            PageUrl = text;
            return true;
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with <see cref="WebRequest"/> and returns
        /// the body decoded with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="url">Absolute address to fetch.</param>
        /// <returns>The response body as text.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        private string DownloadWithWebRequest(string url)
        {
            WebRequest request = WebRequest.Create(url);
            // The using blocks release the response, stream and reader even when reading throws.
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, Encoding.Default))
            {
                // NOTE(review): Encoding.Default is the server's ANSI code page; pages
                // served in a different charset will not decode correctly.
                return sr.ReadToEnd();
            }
        }

        /// <summary>
        /// Click handler of <c>WebClientButton</c>: fetches the page with
        /// <see cref="WebClient"/> and shows its source.
        /// </summary>
        private void WebClientButton_Click(object sender, System.EventArgs e)
        {
            if (!TryReadPageUrl())
            {
                return;
            }

            try
            {
                using (WebClient wc = new WebClient())
                {
                    // NOTE(review): this offers the server process's own credentials to
                    // whatever site is requested - confirm that is intended.
                    wc.Credentials = CredentialCache.DefaultCredentials;

                    // Method 1: download the raw bytes and decode them.
                    byte[] pageData = wc.DownloadData(PageUrl);
                    ContentHtml.Text = Encoding.Default.GetString(pageData);

                    // Method 2 (alternative): read wc.OpenRead(PageUrl) through a
                    // StreamReader created with Encoding.Default and call ReadToEnd().
                }
            }
            catch (WebException ex)
            {
                // Show the failure in the result box instead of an unhandled error page.
                ContentHtml.Text = ex.Message;
            }
        }

        /// <summary>
        /// Click handler of <c>WebRequestButton</c>: fetches the page with
        /// <see cref="WebRequest"/> and shows its source.
        /// </summary>
        private void WebRequestButton_Click(object sender, System.EventArgs e)
        {
            if (!TryReadPageUrl())
            {
                return;
            }

            try
            {
                ContentHtml.Text = DownloadWithWebRequest(PageUrl);
            }
            catch (WebException ex)
            {
                ContentHtml.Text = ex.Message;
            }
        }

        /// <summary>
        /// Click handler of <c>GetText</c>: fetches the page, removes everything that
        /// looks like a tag and collapses runs of whitespace into single spaces.
        /// </summary>
        private void GetText_Click(object sender, System.EventArgs e)
        {
            if (!TryReadPageUrl())
            {
                return;
            }

            try
            {
                string html = DownloadWithWebRequest(PageUrl);
                string withoutTags = Regex.Replace(html, "<[^>]*>", "");
                // Collapse whitespace runs.
                ContentHtml.Text = Regex.Replace(withoutTags, "\\s+", " ");
            }
            catch (WebException ex)
            {
                ContentHtml.Text = ex.Message;
            }
        }

        #region Web Form Designer generated code
        /// <summary>
        /// Attaches the event handlers before the base class initialises the page.
        /// </summary>
        override protected void OnInit(EventArgs e)
        {
            //
            // CODEGEN: This call is required by the ASP.NET Web Form Designer.
            //
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>
        /// Required method for Designer support: wires the button and page events.
        /// </summary>
        private void InitializeComponent()
        {
            this.WebClientButton.Click += new System.EventHandler(this.WebClientButton_Click);
            this.WebRequestButton.Click += new System.EventHandler(this.WebRequestButton_Click);
            // Previously missing: without this subscription GetText_Click never ran.
            this.GetText.Click += new System.EventHandler(this.GetText_Click);
            this.Load += new System.EventHandler(this.Page_Load);
        }
        #endregion
    }
}
以下是引用片段:
以下写法适用于通过 ISA Server 代理上网的环境。
修改下 WebRequest 方法:
// Send the request through an authenticating proxy (replace the placeholders).
PageUrl = UrlText.Text;
WebRequest request = WebRequest.Create(PageUrl);
// Build a dedicated proxy object instead of casting request.Proxy: the default
// proxy is not guaranteed to be a WebProxy, and changing it in place would
// affect every other request that shares it.
WebProxy myProxy = new WebProxy();
myProxy.Address = new Uri("http://代理服务器:端口");
myProxy.Credentials = new NetworkCredential("用户名", "密码", "域名");
request.Proxy = myProxy;
WebResponse response = request.GetResponse();
以下是引用片段:
另一个实例 〔转〕
/// <summary>
/// Downloads the file at <c>Url</c> through MSXML2.XMLHTTP, saves it into the
/// application's physical root folder and writes a link to the saved copy into
/// the response; on failure the HTTP status text is written instead.
/// </summary>
private void Page_Load(object sender, System.EventArgs e)
{
    // NOTE(review): this address has no scheme/host; presumably an absolute URL is
    // needed for a server-side XMLHTTP request - confirm before use.
    string Url = "/blog/upload/20051126115210282.gif";
    string StringFileName = Url.Substring(Url.LastIndexOf("/") + 1);
    // Path.Combine copes with a root path that does or does not end in a separator.
    string savePath = System.IO.Path.Combine(Request.PhysicalApplicationPath, StringFileName);

    MSXML2.XMLHTTP _xmlhttp = new MSXML2.XMLHTTPClass();
    _xmlhttp.open("GET", Url, false, null, null);
    _xmlhttp.send("");

    // The call is synchronous, so readyState alone does not tell whether the server
    // actually returned the file; also require HTTP 200 so error pages are not saved.
    if (_xmlhttp.readyState == 4 && _xmlhttp.status == 200)
    {
        byte[] body = (byte[])_xmlhttp.responseBody;
        // FileMode.Create replaces an existing file; using closes the stream even if the write fails.
        using (System.IO.FileStream fs = new System.IO.FileStream(savePath, System.IO.FileMode.Create))
        {
            fs.Write(body, 0, body.Length);
        }

        // ApplicationPath has no trailing slash unless it is the site root.
        string appPath = Request.ApplicationPath;
        if (!appPath.EndsWith("/"))
        {
            appPath += "/";
        }

        // Plain ASCII quotes: typographic quotes are not valid HTML attribute delimiters.
        Response.Write("文件已经得到。<br><a href='" + appPath + StringFileName + "' target='_blank'>");
        Response.Write("查看" + StringFileName + "</a>");
    }
    else
    {
        Response.Write(_xmlhttp.statusText);
    }
    Response.End();
}