本文使用 WebRequest 类抓取网页内容,这里以 http://novel.hongxiu.com/a/1036665/10425842.html 为例。
代码如下:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace 网页抓取
{
    /// <summary>
    /// Form that downloads the page whose URL is shown in <c>label1</c>, strips
    /// its HTML tags, cuts out the text between two fixed markers, saves that
    /// excerpt to a file and shows it in <c>richTextBox1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Text that marks the beginning of the excerpt to keep.</summary>
        private const string StartMarker = "红|袖|言|情|小|说";

        /// <summary>Text that marks the end of the excerpt to keep.</summary>
        private const string EndMarker = "但是什么?";

        /// <summary>
        /// File the excerpt is written to. A verbatim literal is required here:
        /// in a regular literal <c>\x1</c> is a hex escape (U+0001), which
        /// produces an invalid path instead of <c>E:\x1.txt</c>.
        /// </summary>
        private const string OutputPath = @"E:\x1.txt";

        /// <summary>
        /// Matches one HTML tag. The negated character class cannot backtrack
        /// exponentially on an unclosed '&lt;' and also matches tags that span
        /// several lines.
        /// </summary>
        private static readonly Regex TagPattern = new Regex("<[^>]+>");

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Fetches the page at the URL in <c>label1.Text</c>, extracts the text
        /// between the start and end markers, writes it to the output file and
        /// displays it in <c>richTextBox1</c>.
        /// </summary>
        /// <remarks>
        /// Network and file-system exceptions are not caught here; they
        /// propagate to the caller. If the markers cannot be found, a message
        /// box is shown and nothing is written.
        /// </remarks>
        public void zhuaqu()
        {
            string html;
            WebRequest request = WebRequest.Create(label1.Text);

            // Dispose the response and the reader even if reading throws.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                html = reader.ReadToEnd();
            }

            string plainText = thtxt(html);
            string excerpt = ExtractExcerpt(plainText);
            if (excerpt == null)
            {
                MessageBox.Show("未在页面中找到起始或结束标记,无法截取正文。");
                return;
            }

            using (StreamWriter writer = new StreamWriter(OutputPath))
            {
                writer.WriteLine(excerpt);
            }

            richTextBox1.Text = excerpt;
        }

        /// <summary>
        /// Button handler: runs the scrape and reports expected failures
        /// (bad URL, network error, unwritable output file) in a message box
        /// instead of letting them escape into the UI message loop.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            try
            {
                zhuaqu();
            }
            catch (UriFormatException ex)
            {
                ReportFailure(ex);
            }
            catch (WebException ex)
            {
                ReportFailure(ex);
            }
            catch (IOException ex)
            {
                ReportFailure(ex);
            }
            catch (UnauthorizedAccessException ex)
            {
                ReportFailure(ex);
            }
        }

        /// <summary>Shows the message of a failed scrape to the user.</summary>
        private static void ReportFailure(Exception ex)
        {
            MessageBox.Show("抓取失败:" + ex.Message);
        }

        /// <summary>
        /// Returns the part of <paramref name="text"/> that starts at the start
        /// marker and runs through the end marker plus one following character,
        /// or <c>null</c> when either marker is missing.
        /// </summary>
        private static string ExtractExcerpt(string text)
        {
            // Ordinal search: the markers are exact literals, not linguistic text.
            int start = text.IndexOf(StartMarker, StringComparison.Ordinal);
            if (start < 0)
            {
                return null;
            }

            // Search from the start marker onwards so the length is never negative.
            int end = text.IndexOf(EndMarker, start, StringComparison.Ordinal);
            if (end < 0)
            {
                return null;
            }

            // The original used "+ 6": the 5-character end marker plus one
            // trailing character. Clamp so the range never runs past the text.
            int wanted = end - start + EndMarker.Length + 1;
            int length = Math.Min(wanted, text.Length - start);
            return text.Substring(start, length);
        }

        /// <summary>
        /// Removes HTML tags and any remaining '&gt;' characters from
        /// <paramref name="Html"/>.
        /// </summary>
        /// <param name="Html">HTML source; null or empty yields an empty string.</param>
        /// <returns>The text with tags removed.</returns>
        private string thtxt(string Html)
        {
            if (string.IsNullOrEmpty(Html))
            {
                return string.Empty;
            }

            string th = TagPattern.Replace(Html, "");

            // NOTE(review): the original also called Replace("<", "<"), which is a
            // no-op and has been dropped. It and the call below look like mangled
            // HTML-entity decoding (&lt; / &gt;) - confirm the intended behavior.
            th = th.Replace(">", "");
            return th;
        }
    }
}
运行效果