• .NET2.0抓取网页全部链接


    效果图

    后台代码

     

    以下为引用的内容:
    using System;

    using System.Data;

    using System.Configuration;

    using System.Web;

    using System.Web.Security;

    using System.Web.UI;

    using System.Web.UI.WebControls;

    using System.Web.UI.WebControls.WebParts;

    using System.Web.UI.HtmlControls;

    using System.Text.RegularExpressions;

    using System.Net;

    using System.IO;

    using System.Collections;

    /// <summary>
    /// Code-behind for the link extraction page. When Button1 is clicked, the page
    /// whose address is typed into TextBox1 is downloaded, every absolute
    /// <c>http://</c> URL found in its source is collected, and the distinct URLs are
    /// written to TextBox2, one per line.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        /// <summary>
        /// Matches an absolute http:// URL: one or more dot-separated host labels,
        /// optionally followed by a path/query made of word characters and
        /// <c>- . / ? % &amp; =</c> (and spaces). Matching is case-insensitive.
        /// </summary>
        private static readonly Regex LinkPattern = new Regex(
            @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Page load handler. The page needs no initialisation, so this is
        /// intentionally empty; it is kept because the markup auto-wires page events.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Downloads the page named in TextBox1 and lists its distinct http:// links
        /// in TextBox2, in order of first appearance.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        /// <exception cref="UriFormatException">The entered text is not a valid URI.</exception>
        /// <exception cref="InvalidCastException">The URI is valid but is not an HTTP(S) address.</exception>
        /// <exception cref="WebException">The request failed.</exception>
        protected void Button1_Click(object sender, EventArgs e)
        {
            TextBox2.Text = "";

            string web_url = this.TextBox1.Text.Trim(); // e.g. "http://blog.csdn.net/21aspnet/"
            if (web_url.Length == 0)
            {
                // Nothing to fetch; leave the result box empty.
                return;
            }

            string all_code = DownloadPage(web_url);

            // Build the output once instead of re-assigning TextBox2.Text per match.
            System.Text.StringBuilder output = new System.Text.StringBuilder();
            foreach (string link in ExtractDistinctLinks(all_code))
            {
                output.Append(link).Append("\n");
            }

            TextBox2.Text = output.ToString();
        }

        /// <summary>
        /// Fetches <paramref name="url"/> over HTTP and returns the response body as text.
        /// </summary>
        private static string DownloadPage(string url)
        {
            // The cast is deliberate: it rejects non-HTTP schemes (e.g. file://),
            // so user input cannot be used to read local files.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // Dispose the response and reader even if reading throws.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Returns every distinct URL matched by <see cref="LinkPattern"/> in
        /// <paramref name="html"/>, preserving the order of first occurrence.
        /// Duplicates are detected with a case-sensitive string comparison.
        /// </summary>
        private static ArrayList ExtractDistinctLinks(string html)
        {
            ArrayList links = new ArrayList();
            Hashtable seen = new Hashtable(); // keys only; used for duplicate lookup

            foreach (Match match in LinkPattern.Matches(html))
            {
                string link = match.Value;
                if (!seen.ContainsKey(link))
                {
                    seen.Add(link, null);
                    links.Add(link);
                }
            }

            return links;
        }
    }

    前台

     

    以下为引用的内容:
    <%@ Page Language="C#" AutoEventWireup="true"  CodeFile="Default.aspx.cs" Inherits="_Default" %>

    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

    <html xmlns="http://www.w3.org/1999/xhtml" >
    <head runat="server">

        <title>抓取网页所有链接</title>
       

    </head>

    <body >

        <form id="form1" runat="server">

        <div>

            <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>

            <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="提取" />
            <br />

            <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox></div>

        </form>

    </body>

    </html>

  • 相关阅读:
    Centos6.5下搭建nagios详解
    Centos6.5下升级Python版本
    Python生成随机密码
    配置apache使用https访问
    Irrlicht 论坛好贴 精选(不断补充中...)
    [原创]一个在Irrlicht中会常用的字符串转换函数
    [转]Scrolling Credits Code
    [原创]Irrlicht中的Texture透明色(colorkey)
    [原创]IrrLicht的GUI使用
    [转](C++) How to animate and move an entity
  • 原文地址:https://www.cnblogs.com/ymyglhb/p/1263512.html
Copyright © 2020-2023  润新知