• ASP.NET 抓取新闻


    前台页面:

    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
    <head id="Head1" runat="server">
        <title></title>
        <%-- jQuery is loaded from the CDN; nothing in the visible markup references it. --%>
        <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
    </head>
    <body>
        <form id="form1" runat="server">
        <div>
            <%-- Input panel: the code-behind reads txtUrl in btnGet_Click and hides Panel1
                 once the fetched content has been written to the response. --%>
            <asp:Panel ID="Panel1" runat="server">
                网址:<asp:TextBox ID="txtUrl" runat="server"></asp:TextBox>
                <asp:Button ID="btnGet" runat="server" Text="RSS" OnClick="btnGet_Click" />
            </asp:Panel>
        </div>
        </form>
    </body>
    </html>

    后台代码:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Text;
    using System.Net;
    using System.Text.RegularExpressions;
    
    /// <summary>
    /// Code-behind for the news-scraping page: downloads the page whose address
    /// the user typed into <c>txtUrl</c>, cuts out the news section with a
    /// regular expression and writes it into the response.
    /// </summary>
    public partial class Default7 : System.Web.UI.Page
    {
        /// <summary>
        /// Matches everything from the <c>&lt;div class="mark"&gt;</c> element up to
        /// and including the opening <c>&lt;div id="ajax_page"&gt;</c> tag.
        /// Built once and shared, because compiling a regex per request is wasteful.
        /// </summary>
        private static readonly Regex NewsSectionRegex = new Regex(
            @"<div class=""mark"">([\s\S]*)</div>([\s\S]*)<div id=""ajax_page"">",
            RegexOptions.Compiled);

        /// <summary>
        /// Page load handler; intentionally does nothing.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler for <c>btnGet</c>: fetches the address entered in
        /// <c>txtUrl</c>, decodes it as GB2312, reduces it to the news section
        /// when the pattern matches, and writes the result to the response.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        protected void btnGet_Click(object sender, EventArgs e)
        {
            // Only absolute http/https addresses are fetched; anything else
            // (empty input, relative paths, file:// and similar) is rejected up front.
            string address = (txtUrl.Text ?? string.Empty).Trim();
            Uri pageUri;
            bool isWebAddress = Uri.TryCreate(address, UriKind.Absolute, out pageUri)
                && (pageUri.Scheme == Uri.UriSchemeHttp || pageUri.Scheme == Uri.UriSchemeHttps);
            if (!isWebAddress)
            {
                Response.Write(Server.HtmlEncode("请输入以 http:// 或 https:// 开头的有效网址。"));
                return;
            }

            byte[] pageData;
            try
            {
                // WebClient is disposable, so release it as soon as the download is done.
                using (WebClient client = new WebClient())
                {
                    // NOTE(review): this offers the server process's own credentials to
                    // whatever host the user typed. Kept from the original; confirm it is
                    // really needed, and consider restricting the allowed hosts.
                    client.Credentials = CredentialCache.DefaultCredentials;
                    pageData = client.DownloadData(pageUri);
                }
            }
            catch (WebException ex)
            {
                // Leave the input panel visible so the user can correct the address.
                Response.Write(Server.HtmlEncode("抓取失败: " + ex.Message));
                return;
            }

            // The target page is decoded as GB2312. For a UTF-8 site use
            // Encoding.UTF8.GetString(pageData) instead.
            string result = Encoding.GetEncoding("GB2312").GetString(pageData);

            // Keep only the news section when it can be located; otherwise the
            // whole page is shown, as before.
            Match match = NewsSectionRegex.Match(result);
            if (match.Success)
            {
                result = match.Value;
            }

            // NOTE(review): the remote HTML is written unencoded on purpose so that it
            // renders; any script contained in the fetched page will therefore run here.
            Response.Write(result);
            Panel1.Visible = false;
        }

        /// <summary>
        /// Runs a case-insensitive regular expression over <paramref name="html"/>
        /// and collects the captured text of every match.
        /// </summary>
        /// <param name="regexString">Regular expression pattern.</param>
        /// <param name="html">Text to search.</param>
        /// <returns>
        /// For each match in order: the whole match (group 0) followed by its
        /// groups in index order, stopping at the first group whose value is
        /// empty. A match whose whole value is empty contributes nothing.
        /// </returns>
        public List<string> GetData(string regexString, string html)
        {
            List<string> result = new List<string>();
            Regex regex = new Regex(regexString, RegexOptions.IgnoreCase);

            foreach (Match m in regex.Matches(html))
            {
                for (int index = 0; index < m.Groups.Count; index++)
                {
                    string value = m.Groups[index].Value;

                    // An empty group ends this match's contribution; later groups
                    // are deliberately skipped to preserve the existing contract.
                    if (value.Length == 0)
                    {
                        break;
                    }

                    result.Add(value);
                }
            }

            return result;
        }
    }
    

      

  • 相关阅读:
    Average Score39届亚洲赛牡丹江站A题
    Average Score39届亚洲赛牡丹江站A题
    Building Fire Stations 39届亚洲赛牡丹江站B题
    Leetcode 155 Min Stack 小顶堆+栈,优先队列实现 难度:0
    pycharm 使用小结
    POJ 3020 Antenna Placement 匈牙利算法,最大流解法 难度:1
    POJ 3041 Asteroids 匈牙利算法,最大流解法,行列为点 难度:1
    POJ 1094 Sorting It All Out 拓扑排序 难度:0
    POJ 2240 && ZOJ 1082 Arbitrage 最短路,c++ stl pass g++ tle 难度:0
    POJ 1125 Stockbroker Grapevine 最短路 难度:0
  • 原文地址:https://www.cnblogs.com/douqiumiao/p/3421897.html
Copyright © 2020-2023  润新知