• C# 页面抓取类


    抓取网站页面的内容,简单的类应用,代码如下:

     /// <summary>
            /// Downloads the body of a page using <see cref="WebRequest"/> and decodes it as UTF-8.
            /// </summary>
            /// <param name="url">URL of the page to fetch.</param>
            /// <returns>
            /// The response body as text. If any exception is thrown while creating the
            /// request or reading the response, that exception's message is returned instead.
            /// </returns>
            public string WebHtmlCon(string url)
            {
                try
                {
                    WebRequest request = WebRequest.Create(url);

                    // Dispose the response, its stream and the reader deterministically so the
                    // underlying connection is released even if reading fails part-way through.
                    using (WebResponse response = request.GetResponse())
                    using (Stream stream = response.GetResponseStream())
                    using (StreamReader sReader = new StreamReader(stream, Encoding.UTF8))
                    {
                        return sReader.ReadToEnd();
                    }
                }
                catch (Exception e)
                {
                    // Kept for backward compatibility: failures are reported through the
                    // return value rather than propagated to the caller.
                    return e.Message;
                }
            }
            /// <summary>
            /// Downloads the body of a page using <see cref="WebClient"/> with UTF-8 encoding.
            /// </summary>
            /// <param name="url">URL of the page to fetch.</param>
            /// <returns>
            /// The downloaded text. If any exception is thrown during the download,
            /// that exception's message is returned instead.
            /// </returns>
            public string WebClientHtmlCon(string url)
            {
                try
                {
                    // WebClient is IDisposable; release it as soon as the download finishes.
                    using (WebClient webclient = new WebClient())
                    {
                        webclient.Encoding = Encoding.UTF8;
                        return webclient.DownloadString(url);
                    }
                }
                catch (Exception e)
                {
                    // Kept for backward compatibility: failures are reported through the
                    // return value rather than propagated to the caller.
                    return e.Message;
                }
            }

    根据抓取的内容进行实际应用。

    例如调用 API:获取页面返回的 JSON 数据,再解析出自己需要的字段:

    JSON 数据解析代码(使用 .NET 自带的类库 System.Web.Extensions):

             /// <summary>
            /// Parses JSON text into a string-keyed dictionary using
            /// <see cref="JavaScriptSerializer"/> (requires a reference to System.Web.Extensions).
            /// </summary>
            /// <param name="jsonData">JSON text to parse.</param>
            /// <returns>The value produced by deserializing <paramref name="jsonData"/>.</returns>
            private static Dictionary<string, object> JsonToDictionary(string jsonData)
            {
                return new JavaScriptSerializer().Deserialize<Dictionary<string, object>>(jsonData);
            }

    快递查询API应用:

      /// <summary>
            /// Looks up the delivery status of a parcel through the kuaidi100 query endpoint
            /// and renders the result as an HTML fragment.
            /// </summary>
            /// <param name="con">Courier company code, sent as the <c>type</c> query parameter.</param>
            /// <param name="number">Tracking number, sent as the <c>postid</c> query parameter.</param>
            /// <returns>
            /// An HTML table with one row per tracking entry when the response's "message"
            /// field equals "ok"; otherwise (including download or parse failures) an HTML
            /// paragraph telling the user to retry later.
            /// </returns>
            public static string SelectYJ(string con, string number)
            {
                const string errorHtml = "<p style=\"line-height:28px;margin:0px;padding:0px;color:#F21818; font-size: 14px;\">快递公司网络异常,请稍后查询.</p>";
                const string headerCellStyle = "background: #64AADB; border: 1px solid #75C2EF; color: #FFFFFF; font-size: 14px; font-weight: bold; height: 28px; line-height: 28px; text-indent: 15px;";
                const string timeCellStyle = "border: 1px solid #DDDDDD; font-size: 12px; line-height: 22px; padding: 3px 5px;";

                // Escape both values so reserved characters cannot alter the query string.
                string url = "http://www.kuaidi100.com/query?type=" + Uri.EscapeDataString(con ?? string.Empty)
                    + "&postid=" + Uri.EscapeDataString(number ?? string.Empty);//query address

                Dictionary<string, object> diclist;
                try
                {
                    // Download here instead of going through an instance helper: this method is
                    // static, and a failed download must not be mistaken for a JSON payload.
                    string json;
                    using (WebClient client = new WebClient())
                    {
                        client.Encoding = Encoding.UTF8;
                        json = client.DownloadString(url);
                    }
                    diclist = JsonToDictionary(json);
                }
                catch (WebException)
                {
                    return errorHtml;
                }
                catch (ArgumentException)
                {
                    // Raised by the serializer for malformed or missing JSON.
                    return errorHtml;
                }
                catch (InvalidOperationException)
                {
                    // Raised by the serializer when the JSON is not an object.
                    return errorHtml;
                }

                object message;
                if (diclist == null
                    || !diclist.TryGetValue("message", out message)
                    || message == null
                    || message.ToString() != "ok")
                {
                    return errorHtml;
                }

                StringBuilder html = new StringBuilder();
                html.Append("<table id=\"showtablecontext\" style=\"border-collapse: collapse; width: 520px; border-spacing: 0; border:0;\">");
                html.Append("<tbody>");
                html.Append("<tr><th width='163' style=\"" + headerCellStyle + "\">时间</th><th width='354' style=\"" + headerCellStyle + "\">地点和跟踪进度</th></tr>");

                // An "ok" response without a usable "data" array yields a header-only table.
                object data;
                diclist.TryGetValue("data", out data);
                ArrayList list = data as ArrayList;
                if (list != null)
                {
                    foreach (object entry in list)
                    {
                        Dictionary<string, object> item = entry as Dictionary<string, object>;
                        if (item == null)
                        {
                            continue;
                        }

                        object time;
                        object context;
                        item.TryGetValue("time", out time);
                        item.TryGetValue("context", out context);

                        // The values come from a remote service, so encode them before
                        // embedding them in markup.
                        html.Append(" <tr><td style=\"" + timeCellStyle + "\">");
                        html.Append(WebUtility.HtmlEncode(Convert.ToString(time)));
                        html.Append("</td><td>");
                        html.Append(WebUtility.HtmlEncode(Convert.ToString(context)));
                        html.Append("</td></tr>");
                    }
                }

                html.Append("</tbody>");
                html.Append("</table>");
                return html.ToString();
            }

    结果为:

  • 相关阅读:
    (三)3-5 Python生成式和生成器
    (三)3-4 Python的高阶函数和匿名函数
    Linux下安装Python3
    Python math函数库
    今日头条as,cp,_signature参数破解
    使用scrapy实现分布式爬虫
    scrapy框架持久化存储
    Python中使用rsa加密
    使用Python进行微博登录
    WebDriverWait 显示等待
  • 原文地址:https://www.cnblogs.com/xiao-bei/p/4422432.html
Copyright © 2020-2023  润新知