抓取网站页面内容的简单工具类,代码如下:
/// <summary>
/// Downloads the body of a page with <see cref="WebRequest"/> and decodes it as UTF-8.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// The response body as text. If any exception is thrown while fetching or
/// reading, the exception's <c>Message</c> is returned instead of page content.
/// </returns>
public string WebHtmlCon(string url)
{
    try
    {
        WebRequest request = WebRequest.Create(url);

        // The response and its stream are disposable; release them
        // deterministically instead of leaving them to the finalizer.
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader sReader = new StreamReader(stream, Encoding.UTF8))
        {
            return sReader.ReadToEnd();
        }
    }
    catch (Exception e)
    {
        // Existing contract: failures are reported through the return value.
        return e.Message;
    }
}

/// <summary>
/// Downloads the body of a page with <see cref="WebClient"/> and decodes it as UTF-8.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// The response body as text. If any exception is thrown while downloading,
/// the exception's <c>Message</c> is returned instead of page content.
/// </returns>
public string WebClientHtmlCon(string url)
{
    try
    {
        // WebClient is disposable; dispose it once the download has finished.
        using (WebClient webclient = new WebClient())
        {
            webclient.Encoding = Encoding.UTF8;
            return webclient.DownloadString(url);
        }
    }
    catch (Exception e)
    {
        // Existing contract: failures are reported through the return value.
        return e.Message;
    }
}
根据抓取的内容进行实际应用。
例如调用API:抓取接口返回的JSON数据,再解析出自己需要的数据:
JSON数据解析代码(使用 .NET 自带的 System.Web.Extensions 类库):
/// <summary>
/// Parses a JSON document into a string-keyed dictionary using
/// <c>JavaScriptSerializer</c> (requires a reference to System.Web.Extensions).
/// </summary>
/// <param name="jsonData">The JSON text to parse.</param>
/// <returns>The value produced by deserializing <paramref name="jsonData"/>.</returns>
private static Dictionary<string, object> JsonToDictionary(string jsonData)
{
    var serializer = new JavaScriptSerializer();
    Dictionary<string, object> parsed =
        serializer.Deserialize<Dictionary<string, object>>(jsonData);
    return parsed;
}
快递查询API应用:
/// <summary>
/// Looks up the delivery status of a parcel through the kuaidi100 query
/// endpoint and renders the tracking records as an HTML table.
/// </summary>
/// <param name="con">Courier company code, sent as the <c>type</c> query parameter.</param>
/// <param name="number">Tracking number, sent as the <c>postid</c> query parameter.</param>
/// <returns>
/// An HTML table with one row per tracking record when the response's
/// <c>message</c> field equals <c>"ok"</c>; otherwise an HTML paragraph
/// carrying the error text. The error paragraph is also returned when the
/// download fails or the response cannot be parsed.
/// </returns>
public static string SelectYJ(string con, string number)
{
    const string ErrorHtml = "<p style=\"line-height:28px;margin:0px;padding:0px;color:#F21818; font-size: 14px;\">快递公司网络异常,请稍后查询.</p>";
    const string HeaderCellStyle = "background: #64AADB; border: 1px solid #75C2EF; color: #FFFFFF; font-size: 14px; font-weight: bold; height: 28px; line-height: 28px; text-indent: 15px;";

    // Query address. Parameters are escaped so characters such as '&' or '#'
    // in the input cannot change the shape of the query string.
    string url = "http://www.kuaidi100.com/query?type=" + Uri.EscapeDataString(con ?? string.Empty)
               + "&postid=" + Uri.EscapeDataString(number ?? string.Empty);

    Dictionary<string, object> diclist;
    try
    {
        diclist = JsonToDictionary(DownloadQueryResult(url));
    }
    catch (InvalidOperationException)
    {
        // Covers WebException (network failure) and deserialization failures.
        return ErrorHtml;
    }
    catch (ArgumentException)
    {
        // Covers malformed or missing JSON text.
        return ErrorHtml;
    }

    object message;
    if (diclist == null
        || !diclist.TryGetValue("message", out message)
        || message == null
        || !string.Equals(message.ToString(), "ok", StringComparison.Ordinal))
    {
        return ErrorHtml;
    }

    StringBuilder html = new StringBuilder();

    // NOTE(review): the published snippet contained a bare "520px;" declaration,
    // which is not valid CSS; it is restored here as "width: 520px;" - confirm.
    html.Append("<table id=\"showtablecontext\" style=\"border-collapse: collapse; width: 520px; border-spacing: 0; border:0;\">");
    html.Append("<tbody>");
    html.Append("<tr><th width='163' style=\"" + HeaderCellStyle + "\">时间</th>"
              + "<th width='354' style=\"" + HeaderCellStyle + "\">地点和跟踪进度</th></tr>");

    // Enumerate through the non-generic interface so any list type the
    // serializer produces for a JSON array is accepted.
    object data;
    IEnumerable rows = diclist.TryGetValue("data", out data) ? data as IEnumerable : null;
    if (rows != null)
    {
        foreach (object row in rows)
        {
            IDictionary<string, object> item = row as IDictionary<string, object>;
            if (item == null)
            {
                continue; // not a JSON object; nothing to render
            }

            html.Append(" <tr><td style=\"border: 1px solid #DDDDDD; font-size: 12px; line-height: 22px; padding: 3px 5px;\">")
                .Append(HtmlEncodeField(item, "time"))
                .Append("</td><td>")
                .Append(HtmlEncodeField(item, "context"))
                .Append("</td></tr>");
        }
    }

    html.Append("</tbody>");
    html.Append("</table>");
    return html.ToString();
}

/// <summary>Downloads the query response as UTF-8 text; usable from static code.</summary>
private static string DownloadQueryResult(string url)
{
    using (WebClient client = new WebClient())
    {
        client.Encoding = Encoding.UTF8;
        return client.DownloadString(url);
    }
}

/// <summary>Returns the HTML-encoded text of a record field, or an empty string if it is absent.</summary>
private static string HtmlEncodeField(IDictionary<string, object> item, string key)
{
    object value;
    if (!item.TryGetValue(key, out value) || value == null)
    {
        return string.Empty;
    }

    // The value comes from a remote service, so encode it before embedding in markup.
    return WebUtility.HtmlEncode(value.ToString());
}
结果为: