• 支持Cookie并开放了一些特殊设置项的HttpWebClient


      1 using System;
      2 using System.Collections.Generic;
      3 using System.Linq;
      4 using System.Text;
      5 using System.Net;
      6 using System.IO;
      7 using System.Collections.Specialized;
      8 using System.Web;
      9 
     namespace Common.Helpers
     {
         /// <summary>
         /// A <see cref="WebClient"/> that adds a cookie container, a configurable timeout and
         /// User-Agent, control over automatic redirects, access to the last response object,
         /// and form-POST helpers that URL-encode with a caller-chosen encoding.
         /// </summary>
         public class HttpWebClient : WebClient
         {
             #region Public properties
             /// <summary>
             /// User-Agent string written to the request headers before every request.
             /// The constructors initialise it to <see cref="UserAgentValues.FireFox"/>.
             /// When null or empty, this class leaves the User-Agent header untouched.
             /// </summary>
             public string UserAgent { get; set; }
             /// <summary>
             /// Cookie container assigned to every <see cref="HttpWebRequest"/> this client creates.
             /// </summary>
             public CookieContainer CookieContainer { get; set; }
             /// <summary>
             /// true if POST requests should expect a 100-Continue response; otherwise false.
             /// The value is applied to the ServicePoint of each HTTP request. The constructors set it to false.
             /// </summary>
             public bool Expect100Continue { get; set; }

             private WebResponse m_LastWebResponse = null;
             /// <summary>
             /// The response object most recently returned by <c>GetWebResponse</c>
             /// (null until a response has been obtained). It is only updated when the
             /// base implementation returns a response, not when it throws.
             /// </summary>
             public WebResponse LastWebResponse { get { return this.m_LastWebResponse; } }

             private int m_Timeout = 120000;
             /// <summary>
             /// Request timeout in milliseconds. Defaults to 120000 (120 seconds).
             /// </summary>
             public int Timeout
             {
                 get { return m_Timeout; }
                 set { m_Timeout = value; }
             }

             private HttpWebClientSetting m_HttpWebClientSetting = null;
             /// <summary>
             /// Extra settings applied to each HTTP request. This property never returns null:
             /// a default instance is created on first read, and assigning null installs a new default instance.
             /// </summary>
             public HttpWebClientSetting HttpWebClientSetting
             {
                 get
                 {
                     if (m_HttpWebClientSetting == null)
                     {
                         m_HttpWebClientSetting = new HttpWebClientSetting();
                     }
                     return m_HttpWebClientSetting;
                 }
                 set
                 {
                     m_HttpWebClientSetting = value ?? new HttpWebClientSetting();
                 }
             }

             /// <summary>
             /// Optional hook invoked with every <see cref="HttpWebRequest"/> created by this client,
             /// after this class has applied its own settings. Defaults to null (no hook).
             /// </summary>
             public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; }
             #endregion

             #region Constructors
             /// <summary>
             /// Creates a client with a new, empty <see cref="CookieContainer"/>.
             /// </summary>
             public HttpWebClient()
                 : this(new CookieContainer())
             {
             }

             /// <summary>
             /// Creates a client that uses the given cookie container, the Firefox User-Agent
             /// and Expect100Continue switched off.
             /// </summary>
             /// <param name="cookieContainer">Cookie container to use; stored as-is.</param>
             public HttpWebClient(CookieContainer cookieContainer)
             {
                 this.CookieContainer = cookieContainer;
                 this.UserAgent = UserAgentValues.FireFox;
                 this.Expect100Continue = false;
             }
             #endregion

             #region Overrides adding CookieContainer support
             /// <summary>
             /// Creates the request through the base class, then applies the timeout and, for HTTP
             /// requests, the cookie container, Expect100Continue, the redirect setting and the
             /// <see cref="PrepareProcessWebRequest"/> hook.
             /// </summary>
             /// <param name="address">The URI to request.</param>
             /// <returns>The configured request object.</returns>
             protected override WebRequest GetWebRequest(Uri address)
             {
                 if (!string.IsNullOrEmpty(this.UserAgent))
                 {
                     // Assign instead of Add: Add appends a second value when the header is already present.
                     this.Headers[HttpRequestHeader.UserAgent] = this.UserAgent;
                 }

                 WebRequest request = base.GetWebRequest(address);
                 request.Timeout = this.Timeout;

                 HttpWebRequest httpRequest = request as HttpWebRequest;
                 if (httpRequest != null)
                 {
                     httpRequest.CookieContainer = this.CookieContainer;
                     httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue;

                     // HttpWebClientSetting never returns null, so it can be read directly.
                     httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;

                     // Let the caller-supplied hook adjust the request last, so it can override the above.
                     if (this.PrepareProcessWebRequest != null)
                     {
                         this.PrepareProcessWebRequest(httpRequest);
                     }
                 }

                 return request;
             }
             #endregion

             #region Overrides exposing the response object
             /// <summary>
             /// Gets the response through the base class and remembers it in <see cref="LastWebResponse"/>.
             /// </summary>
             /// <param name="request">The request whose response is wanted.</param>
             /// <returns>The response returned by the base implementation.</returns>
             protected override WebResponse GetWebResponse(WebRequest request)
             {
                 WebResponse response = base.GetWebResponse(request);
                 this.m_LastWebResponse = response;
                 return response;
             }

             /// <summary>
             /// Asynchronous counterpart of <see cref="GetWebResponse(WebRequest)"/>, so that
             /// <see cref="LastWebResponse"/> is also updated for asynchronous operations.
             /// </summary>
             /// <param name="request">The request whose response is wanted.</param>
             /// <param name="result">The asynchronous result of the pending request.</param>
             /// <returns>The response returned by the base implementation.</returns>
             protected override WebResponse GetWebResponse(WebRequest request, IAsyncResult result)
             {
                 WebResponse response = base.GetWebResponse(request, result);
                 this.m_LastWebResponse = response;
                 return response;
             }
             #endregion

             #region (public) PostData overloads taking a NameValueCollection
             /// <summary>
             /// POSTs form data to a URL and returns the response body as a string.
             /// ZhangQingFeng    2014-12-14    Add
             ///    EditLog:
             ///        ZhangQingFeng    2015-05-12    Edit    WebClient.UploadValues always URL-encodes with UTF-8
             ///        (see UploadValuesInternal in Microsoft's WebClient source), so the body is encoded here
             ///        with the requested encoding and sent through UploadData instead.
             /// </summary>
             /// <remarks>
             /// This method sets <c>this.Encoding</c> and the Content-Type request header as a side effect.
             /// </remarks>
             /// <param name="url">The URL to post to.</param>
             /// <param name="data">The form fields to submit; null is treated as an empty collection.</param>
             /// <param name="encoding">Encoding used to URL-encode the form fields; UTF-8 when null.</param>
             /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
             /// <returns>The decoded response body.</returns>
             public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)
             {
                 // Resolve the fallback once so that the URL-encoding below never receives a null encoding.
                 Encoding requestEncoding = encoding ?? Encoding.UTF8;

                 this.Encoding = requestEncoding;
                 // Assign instead of Add so that a Content-Type already present is replaced, not appended to.
                 this.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";

                 string body = BuildFormBody(data, requestEncoding);

                 // After URL-encoding the body contains ASCII characters only.
                 byte[] responseBytes = this.UploadData(url, "POST", Encoding.ASCII.GetBytes(body));
                 return (responseEncoding ?? this.Encoding).GetString(responseBytes);
             }

             /// <summary>
             /// Builds an application/x-www-form-urlencoded body ("k1=v1&amp;k2=v2") from the given fields.
             /// </summary>
             /// <param name="data">The form fields; null yields an empty body.</param>
             /// <param name="encoding">Encoding used for URL-encoding keys and values; must not be null.</param>
             /// <returns>The encoded body, or an empty string when there are no fields.</returns>
             private static string BuildFormBody(NameValueCollection data, Encoding encoding)
             {
                 if (data == null)
                 {
                     return string.Empty;
                 }

                 StringBuilder body = new StringBuilder();
                 string delimiter = string.Empty;
                 foreach (string name in data.AllKeys)
                 {
                     body.Append(delimiter);
                     body.Append(HttpUtility.UrlEncode(name, encoding));
                     body.Append("=");
                     // The indexer returns all values stored under the key joined with commas.
                     body.Append(HttpUtility.UrlEncode(data[name], encoding));
                     delimiter = "&";
                 }
                 return body.ToString();
             }

             /// <summary>
             /// POSTs form data to a URL and returns the response body, using the same encoding
             /// for the request and the response.
             /// ZhangQingFeng    2014-12-14    Add
             /// </summary>
             /// <param name="url">The URL to post to.</param>
             /// <param name="data">The form fields to submit.</param>
             /// <param name="encoding">Encoding used for both the request and the response.</param>
             /// <returns>The decoded response body.</returns>
             public string PostData(string url, NameValueCollection data, Encoding encoding)
             {
                 return PostData(url, data, encoding, null);
             }

             /// <summary>
             /// POSTs form data to a URL and returns the response body, using <c>this.Encoding</c>
             /// for both the request and the response.
             /// ZhangQingFeng    2014-12-14    Add
             /// </summary>
             /// <param name="url">The URL to post to.</param>
             /// <param name="data">The form fields to submit.</param>
             /// <returns>The decoded response body.</returns>
             public string PostData(string url, NameValueCollection data)
             {
                 return PostData(url, data, this.Encoding);
             }
             #endregion

             #region (public) PostData overloads taking a Dictionary
             /// <summary>
             /// POSTs form data to a URL and returns the response body as a string.
             /// ZhangQingFeng    2014-12-14    Add
             /// </summary>
             /// <param name="url">The URL to post to.</param>
             /// <param name="data">The form fields to submit; null is treated as empty.</param>
             /// <param name="encoding">Encoding used to URL-encode the form fields; UTF-8 when null.</param>
             /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
             /// <returns>The decoded response body.</returns>
             public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)
             {
                 NameValueCollection postData = new NameValueCollection();
                 if (data != null)
                 {
                     foreach (var item in data)
                     {
                         postData.Add(item.Key, item.Value);
                     }
                 }
                 return PostData(url, postData, encoding, responseEncoding);
             }

             /// <summary>
             /// POSTs form data to a URL and returns the response body, using the same encoding
             /// for the request and the response.
             /// ZhangQingFeng    2014-12-14    Add
             /// </summary>
             /// <param name="url">The URL to post to.</param>
             /// <param name="data">The form fields to submit.</param>
             /// <param name="encoding">Encoding used for both the request and the response.</param>
             /// <returns>The decoded response body.</returns>
             public string PostData(string url, Dictionary<string, string> data, Encoding encoding)
             {
                 return PostData(url, data, encoding, null);
             }

             /// <summary>
             /// POSTs form data to a URL and returns the response body, using <c>this.Encoding</c>
             /// for both the request and the response.
             /// ZhangQingFeng    2014-12-14    Add
             /// </summary>
             /// <param name="url">The URL to post to.</param>
             /// <param name="data">The form fields to submit.</param>
             /// <returns>The decoded response body.</returns>
             public string PostData(string url, Dictionary<string, string> data)
             {
                 return PostData(url, data, this.Encoding);
             }
             #endregion

             #region Helper types
             /// <summary>
             /// Predefined browser User-Agent strings.
             /// </summary>
             public class UserAgentValues
             {
                 public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";
                 public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
                 public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";
             }
             #endregion
         }

         /// <summary>
         /// Settings object for <see cref="HttpWebClient"/>.
         /// </summary>
         public class HttpWebClientSetting
         {
             private bool m_AllowAutoRedirect = true;
             /// <summary>
             /// Whether the client follows redirect responses automatically. When true the response
             /// obtained is that of the redirect target; when false it is the redirect response itself.
             /// Defaults to true.
             /// </summary>
             public bool AllowAutoRedirect
             {
                 get { return m_AllowAutoRedirect; }
                 set { m_AllowAutoRedirect = value; }
             }
         }
     }
    HttpWebClient

    在做页面抓取的过程中,发现自带的WebClient不够灵活,因此做了一个实现。

    关于在PostData方法中不使用UploadValues()方法的原因:

    1.查看微软的源代码实现时发现,无论请求的Encoding设置为何值(例如GB2312),WebClient的UploadValues()在上传内容时内部始终使用UTF-8编码进行UrlEncode,因此当服务端按GB2312等非UTF-8编码解析时,提交数据中的中文就会乱码,所以在PostData中改为自行按指定编码进行UrlEncode以规避此问题。

    关于HttpWebClientSetting中的AllowAutoRedirect属性:

    在WebClient发起请求时,若响应为重定向,WebClient默认会自动跟随重定向,因此该类提供此设置项以控制访问时是否自动重定向。(自动重定向时,对重定向目标地址发起的第二次请求中Referer头会被置空;将AllowAutoRedirect设置为false后,可手动从响应的Headers中取出Location头的地址,设置好Referer头后再访问,即可更真实地模拟浏览器访问,从而避开一些网站的防抓取设置)

    关于HttpWebClient中的LastWebResponse属性:

    当存在多次重定向时,该属性记录的是最后一次返回的响应对象,通过该对象的ResponseUri属性可以取到最终返回响应的页面真实地址,从而为下一次请求设置Referer头作准备。

    大约就是如此,后期如有Bug会继续更新。

  • 相关阅读:
    使用SSH密钥方式登录ubuntu Linux,指令(ssh-keygen 和 ssh-copy-id)
    显示、更改ubuntu linux主机名(计算机名)
    如何在VMware ubuntu linux虚拟机中安装VMware tools
    VMware bridge 桥接方式连接internet
    在STEP7 TIA PORTAL中,设置模块的地址和设备名(Device name)
    查看linux网卡硬件名称
    Ubuntu linux安装putty
    Linux修改文件permission可执行属性
    在windows中,使用SSH登录VMware ubuntu linux虚拟机
    单选,复选操作div,显示隐藏
  • 原文地址:https://www.cnblogs.com/feng8621/p/4905996.html
Copyright © 2020-2023  润新知