• C# ASP.NET 抓取需要登录的网页内容(抓取使用 ASP.NET 登录验证的网站)


     private void btnASPNET_Click(object sender, EventArgs e)
            {
                Dictionary<string, string> postParams = new Dictionary<string, string>();
                postParams.Add("txtLoginId", "www.cnuunet.com");
                postParams.Add("txtPassword", "www.cnuunet.com");
                postParams.Add("btnLogin", "Sign in");
                textBox1.Text = GetAspNetCodeResponseDataFromWebSite(postParams, "http://www.cnuunet.com/login.aspx", "http://www.cnuunet.com/ProductList.aspx");
            }

            /// <summary>
            /// ASP.net页面登录方式 通过post密码方式读取网页内容
            /// 在本页面(login.aspx)的.cs文件内验证用户名和密码。
            /// Asp.net验证需要记住本次页面加载的__VIEWSTATE和__EVENTVALIDATION信息,并且需要button按钮的ID和Text信息
            /// </summary>
            /// <param name="postParams">用户名(用户名文本框的ID和内容)、密码(密码文本框的ID和内容)、summit按钮(button按钮的ID和Text)</param>
            /// <param name="getViewStateAndEventValidationLoginUrl">需要验证登陆信息的url</param>
            /// <param name="getDataUrl">需要抓取数据的网页url</param>
            /// <returns>抓取页面返回的html信息</returns>
            private string GetAspNetCodeResponseDataFromWebSite(Dictionary<string, string> postParams, string getViewStateAndEventValidationLoginUrl, string getDataUrl)
            {
                if (postParams == null || postParams.Keys.Count!=3)
                {
                    string errorMessage = "参数中需要包含如下3个信息,缺一不可。用户名(用户名文本框的ID和内容)、密码(密码文本框的ID和内容)、summit按钮(button按钮的ID和Text)";
                    MessageBox.Show(errorMessage);
                    return errorMessage ;
                }

                try
                {
                    CookieContainer cookieContainer = new CookieContainer();

                    ///////////////////////////////////////////////////
                    // 1.打开 MyLogin.aspx 页面,获得 GetVeiwState & EventValidation
                    ///////////////////////////////////////////////////                
                    // 设置打开页面的参数
                    HttpWebRequest request = WebRequest.Create(getViewStateAndEventValidationLoginUrl) as HttpWebRequest;
                    request.Method = "GET";
                    request.KeepAlive = false;

                    // 接收返回的页面
                    HttpWebResponse response = request.GetResponse() as HttpWebResponse;
                    System.IO.Stream responseStream = response.GetResponseStream();
                    System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, Encoding.UTF8);
                    string srcString = reader.ReadToEnd();

                    // 获取页面的 VeiwState,分析返回的页面,解析出__VIEWSTATE的值          
                    string viewStateFlag = "id="__VIEWSTATE" value="";
                    int i = srcString.IndexOf(viewStateFlag) + viewStateFlag.Length;
                    int j = srcString.IndexOf(""", i);
                    string viewState = srcString.Substring(i, j - i);

                    // 获取页面的 EventValidation,分析返回的页面,解析出__VIEWSTATE的值                   
                    string eventValidationFlag = "id="__EVENTVALIDATION" value="";
                    i = srcString.IndexOf(eventValidationFlag) + eventValidationFlag.Length;
                    j = srcString.IndexOf(""", i);
                    string eventValidation = srcString.Substring(i, j - i);

                    ///////////////////////////////////////////////////
                    // 2.自动填充并提交 Login.aspx 页面,提交Login.aspx页面,来保存Cookie
                    ///////////////////////////////////////////////////

                    // 将文本转换成 URL 编码字符串
                    viewState = System.Web.HttpUtility.UrlEncode(viewState);
                    eventValidation = System.Web.HttpUtility.UrlEncode(eventValidation);
                    
                    // 要提交的字符串数据。格式形如:user=uesr1&password=123
                    string postString = "";
                    foreach (KeyValuePair<string, string> de in postParams)
                    {
                        //把提交按钮中的中文字符转换成url格式,以防中文或空格等信息
                        postString += System.Web.HttpUtility.UrlEncode(de.Key.ToString()) + "=" + System.Web.HttpUtility.UrlEncode(de.Value.ToString()) + "&";
                    }
                    postString += string.Format("__VIEWSTATE={0}&__EVENTVALIDATION={1}", viewState, eventValidation);
                    
                    // 将提交的字符串数据转换成字节数组
                    byte[] postData = Encoding.ASCII.GetBytes(postString);

                    // 设置提交的相关参数
                    request = WebRequest.Create(getViewStateAndEventValidationLoginUrl) as HttpWebRequest;
                    request.Method = "POST";
                    request.KeepAlive = false;
                    request.ContentType = "application/x-www-form-urlencoded";
                    request.CookieContainer = cookieContainer;
                    request.ContentLength = postData.Length;

                    // 提交请求数据
                    System.IO.Stream outputStream = request.GetRequestStream();
                    outputStream.Write(postData, 0, postData.Length);
                    outputStream.Close();

                    // 接收返回的页面
                    response = request.GetResponse() as HttpWebResponse;
                    responseStream = response.GetResponseStream();
                    reader = new System.IO.StreamReader(responseStream, Encoding.GetEncoding("GB2312"));
                    srcString = reader.ReadToEnd();

                    ///////////////////////////////////////////////////
                    // 3.打开需要抓取数据的页面
                    ///////////////////////////////////////////////////
                    // 设置打开页面的参数
                    request = WebRequest.Create(getDataUrl) as HttpWebRequest;
                    request.Method = "GET";
                    request.KeepAlive = false;
                    request.CookieContainer = cookieContainer;

                    // 接收返回的页面
                    response = request.GetResponse() as HttpWebResponse;
                    responseStream = response.GetResponseStream();
                    reader = new System.IO.StreamReader(responseStream, Encoding.UTF8);
                    srcString = reader.ReadToEnd();
                    return srcString;
                    ///////////////////////////////////////////////////
                    // 4.分析返回的页面
                    ///////////////////////////////////////////////////
                    // ...... ......
                }
                catch (WebException we)
                {
                    string msg = we.Message;
                    return msg;
                }  
            }
        

  • 相关阅读:
    PHP把数组按指定的个数分隔
    主题模型(LDA)(一)--通俗理解与简单应用
    用户活跃度下降40%!七问新浪微博
    天才罗素:知识面横跨哲学 数学和文学 最懂的却是女人
    金刚经---现代解读
    离散数学
    交易已无秘密 一个期货高手的终极感悟
    一位资深交易员的投资感悟(建议收藏)
    F1 score,micro F1score,macro F1score 的定义
    以前曾看到过一个期货童话故事,很有意思,发上来
  • 原文地址:https://www.cnblogs.com/xpvincent/p/3273503.html
Copyright © 2020-2023  润新知