  • .NET 环境下使用 GET 获取页面内容出现乱码问题的解决


    不多说了,先上代码:

    /// <summary>
            /// 获取页面内容
            /// </summary>
            /// <param name="Url">链接地址</param>
            /// <returns></returns>
            public static string GetWebContent(string Url)
            {
    
                string strResult = "", strCharacterSet="";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    StreamReader streamReader;
                    //声明一个HttpWebRequest请求
                    request.Timeout = 30000;
                    //设置连接超时时间
                    request.Headers.Set("Pragma", "no-cache");
                    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                    Stream streamReceive = response.GetResponseStream();
                    Encoding encoding;
                    string strPageCharacterSet = response.CharacterSet.ToLower();//获取页面响应中定义的编码字符串
                    encoding = Encoding.GetEncoding(strPageCharacterSet);
                    streamReader = new StreamReader(streamReceive, encoding);
                    strResult = streamReader.ReadToEnd();
                    strResult = StringHelps.RepalceStr(strResult, 0);
                    strCharacterSet = GetEncoding(strResult).ToLower();//获取页面html中声明的编码字符串
                    if (!strCharacterSet.Equals(strPageCharacterSet))//比较两者的编码格式是否一致,如果不一致,以页面中定义的编码格式再次去获取页面内容
                    {
                        strResult = GetWebContentByCharecterSet(Url, strCharacterSet);
                    }
                    if (string.IsNullOrEmpty(strResult))
                    {
                        streamReader = new StreamReader(streamReceive, encoding);
                        strResult = streamReader.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现乱码" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// 指定编码格式获取页面代码
            /// </summary>
            /// <param name="Url"></param>
            /// <param name="strCharacterSet"></param>
            /// <returns></returns>
            public static string GetWebContentByCharecterSet(string Url,string strCharacterSet)
            {
    
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    StreamReader streamReader;
                    //声明一个HttpWebRequest请求
                    request.Timeout = 30000;
                    //设置连接超时时间
                    request.Headers.Set("Pragma", "no-cache");
                    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                    Stream streamReceive = response.GetResponseStream();
                    Encoding encoding;
                    encoding = Encoding.GetEncoding(strCharacterSet);
                    streamReader = new StreamReader(streamReceive, encoding);
                    strResult = streamReader.ReadToEnd();
                }
                catch (Exception ex)
                {
                    LogHelper.Save("获取页面出现异常" + Url + ex.ToString());
                }
                return strResult;
            }
            /// <summary>
            /// 根据网页的HTML内容提取网页的Encoding
            /// </summary>
            /// <param name="html"></param>
            /// <returns></returns>
            static string GetEncoding(string html)
            {
                string pattern = @"(?i)charset=(?<charset>[-a-zA-Z_0-9]+)";
                string charset = Regex.Match(html, pattern).Groups["charset"].Value;
                if (string.IsNullOrEmpty(charset))
                    charset = "utf-8";
                return charset;
            }
    
  • 相关阅读:
    一个生成随机颜色的js函数
    实战vue代码
    vue练习用免费开源api大全
    vue-resource插件使用
    专题 查找与排序的Java代码实现(一)
    十字链表的画法
    20162322 朱娅霖 作业009 堆和优先队列
    20162322 朱娅霖 作业008 二叉查找树
    20162322 朱娅霖 实验报告二 树
    20162322 朱娅霖 作业007 树
  • 原文地址:https://www.cnblogs.com/wdkshy/p/5311950.html
Copyright © 2020-2023  润新知