• 自动判断网页编码


    /// <summary>
            /// 获取指定URL的HTML源代码
            /// </summary>
            /// <param name="url"></param>
            /// <param name="encoding">如果为NULL 则自动识别</param>
            /// <returns></returns>
            public static string GetWebHtml(string url, Encoding encoding)
            {
                try
                {
                    HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(url);
                    HttpWebResponse res;
    
                    try
                    {
                        res = (HttpWebResponse)hwr.GetResponse();
                    }
                    catch
                    {
                        return string.Empty;
                    }
    
                    if (res.StatusCode == HttpStatusCode.OK)
                    {
                        using (Stream mystream = res.GetResponseStream())
                        {
                            //没有指定编码,
                            if (encoding == null)
                            {
                                return DecodeData(mystream, res);
                            }
                            //指定了编码
                            else
                            {
                                using (StreamReader reader = new StreamReader(mystream, encoding))
                                {
                                    return reader.ReadToEnd();
                                }
                            }
                        }
                    }
    
                    return null;
                }
                catch
                {
                    return null;
                }
            }
    
    
            private static string DecodeData(Stream responseStream, HttpWebResponse response)
            {
                string name = null;
                string text2 = response.Headers["content-type"];
                if (text2 != null)
                {
                    int index = text2.IndexOf("charset=");
                    if (index != -1)
                    {
                        name = text2.Substring(index + 8);
                    }
                }
                MemoryStream stream = new MemoryStream();
                byte[] buffer = new byte[0x400];
                for (int i = responseStream.Read(buffer, 0, buffer.Length); i > 0; i = responseStream.Read(buffer, 0, buffer.Length))
                {
                    stream.Write(buffer, 0, i);
                }
                responseStream.Close();
                if (name == null)
                {
                    MemoryStream stream3 = stream;
                    stream3.Seek((long)0, SeekOrigin.Begin);
                    string text3 = new StreamReader(stream3, Encoding.ASCII).ReadToEnd();
                    if (text3 != null)
                    {
                        int startIndex = text3.IndexOf("charset=");
                        int num4 = -1;
                        if (startIndex != -1)
                        {
                            num4 = text3.IndexOf(""", startIndex);
                            if (num4 != -1)
                            {
                                int num5 = startIndex + 8;
                                name = text3.Substring(num5, (num4 - num5) + 1).TrimEnd(new char[] { '>', '"' });
                            }
                        }
                    }
                }
                Encoding aSCII = null;
                if (name == null)
                {
                    aSCII = Encoding.GetEncoding("gb2312");
                }
                else
                {
                    try
                    {
                        if (name == "GBK")
                        {
                            name = "GB2312";
                        }
                        aSCII = Encoding.GetEncoding(name);
                    }
                    catch
                    {
                        aSCII = Encoding.GetEncoding("gb2312");
                    }
                }
                stream.Seek((long)0, SeekOrigin.Begin);
                StreamReader reader2 = new StreamReader(stream, aSCII);
                return reader2.ReadToEnd();
            }
  • 相关阅读:
    [图论分块] HDU 4858 项目管理
    pytorch 自定义权重变量初始化
    Linux系统下命令静默安装weblogic12c
    第六章 类文件结构(2)
    第六章 类文件结构(1)
    第三章 垃圾收集器与内存分配策略
    第二章(3)实战: OutOfMemoryError异常
    第二章(2)HotSpot虚拟机对象探秘
    第二章(1) Java内存区域与内存溢出异常
    第一章 走进java
  • 原文地址:https://www.cnblogs.com/ganler1988/p/3146144.html
Copyright © 2020-2023  润新知