• 【C#基础】实现URL Unicode编码,编码、解码相关整理


    1、URL编码  引用系统 System.Web

    using System.Web;
    // Build the form body. Both values are URL-encoded: an unencoded RelayState containing
    // '&', '=', '+' or '%' would be split or altered when the receiver parses the body.
    // NOTE(review): if RelayState already arrives URL-encoded from the caller, decode it first
    // to avoid double encoding — confirm against where RelayState is produced.
    string postdata = "SAMLRequest=" + HttpUtility.UrlEncode(SAMLRequest) + "&RelayState=" + HttpUtility.UrlEncode(RelayState);

    2、URL编码(UTF-8 百分号编码)  自己封装的方法

         /// <summary>
         /// Percent-encodes every byte of the UTF-8 representation of <paramref name="str"/>.
         /// </summary>
         /// <remarks>
         /// Every byte is escaped, including ASCII letters and digits; hex digits are lower-case.
         /// </remarks>
         /// <param name="str">Text to encode.</param>
         /// <returns>A string made only of <c>%xx</c> triplets, one per UTF-8 byte.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
            public static string UrlEncode(string str)
            {
                if (str == null)
                {
                    throw new ArgumentNullException("str");
                }

                byte[] utf8 = System.Text.Encoding.UTF8.GetBytes(str);
                StringBuilder sb = new StringBuilder(utf8.Length * 3);
                foreach (byte b in utf8)
                {
                    // "x2" keeps the leading zero: a percent-escape must have exactly two hex
                    // digits, so 0x0A has to be written "%0a", not "%a".
                    sb.Append('%').Append(b.ToString("x2"));
                }

                return sb.ToString();
            }

    3、\u4E2D\u56FD 转换成 "中国"

         /// <summary>
         /// Replaces every <c>\uXXXX</c> escape (backslash, lower-case 'u', four hex digits) in
         /// <paramref name="input"/> with the UTF-16 code unit it denotes, e.g.
         /// <c>\u4e2d\u56fd</c> becomes "中国". All other text is copied through unchanged.
         /// </summary>
         /// <remarks>
         /// A backslash that does not start a complete, valid escape is kept literally.
         /// Code units are appended as-is, so a surrogate pair written as two consecutive
         /// escapes is reassembled into one character.
         /// </remarks>
         /// <param name="input">Text that may contain <c>\uXXXX</c> escapes.</param>
         /// <returns>The text with all valid escapes decoded.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
            public static string NormalU2C(string input)
            {
                if (input == null)
                {
                    throw new ArgumentNullException("input");
                }

                StringBuilder decoded = new StringBuilder(input.Length);
                int i = 0;
                while (i < input.Length)
                {
                    int codeUnit = ReadUnicodeEscape(input, i);
                    if (codeUnit >= 0)
                    {
                        decoded.Append((char)codeUnit);
                        i += 6; // length of "\uXXXX"
                    }
                    else
                    {
                        decoded.Append(input[i]);
                        i++;
                    }
                }

                return decoded.ToString();
            }

            // Returns the value of the \uXXXX escape starting at 'start', or -1 if there is none.
            private static int ReadUnicodeEscape(string text, int start)
            {
                if (start + 6 > text.Length || text[start] != '\\' || text[start + 1] != 'u')
                {
                    return -1;
                }

                int value = 0;
                for (int k = start + 2; k < start + 6; k++)
                {
                    int digit = "0123456789ABCDEF".IndexOf(char.ToUpperInvariant(text[k]));
                    if (digit < 0)
                    {
                        return -1;
                    }
                    value = (value << 4) | digit;
                }

                return value;
            }
    
            /// <summary>
            /// Converts the first four characters of <paramref name="u4"/>, read as hexadecimal
            /// digits (either case), into a one-character string holding that UTF-16 code unit,
            /// e.g. <c>{'4','e','2','d'}</c> becomes "中".
            /// </summary>
            /// <remarks>
            /// The code unit is returned as-is, so a surrogate half stays a surrogate half and can be
            /// concatenated with its partner. Characters after the fourth are ignored.
            /// </remarks>
            /// <param name="u4">At least four hexadecimal digit characters.</param>
            /// <returns>A string of length one.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="u4"/> is null.</exception>
            /// <exception cref="ArgumentException">
            /// <paramref name="u4"/> has fewer than four characters or one of the first four is not a hex digit.
            /// </exception>
            public static string UnicodeCode2Str(char[] u4)
            {
                if (u4 == null)
                {
                    throw new ArgumentNullException("u4");
                }
                if (u4.Length < 4)
                {
                    // Message text kept unchanged for callers that log or display it.
                    throw new ArgumentException("It's not a unicode code array");
                }

                const string HexDigits = "0123456789ABCDEF";
                int value = 0;
                for (int i = 0; i < 4; i++)
                {
                    int digit = HexDigits.IndexOf(char.ToUpperInvariant(u4[i]));
                    if (digit < 0)
                    {
                        throw new ArgumentException("It's not a unicode code array");
                    }
                    value = (value << 4) | digit;
                }

                return ((char)value).ToString();
            }

    4、网页ASCII转换成Unicode

        // Writes the given markup into a new HTML document object and returns the inner text
        // of its body.
        // NOTE(review): IHTMLDocument2 / HTMLDocumentClass are presumably the MSHTML COM interop
        // types — confirm the project references them.
            public string HtmlEncoding(string htmltext)
            {
                IHTMLDocument2 document = new HTMLDocumentClass();
                object[] markup = { htmltext };
                document.write(markup);
                document.close();
                return document.body.innerText;
            }

    5、解析html的NCR编码方法

    /// <summary>
    /// Collects the characters written as hexadecimal numeric character references
    /// (<c>&amp;#xHHHH;</c>) inside the <c>&lt;body&gt;</c> of an HTML page and returns them as one string.
    /// </summary>
    /// <remarks>
    /// Processing order: the body is extracted with <c>RegexHelper.GetMatchStr</c>, tabs, CR, LF and
    /// spaces are removed, tags are stripped, then every remaining reference is decoded.
    /// Text that is not a numeric character reference is not included in the result.
    /// A reference terminated by ';' may have 1 to 6 hex digits; without the ';' exactly four
    /// digits are read. Values above U+10FFFF are skipped.
    /// NOTE(review): the behaviour of <c>RegexHelper.GetMatchStr</c> is not visible here; it is
    /// assumed to place the captured body text in the out argument — confirm.
    /// </remarks>
    /// <param name="htmltext">Full HTML source.</param>
    /// <returns>
    /// The decoded characters, or the fixed message "解析html的NCR编码方法异常" if any step throws.
    /// </returns>
            public string NCRtoString(string htmltext)
            {
                try
                {
                    RegexHelper.GetMatchStr(htmltext, "<body>(.*?)</body>", out htmltext);
                    htmltext = htmltext.Replace("\t", "").Replace("\r", "").Replace("\n", "").Replace(" ", "");
                    htmltext = Regex.Replace(htmltext, "<[^>]*>", "");

                    System.Text.StringBuilder result = new System.Text.StringBuilder();
                    // Group 1: ';'-terminated reference of any valid length.
                    // Group 2: unterminated reference, read as exactly four digits.
                    MatchCollection references = Regex.Matches(htmltext, "&#[xX](?:([0-9A-Fa-f]{1,6});|([0-9A-Fa-f]{4}))");
                    foreach (Match reference in references)
                    {
                        string hex = reference.Groups[1].Success ? reference.Groups[1].Value : reference.Groups[2].Value;
                        int codePoint = Convert.ToInt32(hex, 16);
                        if (codePoint <= 0xFFFF)
                        {
                            // Appended as a raw code unit so that a surrogate pair written as two
                            // references is reassembled.
                            result.Append((char)codePoint);
                        }
                        else if (codePoint <= 0x10FFFF)
                        {
                            result.Append(char.ConvertFromUtf32(codePoint));
                        }
                    }

                    return result.ToString();
                }
                catch (Exception)
                {
                    // Kept from the original contract: callers receive this message instead of an exception.
                    return "解析html的NCR编码方法异常";
                }
            }
     

    6、C#实现escape编码

         /// <summary>
         /// Writes each UTF-16 code unit of <paramref name="s"/> as <c>%25uXXXX</c>, where XXXX is the
         /// code unit in upper-case hex (<c>%25</c> is itself the percent-encoded form of '%').
         /// </summary>
         /// <param name="s">Text to encode.</param>
         /// <returns>The concatenated <c>%25uXXXX</c> sequences; empty for an empty input.</returns>
            public static string UrlEncode(string s)
            {
                // Encoding.Unicode is little-endian UTF-16: low byte first, high byte second.
                byte[] utf16 = System.Text.Encoding.Unicode.GetBytes(s);
                StringBuilder escaped = new StringBuilder();
                int pos = 0;
                while (pos < utf16.Length)
                {
                    byte low = utf16[pos];
                    byte high = utf16[pos + 1];
                    escaped.Append("%25u").Append(high.ToString("X2")).Append(low.ToString("X2"));
                    pos += 2;
                }
                return escaped.ToString();
            }

    7、将Unicode编码转换为汉字字符串

            /// <summary>
            /// Converts every UTF-16 code unit of <paramref name="str"/> into a <c>\uXXXX</c> escape
            /// with four lower-case hex digits, e.g. "中国" becomes <c>\u4e2d\u56fd</c>.
            /// </summary>
            /// <remarks>
            /// All characters are escaped, including ASCII. Characters outside the BMP produce two
            /// escapes (one per surrogate).
            /// </remarks>
            /// <param name="str">Text to encode.</param>
            /// <returns>The escaped text; empty for an empty input.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
            public static string ToUnicode(string str)
            {
                if (str == null)
                {
                    throw new ArgumentNullException("str");
                }

                StringBuilder escaped = new StringBuilder(str.Length * 6);
                foreach (char c in str)
                {
                    // "\\u" is a literal backslash followed by 'u'.
                    escaped.Append("\\u").Append(((int)c).ToString("x4"));
                }

                return escaped.ToString();
            }
    
            /// <summary>
            /// Decodes the <c>\uXXXX</c> escapes found in <paramref name="str"/> and returns the
            /// decoded characters concatenated in order, e.g. <c>\u4e2d\u56fd</c> becomes "中国".
            /// </summary>
            /// <remarks>
            /// Only the escapes contribute to the result; any text between them is not copied.
            /// The 'u' is matched case-insensitively. Despite the method name, the result is an
            /// ordinary .NET string; no GB2312 conversion takes place.
            /// </remarks>
            /// <param name="str">Text containing <c>\uXXXX</c> escapes.</param>
            /// <returns>The decoded characters; empty when no escape is found.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
            public static string ToGB2312(string str)
            {
                if (str == null)
                {
                    throw new ArgumentNullException("str");
                }

                StringBuilder decoded = new StringBuilder();
                // "\\u" in a verbatim string is the regex for a literal backslash followed by 'u';
                // the digit class is restricted to hex so the parse below cannot fail.
                MatchCollection escapes = Regex.Matches(str, @"\\u([0-9a-fA-F]{4})", RegexOptions.Compiled | RegexOptions.IgnoreCase);
                foreach (Match escape in escapes)
                {
                    // Appended as a raw code unit so that two escapes forming a surrogate pair
                    // are reassembled.
                    decoded.Append((char)Convert.ToInt32(escape.Groups[1].Value, 16));
                }

                return decoded.ToString();
            }
  • 相关阅读:
    「十二省联考2019」 春节十二响
    「八省联考2018」 劈配
    斯特林数
    「POJ2505」A multiplication game [博弈论]
    [luogu2048] [bzoj2006] [NOI2010] 超级钢琴 题解
    [HNOI2002]-洛谷2234-营业额统计-Treap
    平衡树Treap模板与原理
    KMP算法讲解
    高斯消元--模板,原理
    第一篇博客!!
  • 原文地址:https://www.cnblogs.com/jhli/p/5911857.html
Copyright © 2020-2023  润新知