• Android url中文编码问题


    最近项目中遇到一个很奇葩的问题:项目中需要加载图片,而图片的 URL 含有中文。在我和同事的手机上都可以正常加载,但在测试手机上却加载失败。排查后发现,原因就是 URL 中含有未编码的中文字符。

    解决方案:

    把中文字符encode即可:

    方法1:

     /**
      * Encodes {@code url} by delegating to {@code Uri.encode} together with an
      * allow-list of characters that the call is asked to leave unescaped.
      *
      * NOTE(review): {@code Uri} is presumably {@code android.net.Uri}; confirm
      * against the imports of the enclosing class.
      *
      * @param url the URL text to encode
      * @return whatever {@code Uri.encode} returns for {@code url} and the allow-list
      */
     public static String encodeUrl(String url) {
         // Passed as the second ("allow") argument of Uri.encode.
         final String allowedCharacters = "-![.:/,%?&=]";
         return Uri.encode(url, allowedCharacters);
     }
    

      

    方法2:

     /**
      * Percent-encodes every non-ASCII character of {@code s} as the uppercase
      * hexadecimal form ({@code %XX}) of its UTF-8 bytes and copies ASCII
      * characters (U+0000..U+007F) through unchanged, so URL delimiters such as
      * {@code :}, {@code /}, {@code ?} and {@code &} keep their meaning.
      *
      * <p>The string is walked by Unicode code point rather than by UTF-16 code
      * unit, so a supplementary character (for example an emoji stored as a
      * surrogate pair) is encoded as one 4-byte UTF-8 sequence instead of two
      * separately (and wrongly) encoded halves.
      *
      * @param s the text to encode; must not be {@code null}
      * @return {@code s} with all non-ASCII characters percent-encoded
      * @throws NullPointerException if {@code s} is {@code null}
      */
     public static String toUtf8String(String s) {
         // Fully qualified so the method does not depend on the surrounding imports.
         final java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8");
         final char[] hexDigits = "0123456789ABCDEF".toCharArray();

         StringBuilder sb = new StringBuilder(s.length());
         int i = 0;
         while (i < s.length()) {
             int codePoint = s.codePointAt(i);
             // 1 for BMP characters, 2 when a valid surrogate pair starts at i.
             int width = Character.charCount(codePoint);
             if (codePoint < 0x80) {
                 sb.append((char) codePoint);
             } else {
                 // Encode the whole code point at once; an unpaired surrogate has no
                 // UTF-8 form, so getBytes emits the charset's replacement bytes for it.
                 byte[] utf8Bytes = s.substring(i, i + width).getBytes(utf8);
                 for (byte b : utf8Bytes) {
                     // Always two hex digits per byte, high nibble first.
                     sb.append('%')
                       .append(hexDigits[(b >> 4) & 0xF])
                       .append(hexDigits[b & 0xF]);
                 }
             }
             i += width;
         }
         return sb.toString();
     }
    

      

    或者

    import java.io.CharArrayWriter;
    import java.io.UnsupportedEncodingException;
    import java.net.URLDecoder;
    import java.nio.charset.Charset;
    import java.nio.charset.IllegalCharsetNameException;
    import java.nio.charset.UnsupportedCharsetException;
    import java.util.BitSet;
    
    /**
     * Percent-encoder for whole URLs, modelled on {@code java.net.URLEncoder}
     * but with an extended set of characters that are copied through unescaped,
     * so that the structural delimiters of a URL survive encoding.
     *
     * <p>Left as-is: ASCII letters, decimal digits and
     * {@code - _ . * : / ? ; & =}. A space is rewritten to {@code +}. Every
     * other character is converted to bytes in the requested charset and each
     * byte is emitted as {@code %XX} with uppercase hexadecimal digits.
     */
    public class URLEncoderURI {

        /** Characters (indexed by char value) that {@link #encode} does not percent-escape. */
        static BitSet dontNeedEncoding;

        /** Numeric distance between a lowercase ASCII letter and its uppercase form. */
        static final int caseDiff = ('a' - 'A');

        static {
            dontNeedEncoding = new BitSet(256);

            // Alphanumerics are never escaped.
            dontNeedEncoding.set('a', 'z' + 1);
            dontNeedEncoding.set('A', 'Z' + 1);
            dontNeedEncoding.set('0', '9' + 1);

            // The space is registered only so that it is not percent-escaped;
            // encode() rewrites it to '+'. The remaining entries are the
            // punctuation marks and URL delimiters that are kept verbatim.
            for (char keep : " -_.*:/?;&=".toCharArray()) {
                dontNeedEncoding.set(keep);
            }
        }

        /**
         * Not instantiable: the class only offers static members.
         */
        private URLEncoderURI() {
        }

        /**
         * Encodes {@code s} using the named charset to obtain the bytes of every
         * character that has to be escaped.
         *
         * <p>Characters registered in {@link #dontNeedEncoding} are copied
         * unchanged, except the space, which becomes {@code +}. Each maximal run
         * of other characters is converted to bytes with the charset and written
         * as {@code %XX} triplets using uppercase hex digits. A high surrogate
         * immediately followed by a low surrogate is always kept together in the
         * same run.
         *
         * @param s
         *            <code>String</code> to be translated.
         * @param enc
         *            The name of a supported character encoding.
         * @return the translated <code>String</code>; the very same instance
         *         {@code s} when nothing had to be changed.
         * @exception UnsupportedEncodingException
         *                If the named encoding is illegal or not supported
         * @exception NullPointerException
         *                If {@code s} or {@code enc} is {@code null}
         * @see URLDecoder#decode(java.lang.String, java.lang.String)
         */
        public static String encode(String s, String enc) throws UnsupportedEncodingException {
            // Reading the length first keeps the null check on s ahead of the one on enc.
            final int length = s.length();
            final StringBuilder result = new StringBuilder(length);

            if (enc == null) {
                throw new NullPointerException("charsetName");
            }

            final Charset charset;
            try {
                charset = Charset.forName(enc);
            } catch (IllegalCharsetNameException e) {
                throw new UnsupportedEncodingException(enc);
            } catch (UnsupportedCharsetException e) {
                throw new UnsupportedEncodingException(enc);
            }

            boolean changed = false;
            int pos = 0;
            while (pos < length) {
                final char current = s.charAt(pos);

                if (dontNeedEncoding.get(current)) {
                    if (current == ' ') {
                        result.append('+');
                        changed = true;
                    } else {
                        result.append(current);
                    }
                    pos++;
                    continue;
                }

                // Find the end of the run of characters that must be escaped. The
                // first character is always part of the run; a low surrogate that
                // completes a pair is consumed without being tested on its own.
                int runEnd = pos;
                do {
                    final char unit = s.charAt(runEnd);
                    runEnd++;
                    if (Character.isHighSurrogate(unit)
                            && runEnd < length
                            && Character.isLowSurrogate(s.charAt(runEnd))) {
                        runEnd++;
                    }
                } while (runEnd < length && !dontNeedEncoding.get(s.charAt(runEnd)));

                // Convert the whole run at once, then hex-escape each byte.
                final byte[] encoded = s.substring(pos, runEnd).getBytes(charset);
                for (int k = 0; k < encoded.length; k++) {
                    result.append('%');
                    result.append(upperHexDigit((encoded[k] >> 4) & 0xF));
                    result.append(upperHexDigit(encoded[k] & 0xF));
                }

                changed = true;
                pos = runEnd;
            }

            if (!changed) {
                return s;
            }
            return result.toString();
        }

        /**
         * Returns the hexadecimal digit for {@code nibble} (0..15), using an
         * uppercase letter for the values 10..15.
         */
        private static char upperHexDigit(int nibble) {
            char digit = Character.forDigit(nibble, 16);
            if (Character.isLetter(digit)) {
                digit -= caseDiff;
            }
            return digit;
        }
    }
    

      

    参考:

    文/SIMPLE孙鹏(简书作者)
    原文链接:http://www.jianshu.com/p/9be694c8fee2
    著作权归作者所有,转载请联系作者获得授权,并标注“简书作者”。
  • 相关阅读:
    python爬虫第二天
    sqlite3 数据库创建表
    python 中的nonlocal
    python中 random.seed()函数
    每日一题6/5
    竞赛191
    二进制操作, ~按位取反, | 或, & 与, ^异或, >倍数
    竞赛190
    css BFC
    css动画 Vs js动画
  • 原文地址:https://www.cnblogs.com/spring87/p/5322856.html
Copyright © 2020-2023  润新知