博客来源:http://blog.csdn.net/songylwq/article/details/7578905
Base64是什么:
Base64是网络上最常见的用于传输8Bit字节代码的编码方式之一,大家可以查看RFC2045~RFC2049,上面有MIME的详细规范。Base64编码可用于在HTTP环境下传递较长的标识信息。例如,在Java Persistence系统Hibernate中,就采用了Base64来将一个较长的唯一标识符(一般为128-bit的UUID)编码为一个字符串,用作HTTP表单和HTTP GET URL中的参数。在其他应用程序中,也常常需要把二进制数据编码为适合放在URL(包括隐藏表单域)中的形式。此时,采用Base64编码不仅比较简短,同时也具有不可读性,即所编码的数据不会被人用肉眼所直接看到
简介
标准的Base64并不适合直接放在URL里传输,因为URL编码器会把标准Base64中的“/”和“+”字符变为形如“%XX”的形式,而这些“%”号在存入数据库时还需要再进行转换,因为ANSI SQL中已将“%”号用作通配符。
为解决此问题,可采用一种用于URL的改进Base64编码,它不在末尾填充'='号,并将标准Base64中的“+”和“/”分别改成了“*”和“-”,这样就免去了在URL编解码和数据库存储时所要作的转换,避免了编码信息长度在此过程中的增加,并统一了数据库、表单等处对象标识符的格式。
另有一种用于正则表达式的改进Base64变种,它将“+”和“/”改成了“!”和“-”,因为“+”,“*”以及前面在IRCu中用到的“[”和“]”在正则表达式中都可能具有特殊含义。
此外还有一些变种,它们将“+/”改为“_-”或“._”(用作编程语言中的标识符名称)或“.-”(用于XML中的Nmtoken)甚至“_:”(用于XML中的Name)。
Base64要求把每三个8Bit的字节转换为四个6Bit的字节(3*8 = 4*6 = 24),然后把6Bit再添两位高位0,组成四个8Bit的字节,也就是说,转换后的字符串理论上将要比原来的长1/3。
规则
关于这个编码的规则:
①把3个字节变成4个字符。
②每76个字符加一个换行符。
③最后的结束符也要处理(末尾不足3个字节时用“=”补齐)。
这样说会不会太抽象了?不怕,我们来看一个例子:
转换前 aaaaaabb ccccdddd eeffffff
转换后 00aaaaaa 00bbcccc 00ddddee 00ffffff
应该很清楚了吧?上面的三个字节是原文,下面的四个字节是转换后的Base64编码,其前两位均为0。
转换后,我们用一个码表来得到我们想要的字符串(也就是最终的Base64编码)。这个码表(摘自RFC2045)是这样的:数值0~25依次对应“A”~“Z”,26~51依次对应“a”~“z”,52~61依次对应“0”~“9”,62对应“+”,63对应“/”。
java代码示例:
/**
 * Minimal Base64 encoder/decoder using the standard alphabet
 * ({@code A-Z}, {@code a-z}, {@code 0-9}, {@code +}, {@code /}) with {@code =} padding.
 *
 * <p>{@link #encode(byte[])} writes at most 76 characters per line, separates lines with a
 * line feed and always terminates non-empty output with a line feed.
 * {@link #decode(String)} ignores whitespace and returns {@code null} for malformed input.
 */
public final class Base64 {
    /** Size of the decode table; one slot for every char value in 0..255. */
    private static final int BASELENGTH = 256;
    /** Number of characters in one encoded group. */
    private static final int FOURBYTE = 4;
    /** Encoded groups per output line (19 * 4 = 76 characters). */
    private static final int QUARTETS_PER_LINE = 19;
    private static final char PAD = '=';
    private static final char LINE_FEED = 0xa;

    /** Maps a 6-bit value (0..63) to its Base64 character. */
    private static final char[] lookUpBase64Alphabet =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
    /** Maps a char value (0..255) to its 6-bit value, or -1 when it is not a data character. */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    static {
        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = -1;
        }
        // Derive the decode table from the encode table so the two can never disagree.
        for (int value = 0; value < lookUpBase64Alphabet.length; value++) {
            base64Alphabet[lookUpBase64Alphabet[value]] = (byte) value;
        }
    }

    /**
     * Tells whether the character is a space, carriage return, line feed or tab.
     *
     * @param octect character to test
     * @return {@code true} for 0x20, 0x0d, 0x0a and 0x09
     */
    protected static boolean isWhiteSpace(char octect) {
        return (octect == 0x20 || octect == 0xd || octect == 0xa || octect == 0x9);
    }

    /**
     * Tells whether the character is the padding character {@code '='}.
     *
     * @param octect character to test
     * @return {@code true} when the character is the pad character
     */
    protected static boolean isPad(char octect) {
        return (octect == PAD);
    }

    /**
     * Tells whether the character belongs to the Base64 data alphabet.
     *
     * @param octect character to test; any char value is accepted
     * @return {@code true} when the character encodes a 6-bit value
     */
    protected static boolean isData(char octect) {
        // Characters beyond the table (e.g. CJK text) are simply "not data"; without the
        // range check they would index past the end of the table.
        return octect < BASELENGTH && base64Alphabet[octect] != -1;
    }

    /**
     * Tells whether the character may appear in Base64 text at all.
     *
     * @param octect character to test
     * @return {@code true} for whitespace, the pad character and data characters
     */
    protected static boolean isBase64(char octect) {
        return (isWhiteSpace(octect) || isPad(octect) || isData(octect));
    }

    /**
     * Encodes binary data as Base64 text.
     *
     * <p>A line feed is inserted after every 76 output characters when more output
     * follows, and one line feed is always appended at the end.
     *
     * @param binaryData bytes to encode; may be {@code null}
     * @return the encoded text, {@code ""} for an empty array, or {@code null} when
     *         {@code binaryData} is {@code null}
     */
    public static String encode(byte[] binaryData) {
        if (binaryData == null) {
            return null;
        }
        // Work in bytes, not bits: length * 8 overflows int for arrays above 256 MiB.
        int length = binaryData.length;
        if (length == 0) {
            return "";
        }
        int numberQuartet = length / 3 + (length % 3 != 0 ? 1 : 0);
        int numberLines = (numberQuartet - 1) / QUARTETS_PER_LINE + 1;
        char[] encodedData = new char[numberQuartet * FOURBYTE + numberLines];
        int encodedIndex = 0;
        int dataIndex = 0;
        for (int quartet = 0; quartet < numberQuartet; quartet++) {
            if (quartet > 0 && quartet % QUARTETS_PER_LINE == 0) {
                encodedData[encodedIndex++] = LINE_FEED;
            }
            int remaining = length - dataIndex;
            // Mask to 0..255 so negative bytes shift like unsigned values; bytes missing
            // from a final short group count as zero.
            int b1 = binaryData[dataIndex++] & 0xff;
            int b2 = remaining > 1 ? (binaryData[dataIndex++] & 0xff) : 0;
            int b3 = remaining > 2 ? (binaryData[dataIndex++] & 0xff) : 0;
            encodedData[encodedIndex++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >>> 4)];
            encodedData[encodedIndex++] = remaining > 1
                    ? lookUpBase64Alphabet[((b2 & 0x0f) << 2) | (b3 >>> 6)]
                    : PAD;
            encodedData[encodedIndex++] = remaining > 2
                    ? lookUpBase64Alphabet[b3 & 0x3f]
                    : PAD;
        }
        encodedData[encodedIndex] = LINE_FEED;
        return new String(encodedData);
    }

    /**
     * Decodes Base64 text into bytes.
     *
     * <p>Whitespace (space, tab, CR, LF) is ignored. Padding is only accepted in the
     * last group of four characters, and the unused low bits of a padded group must be zero.
     *
     * @param encoded Base64 text; may be {@code null}
     * @return the decoded bytes, an empty array when the input holds no characters besides
     *         whitespace, or {@code null} when {@code encoded} is {@code null} or malformed
     */
    public static byte[] decode(String encoded) {
        if (encoded == null) {
            return null;
        }
        char[] base64Data = encoded.toCharArray();
        int len = removeWhiteSpace(base64Data);
        if (len % FOURBYTE != 0) {
            return null; // should be divisible by four
        }
        int numberQuadruple = len / FOURBYTE;
        if (numberQuadruple == 0) {
            return new byte[0];
        }
        byte[] decodedData = new byte[numberQuadruple * 3];
        int encodedIndex = 0;
        int dataIndex = 0;

        // Every group but the last must consist of four data characters.
        for (int i = 0; i < numberQuadruple - 1; i++) {
            char d1 = base64Data[dataIndex++];
            char d2 = base64Data[dataIndex++];
            char d3 = base64Data[dataIndex++];
            char d4 = base64Data[dataIndex++];
            if (!isData(d1) || !isData(d2) || !isData(d3) || !isData(d4)) {
                return null;
            }
            byte b1 = base64Alphabet[d1];
            byte b2 = base64Alphabet[d2];
            byte b3 = base64Alphabet[d3];
            byte b4 = base64Alphabet[d4];
            decodedData[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4);
            decodedData[encodedIndex++] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
            decodedData[encodedIndex++] = (byte) (b3 << 6 | b4);
        }

        // The last group may end in one or two pad characters.
        char d1 = base64Data[dataIndex++];
        char d2 = base64Data[dataIndex++];
        if (!isData(d1) || !isData(d2)) {
            return null;
        }
        byte b1 = base64Alphabet[d1];
        byte b2 = base64Alphabet[d2];
        char d3 = base64Data[dataIndex++];
        char d4 = base64Data[dataIndex];

        if (isData(d3) && isData(d4)) { // no pad, e.g. 3cQl
            byte b3 = base64Alphabet[d3];
            byte b4 = base64Alphabet[d4];
            decodedData[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4);
            decodedData[encodedIndex++] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
            decodedData[encodedIndex] = (byte) (b3 << 6 | b4);
            return decodedData;
        }
        if (isPad(d3) && isPad(d4)) { // two pads, e.g. 3c==
            if ((b2 & 0xf) != 0) { // last 4 bits should be zero
                return null;
            }
            byte[] tmp = new byte[encodedIndex + 1];
            System.arraycopy(decodedData, 0, tmp, 0, encodedIndex);
            tmp[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
            return tmp;
        }
        if (isData(d3) && isPad(d4)) { // one pad, e.g. 3cQ=
            byte b3 = base64Alphabet[d3];
            if ((b3 & 0x3) != 0) { // last 2 bits should be zero
                return null;
            }
            byte[] tmp = new byte[encodedIndex + 2];
            System.arraycopy(decodedData, 0, tmp, 0, encodedIndex);
            tmp[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4);
            tmp[encodedIndex] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
            return tmp;
        }
        // Anything else is an error such as "3c=r", "3cdX", "3cXd" or "3cXX",
        // where X is neither data nor pad.
        return null;
    }

    /**
     * Compacts the array in place so that all non-whitespace characters come first,
     * in their original order.
     *
     * @param data characters to compact; may be {@code null}
     * @return the number of non-whitespace characters now at the front of {@code data},
     *         or 0 when {@code data} is {@code null}
     */
    protected static int removeWhiteSpace(char[] data) {
        if (data == null) {
            return 0;
        }
        int newSize = 0;
        for (int i = 0; i < data.length; i++) {
            if (!isWhiteSpace(data[i])) {
                data[newSize++] = data[i];
            }
        }
        return newSize;
    }

    /**
     * Demo entry point: prints the Base64 encoding of a fixed Chinese sample string.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The sample is the Chinese name of the People's Republic of China, spelled with
        // Unicode escapes so the source compiles the same under any file encoding.
        // The bytes are taken as UTF-8 so the output does not depend on the platform charset.
        String sample = "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd";
        System.out.println(encode(sample.getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    }
}