• JS 编码解码 Punycode


      ; (function (w) {

          /**
           * Converter between Unicode domain names / email addresses and their
           * Punycode ("xn--") ASCII form, following the Bootstring algorithm of
           * RFC 3492.
           *
           * Instances expose two methods:
           *   - `toASCII(input)`   Unicode -> Punycode, label by label.
           *   - `toUnicode(input)` Punycode -> Unicode, label by label.
           *
           * Both throw a `RangeError` when a label cannot be converted.
           * @constructor
           */
          function IdnMapping() {

              /** Highest positive signed 32-bit integer value */
              var maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

                  /** Bootstring parameters */
                  base = 36,
                  tMin = 1,
                  tMax = 26,
                  skew = 38,
                  damp = 700,
                  initialBias = 72,
                  initialN = 128, // 0x80
                  delimiter = '-', // '\x2D'

                  /** Regular expressions */
                  regexPunycode = /^xn--/,
                  regexNonASCII = /[^\x20-\x7E]/, // unprintable ASCII chars + non-ASCII chars
                  regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, // RFC 3490 separators

                  /** Error messages */
                  errors = {
                      'overflow': 'Overflow: input needs wider integers to process',
                      'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
                      'invalid-input': 'Invalid input'
                  },

                  /** Convenience shortcuts */
                  baseMinusTMin = base - tMin,
                  floor = Math.floor,
                  stringFromCharCode = String.fromCharCode;

              /*--------------------------------------------------------------------------*/

              /**
               * A generic error utility function.
               * @private
               * @param {String} type The error type (a key of `errors`).
               * @throws {RangeError} Always, with the applicable error message.
               */
              function error(type) {
                  throw new RangeError(errors[type]);
              }

              /**
               * A generic `Array#map` utility function.
               * @private
               * @param {Array} array The array to iterate over.
               * @param {Function} fn The function that gets called for every array
               * item.
               * @returns {Array} A new array of values returned by the callback function.
               */
              function map(array, fn) {
                  var length = array.length;
                  var result = [];
                  // Items are visited from last to first; each result keeps its index.
                  while (length--) {
                      result[length] = fn(array[length]);
                  }
                  return result;
              }

              /**
               * A simple `Array#map`-like wrapper to work with domain name strings or
               * email addresses.
               * @private
               * @param {String} string The domain name or email address.
               * @param {Function} fn The function that gets called for every label.
               * @returns {String} A new string built from the labels returned by the
               * callback function, joined with `.`.
               */
              function mapDomain(string, fn) {
                  var parts = string.split('@');
                  var result = '';
                  if (parts.length > 1) {
                      // In email addresses, only the domain name should be punycoded. Leave
                      // the local part (i.e. everything up to `@`) intact.
                      result = parts[0] + '@';
                      string = parts[1];
                  }
                  // Normalise every RFC 3490 separator to a plain full stop first.
                  // Avoid `split(regex)` for IE8 compatibility.
                  string = string.replace(regexSeparators, '\x2E');
                  var labels = string.split('.');
                  var encoded = map(labels, fn).join('.');
                  return result + encoded;
              }

              /**
               * Creates an array containing the numeric code points of each Unicode
               * character in the string. A pair of surrogate halves is converted
               * into a single code point; an unmatched high surrogate is kept as-is.
               * @private
               * @param {String} string The Unicode input string (UTF-16 code units).
               * @returns {Array} The new array of code points.
               */
              function ucs2decode(string) {
                  var output = [],
                      counter = 0,
                      length = string.length,
                      value,
                      extra;
                  while (counter < length) {
                      value = string.charCodeAt(counter++);
                      if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
                          // high surrogate, and there is a next character
                          extra = string.charCodeAt(counter++);
                          if ((extra & 0xFC00) === 0xDC00) { // low surrogate
                              output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
                          } else {
                              // unmatched surrogate; only append this code unit, in case the next
                              // code unit is the high surrogate of a surrogate pair
                              output.push(value);
                              counter--;
                          }
                      } else {
                          output.push(value);
                      }
                  }
                  return output;
              }

              /**
               * Creates a string based on an array of numeric code points. Code points
               * above 0xFFFF are written as a surrogate pair.
               * @private
               * @param {Array} array The array of numeric code points.
               * @returns {String} The new Unicode string.
               */
              function ucs2encode(array) {
                  return map(array,
                      function (value) {
                          var output = '';
                          if (value > 0xFFFF) {
                              value -= 0x10000;
                              output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
                              value = 0xDC00 | value & 0x3FF;
                          }
                          output += stringFromCharCode(value);
                          return output;
                      }).join('');
              }

              /**
               * Converts a basic code point into a digit/integer.
               * @see `digitToBasic()`
               * @private
               * @param {Number} codePoint The basic numeric code point value.
               * @returns {Number} The numeric value of a basic code point (for use in
               * representing integers) in the range `0` to `base - 1`, or `base` if
               * the code point does not represent a value.
               */
              function basicToDigit(codePoint) {
                  // Both bounds are checked explicitly: a bare `codePoint - 48 < 10`
                  // is also true for every code point below '0' and would turn
                  // characters such as '!' into bogus digits.
                  if (codePoint >= 48 && codePoint < 58) { // '0'..'9' -> 26..35
                      return codePoint - 22;
                  }
                  if (codePoint >= 65 && codePoint < 91) { // 'A'..'Z' -> 0..25
                      return codePoint - 65;
                  }
                  if (codePoint >= 97 && codePoint < 123) { // 'a'..'z' -> 0..25
                      return codePoint - 97;
                  }
                  return base;
              }

              /**
               * Converts a digit/integer into a basic code point.
               * @see `basicToDigit()`
               * @private
               * @param {Number} digit The numeric value of a basic code point.
               * @param {*} flag If non-zero, the uppercase form is used; else, the
               * lowercase form is used.
               * @returns {Number} The basic code point whose value (when used for
               * representing integers) is `digit`, which needs to be in the range
               * `0` to `base - 1`. The behavior is undefined if `flag` is non-zero
               * and `digit` has no uppercase form.
               */
              function digitToBasic(digit, flag) {
                  //  0..25 map to ASCII a..z or A..Z
                  // 26..35 map to ASCII 0..9
                  return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);
              }

              /**
               * Bias adaptation function as per section 3.4 of RFC 3492.
               * http://tools.ietf.org/html/rfc3492#section-3.4
               * @private
               * @param {Number} delta The delta that was just encoded or decoded.
               * @param {Number} numPoints The number of code points handled so far,
               * including the current one.
               * @param {Boolean} firstTime Whether this is the first adaptation for
               * the current string.
               * @returns {Number} The new bias.
               */
              function adapt(delta, numPoints, firstTime) {
                  var k = 0;
                  delta = firstTime ? floor(delta / damp) : delta >> 1;
                  delta += floor(delta / numPoints);
                  for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
                      delta = floor(delta / baseMinusTMin);
                  }
                  return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
              }

              /**
               * Converts a Punycode string of ASCII-only symbols to a string of Unicode
               * symbols.
               * @private
               * @param {String} input The Punycode string of ASCII-only symbols
               * (without the `xn--` prefix).
               * @returns {String} The resulting string of Unicode symbols.
               * @throws {RangeError} On non-basic input, an invalid digit, truncated
               * input, or integer overflow.
               */
              function decode(input) {
                  var output = [],
                      inputLength = input.length,
                      out,
                      i = 0,
                      n = initialN,
                      bias = initialBias,
                      basic,
                      j,
                      index,
                      oldi,
                      w,
                      k,
                      digit,
                      t,
                      /** Cached calculation results */
                      baseMinusT;

                  // Handle the basic code points: let `basic` be the number of input code
                  // points before the last delimiter, or `0` if there is none, then copy
                  // the first basic code points to the output.

                  basic = input.lastIndexOf(delimiter);
                  if (basic < 0) {
                      basic = 0;
                  }

                  for (j = 0; j < basic; ++j) {
                      // if it's not a basic code point
                      if (input.charCodeAt(j) >= 0x80) {
                          error('not-basic');
                      }
                      output.push(input.charCodeAt(j));
                  }

                  // Main decoding loop: start just after the last delimiter if any basic code
                  // points were copied; start at the beginning otherwise.

                  for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

                      // `index` is the index of the next character to be consumed.
                      // Decode a generalized variable-length integer into `delta`,
                      // which gets added to `i`. The overflow checking is easier
                      // if we increase `i` as we go, then subtract off its starting
                      // value at the end to obtain `delta`.
                      for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

                          if (index >= inputLength) {
                              error('invalid-input');
                          }

                          digit = basicToDigit(input.charCodeAt(index++));

                          // A character that is not a base-36 digit is malformed input,
                          // not an arithmetic overflow, so it gets its own message.
                          if (digit >= base) {
                              error('invalid-input');
                          }
                          if (digit > floor((maxInt - i) / w)) {
                              error('overflow');
                          }

                          i += digit * w;
                          t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

                          if (digit < t) {
                              break;
                          }

                          baseMinusT = base - t;
                          if (w > floor(maxInt / baseMinusT)) {
                              error('overflow');
                          }

                          w *= baseMinusT;

                      }

                      out = output.length + 1;
                      bias = adapt(i - oldi, out, oldi === 0);

                      // `i` was supposed to wrap around from `out` to `0`,
                      // incrementing `n` each time, so we'll fix that now:
                      if (floor(i / out) > maxInt - n) {
                          error('overflow');
                      }

                      n += floor(i / out);
                      i %= out;

                      // Insert `n` at position `i` of the output
                      output.splice(i++, 0, n);

                  }

                  return ucs2encode(output);
              }

              /**
               * Converts a string of Unicode symbols (e.g. a domain name label) to a
               * Punycode string of ASCII-only symbols.
               * @private
               * @param {String} input The string of Unicode symbols.
               * @returns {String} The resulting Punycode string of ASCII-only symbols
               * (without the `xn--` prefix).
               * @throws {RangeError} On integer overflow.
               */
              function encode(input) {
                  var n,
                      delta,
                      handledCPCount,
                      basicLength,
                      bias,
                      j,
                      m,
                      q,
                      k,
                      t,
                      currentValue,
                      output = [],
                      /** `inputLength` will hold the number of code points in `input`. */
                      inputLength,
                      /** Cached calculation results */
                      handledCPCountPlusOne,
                      baseMinusT,
                      qMinusT;

                  // Convert the input from UTF-16 code units to Unicode code points
                  input = ucs2decode(input);

                  // Cache the length
                  inputLength = input.length;

                  // Initialize the state
                  n = initialN;
                  delta = 0;
                  bias = initialBias;

                  // Handle the basic code points
                  for (j = 0; j < inputLength; ++j) {
                      currentValue = input[j];
                      if (currentValue < 0x80) {
                          output.push(stringFromCharCode(currentValue));
                      }
                  }

                  handledCPCount = basicLength = output.length;

                  // `handledCPCount` is the number of code points that have been handled;
                  // `basicLength` is the number of basic code points.

                  // Finish the basic string - if it is not empty - with a delimiter
                  if (basicLength) {
                      output.push(delimiter);
                  }

                  // Main encoding loop:
                  while (handledCPCount < inputLength) {

                      // All non-basic code points < n have been handled already. Find the next
                      // larger one:
                      for (m = maxInt, j = 0; j < inputLength; ++j) {
                          currentValue = input[j];
                          if (currentValue >= n && currentValue < m) {
                              m = currentValue;
                          }
                      }

                      // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
                      // but guard against overflow
                      handledCPCountPlusOne = handledCPCount + 1;
                      if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
                          error('overflow');
                      }

                      delta += (m - n) * handledCPCountPlusOne;
                      n = m;

                      for (j = 0; j < inputLength; ++j) {
                          currentValue = input[j];

                          if (currentValue < n && ++delta > maxInt) {
                              error('overflow');
                          }

                          if (currentValue === n) {
                              // Represent delta as a generalized variable-length integer
                              for (q = delta, k = base; /* no condition */; k += base) {
                                  t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
                                  if (q < t) {
                                      break;
                                  }
                                  qMinusT = q - t;
                                  baseMinusT = base - t;
                                  output.push(
                                      stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
                                  );
                                  q = floor(qMinusT / baseMinusT);
                              }

                              output.push(stringFromCharCode(digitToBasic(q, 0)));
                              bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
                              delta = 0;
                              ++handledCPCount;
                          }
                      }

                      ++delta;
                      ++n;

                  }
                  return output.join('');
              }

              /**
               * Converts a Punycode domain name or email address to Unicode. Only
               * labels that start with `xn--` are decoded; every other label is
               * returned unchanged.
               * @param {String} input The punycoded domain name or email address.
               * @returns {String} The Unicode representation of the input.
               * @throws {RangeError} If an `xn--` label is not valid Punycode.
               */
              this.toUnicode = function (input) {
                  return mapDomain(input,
                      function (string) {
                          return regexPunycode.test(string)
                              ? decode(string.slice(4).toLowerCase())
                              : string;
                      });
              };

              /**
               * Converts a Unicode domain name or email address to Punycode. Only
               * labels containing a character outside the range 0x20-0x7E are
               * encoded (and prefixed with `xn--`); every other label is returned
               * unchanged.
               * @param {String} input The domain name or email address to convert.
               * @returns {String} The Punycode representation of the input.
               * @throws {RangeError} If a label overflows during encoding.
               */
              this.toASCII = function (input) {
                  return mapDomain(input,
                      function (string) {
                          return regexNonASCII.test(string)
                              ? 'xn--' + encode(string)
                              : string;
                      });
              };

          }

          // Publish the constructor on the host's global object.
          w.IdnMapping = IdnMapping;

      // Use `window` in browsers; fall back to another global object elsewhere
      // so that merely loading the script does not throw a ReferenceError.
      })(typeof window !== 'undefined' ? window
          : typeof globalThis !== 'undefined' ? globalThis
          : this);
    View Code

    使用:

     <script>
         // Usage demo: once the page has loaded, convert a Unicode host name to
         // its Punycode form and back again, logging both results.
         window.onload = function () {
             var idn = new IdnMapping();

             // toASCII: labels with non-ASCII characters are encoded and
             // prefixed with "xn--"; ASCII-only labels are left unchanged.
             var asciiName = idn.toASCII("www.博客园.com");
             console.log(asciiName);

             // toUnicode: decode the "xn--" labels back to Unicode.
             var unicodeName = idn.toUnicode(asciiName);
             console.log(unicodeName);
         };
     </script>
  • 相关阅读:
    [CSS揭秘]不规则投影
    [CSS揭秘]规则投影
    [CSS揭秘]伪随机背景
    [CSS揭秘]复杂的背景图案
    [CSS揭秘]条纹背景
    [CSS揭秘]连续的图像边框
    Git_Eclipse:[1]Git安装插件
    Git_常用命令
    上海 day38--多表查询、python操作MySQL
    上海 day37-- MySQL 单表查询,连表操作和子查询
  • 原文地址:https://www.cnblogs.com/gaobing/p/8421268.html
Copyright © 2020-2023  润新知