• Delphi System单元 Utf8ToAnsi、AnsiToUtf8、Utf8Decode、Utf8Encode、Utf8ToUnicode、UnicodeToUtf8 转换


    Delphi System单元 Utf8ToAnsi、AnsiToUtf8、Utf8Decode、Utf8Encode、Utf8ToUnicode、UnicodeToUtf8 转换

    单元:System

    源码(原型及实现):

    { Encodes SourceChars 16-bit code units from Source as UTF-8.

      Dest          - output buffer, or nil to only measure the required size.
      MaxDestBytes  - capacity of Dest in bytes, including room for the
                      terminating #0. Ignored when Dest is nil.
      Source        - input code units; when nil the function returns 0.
      SourceChars   - number of code units to read from Source.

      Returns the number of bytes written to Dest including the terminating
      #0, or, when Dest is nil, the number of bytes required including the
      terminator. Returns 0 when Source is nil or when Dest is non-nil but
      MaxDestBytes is 0 (nothing, not even the terminator, can be stored).

      NOTE: each code unit is encoded independently as 1, 2 or 3 bytes;
      surrogate pairs receive no special treatment.
      NOTE: when the buffer fills up, the terminator overwrites the last
      byte that was written, which may cut a multi-byte sequence short. }
    function UnicodeToUtf8(Dest: PChar; MaxDestBytes: Cardinal; Source: PWideChar; SourceChars: Cardinal): Cardinal;
    var
      i, count: Cardinal;
      c: Cardinal;
    begin
      Result := 0;
      if Source = nil then Exit;
      count := 0;
      i := 0;
      if Dest <> nil then
      begin
        // A zero-capacity buffer cannot hold the terminator. Without this
        // guard MaxDestBytes-1 below wraps to $FFFFFFFF and the terminator
        // is written outside the buffer.
        if MaxDestBytes = 0 then Exit;
        while (i < SourceChars) and (count < MaxDestBytes) do
        begin
          c := Cardinal(Source[i]);
          Inc(i);
          if c <= $7F then
          begin
            // 1 byte: 0xxxxxxx
            Dest[count] := Char(c);
            Inc(count);
          end
          else if c > $7FF then
          begin
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            if count + 3 > MaxDestBytes then
              break;
            Dest[count] := Char($E0 or (c shr 12));
            Dest[count+1] := Char($80 or ((c shr 6) and $3F));
            Dest[count+2] := Char($80 or (c and $3F));
            Inc(count,3);
          end
          else //  $7F < Source[i] <= $7FF
          begin
            // 2 bytes: 110xxxxx 10xxxxxx
            if count + 2 > MaxDestBytes then
              break;
            Dest[count] := Char($C0 or (c shr 6));
            Dest[count+1] := Char($80 or (c and $3F));
            Inc(count,2);
          end;
        end;
        // Buffer completely filled: step back so the terminator fits.
        if count >= MaxDestBytes then count := MaxDestBytes-1;
        Dest[count] := #0;
      end
      else
      begin
        // Measuring pass: add up the encoded length of every code unit.
        while i < SourceChars do
        begin
          c := Cardinal(Source[i]);
          Inc(i);
          if c > $7F then
          begin
            if c > $7FF then
              Inc(count);
            Inc(count);
          end;
          Inc(count);
        end;
      end;
      Result := count+1;  // convert zero based index to byte count
    end;
    
    { Convenience overload for a #0-terminated UTF-8 source: measures the
      source length in bytes (0 when Source is nil) and forwards to the
      four-parameter Utf8ToUnicode, passing MaxChars as the capacity of Dest. }
    function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: Integer): Integer;
    var
      SrcLen: Cardinal;
      P: PChar;
    begin
      SrcLen := 0;
      P := Source;
      if P <> nil then
        // Walk to the terminating #0, counting bytes on the way.
        while P^ <> #0 do
        begin
          Inc(P);
          Inc(SrcLen);
        end;
      Result := Utf8ToUnicode(Dest, MaxChars, Source, SrcLen);
    end;
    
    { Decodes SourceBytes bytes of UTF-8 from Source into 16-bit code units.

      Dest          - output buffer, or nil to only count the code units.
      MaxDestChars  - capacity of Dest in WideChars, including room for the
                      terminating #0. Ignored when Dest is nil.
      Source        - UTF-8 input; when nil the function returns 0.
      SourceBytes   - number of bytes to read from Source.

      Returns the number of WideChars written including the terminating #0,
      or, when Dest is nil, the number required including the terminator.
      Returns Cardinal(-1) when a multi-byte sequence is incomplete or has a
      malformed trail byte; in that case Dest is left without a terminator.
      Returns 0 when Source is nil or when Dest is non-nil but MaxDestChars
      is 0 (nothing, not even the terminator, can be stored).

      NOTE: every byte with bit 7 set is treated as a lead byte; bit 5 of the
      lead byte selects the 3-byte form, otherwise the 2-byte form is assumed.
      4-byte sequences and stray continuation bytes are not detected.
      NOTE: when the buffer fills up, the terminator overwrites the last
      WideChar that was written. }
    function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: Cardinal; Source: PChar; SourceBytes: Cardinal): Cardinal;
    var
      i, count: Cardinal;
      c: Byte;
      wc: Cardinal;
    begin
      if Source = nil then
      begin
        Result := 0;
        Exit;
      end;
      // Pessimistic default: every early Exit below reports a decode error.
      Result := Cardinal(-1);
      count := 0;
      i := 0;
      if Dest <> nil then
      begin
        // A zero-capacity buffer cannot hold the terminator. Without this
        // guard MaxDestChars-1 below wraps to $FFFFFFFF and the terminator
        // is written outside the buffer.
        if MaxDestChars = 0 then
        begin
          Result := 0;
          Exit;
        end;
        while (i < SourceBytes) and (count < MaxDestChars) do
        begin
          wc := Cardinal(Source[i]);
          Inc(i);
          if (wc and $80) <> 0 then
          begin
            if i >= SourceBytes then Exit;          // incomplete multibyte char
            wc := wc and $3F;
            if (wc and $20) <> 0 then
            begin
              // 3-byte form: consume the middle byte here.
              c := Byte(Source[i]);
              Inc(i);
              if (c and $C0) <> $80 then Exit;      // malformed trail byte or out of range char
              if i >= SourceBytes then Exit;        // incomplete multibyte char
              wc := (wc shl 6) or (c and $3F);
            end;
            // Final trail byte, shared by the 2-byte and 3-byte forms.
            c := Byte(Source[i]);
            Inc(i);
            if (c and $C0) <> $80 then Exit;       // malformed trail byte

            // WideChar() keeps only the low 16 bits of the accumulated value.
            Dest[count] := WideChar((wc shl 6) or (c and $3F));
          end
          else
            Dest[count] := WideChar(wc);
          Inc(count);
        end;
        // Buffer completely filled: step back so the terminator fits.
        if count >= MaxDestChars then count := MaxDestChars-1;
        Dest[count] := #0;
      end
      else
      begin
        // Counting pass: same validation as above, but nothing is stored.
        while (i < SourceBytes) do
        begin
          c := Byte(Source[i]);
          Inc(i);
          if (c and $80) <> 0 then
          begin
            if i >= SourceBytes then Exit;          // incomplete multibyte char
            c := c and $3F;
            if (c and $20) <> 0 then
            begin
              c := Byte(Source[i]);
              Inc(i);
              if (c and $C0) <> $80 then Exit;      // malformed trail byte or out of range char
              if i >= SourceBytes then Exit;        // incomplete multibyte char
            end;
            c := Byte(Source[i]);
            Inc(i);
            if (c and $C0) <> $80 then Exit;       // malformed trail byte
          end;
          Inc(count);
        end;
      end;
      Result := count+1;
    end;
    
    { Returns WS encoded as UTF-8. An empty input, or a non-positive result
      from UnicodeToUtf8, yields an empty string. }
    function Utf8Encode(const WS: WideString): UTF8String;
    var
      Written: Integer;
      Buf: UTF8String;
    begin
      Result := '';
      if WS <> '' then
      begin
        // Worst case is three bytes per code unit; the string's implicit
        // terminator provides the extra byte passed as capacity below.
        SetLength(Buf, 3 * Length(WS));

        Written := UnicodeToUtf8(PChar(Buf), Length(Buf) + 1, PWideChar(WS), Length(WS));
        if Written <= 0 then
          Buf := ''
        else
          SetLength(Buf, Written - 1);  // drop the counted terminator
        Result := Buf;
      end;
    end;
    
    { Returns the UTF-8 string S decoded to a WideString. An empty input, or
      a non-positive result from Utf8ToUnicode (which includes its
      Cardinal(-1) error value once stored in an Integer), yields an empty
      string. }
    function Utf8Decode(const S: UTF8String): WideString;
    var
      Decoded: Integer;
      Buf: WideString;
    begin
      Result := '';
      if S <> '' then
      begin
        // One WideChar per input byte is the size reserved for the output;
        // the string's implicit terminator supplies the extra slot.
        SetLength(Buf, Length(S));

        Decoded := Utf8ToUnicode(PWideChar(Buf), Length(Buf) + 1, PChar(S), Length(S));
        if Decoded <= 0 then
          Buf := ''
        else
          SetLength(Buf, Decoded - 1);  // drop the counted terminator
        Result := Buf;
      end;
    end;
    
    { Converts S to UTF-8 by widening it to a WideString and passing that to
      Utf8Encode. }
    function AnsiToUtf8(const S: string): UTF8String;
    var
      Wide: WideString;
    begin
      Wide := S;  // same string -> WideString conversion the direct call performs
      Result := Utf8Encode(Wide);
    end;
    
    { Decodes the UTF-8 string S with Utf8Decode and converts the resulting
      WideString to string. }
    function Utf8ToAnsi(const S: UTF8String): string;
    var
      Wide: WideString;
    begin
      Wide := Utf8Decode(S);
      Result := Wide;  // same WideString -> string conversion the direct assignment performs
    end;

    从源码中可以看到 

    • Utf8ToAnsi 调用了 Utf8Decode 函数
    • AnsiToUtf8 调用了 Utf8Encode 函数

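    所以解码时 Utf8ToAnsi 和 Utf8Decode 使用的是同一套解码逻辑,区别只在返回类型:Utf8Decode 返回 WideString,而 Utf8ToAnsi 会把解码结果再转换为 string(在非 Unicode 版本的 Delphi 中即 AnsiString),当前代码页无法表示的字符可能丢失。如果只处理当前代码页能表示的字符,两者都可以使用。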

    创建时间:2020.12.22  更新时间:

  • 相关阅读:
    浅谈最长上升子序列(LIS)
    浅谈树链剖分
    数字计数
    windy数
    骑士(树形dp)
    旅游规划
    皇宫看守
    k8s安装报错
    linux文件夹存储潜规则
    搭建覆盖率测试环境
  • 原文地址:https://www.cnblogs.com/guorongtao/p/14171632.html
Copyright © 2020-2023  润新知