• Golang 效率初(粗)测


        从接触 Golang 开始,断断续续已有差不多一年左右的时间了,都是业余自己学学看看,尚主要限于语法及语言特性,还没有用它写过实际的项目。

        关于 Golang 的语法及语言特性,网上有很多资源可以学习。后面某个时间,我也许会写一篇粗浅的文章,来比较一下 Golang 和 C++、Delphi 甚至 C# 等语言语法方面的特性。

        我算是个急性子的人(当然现在好一些了),于是作为码农,显而易见会对“效率”比较敏感。这里的效率不单单指编译器生成的机器码优化程度,也包括编译器的编译速度,所以我对 C++ 兴趣不算大,虽然它是我平时的工作语言。

        言归正传。

        分别用 Golang、C++、Delphi 写了四个小例子,包括普通的应用场景、字符串(串接)操作及数据密集计算(当然也会涉及到譬如库函数的优化等)。我的电脑软硬件环境为:Win7 64bit,Xeon E3-1230(4 核 8 线程),16G RAM。Golang 版本是 1.3.1 Windows/386,VC 则用的 VS 2012,而 Delphi 则用的 XE6 Update1。VC 和 Delphi 编译设置为 Win32 & Release,Golang 则使用默认配置。

        所有测试计量单位均为毫秒(ms)。

        首先是计算 π 的例子,代码分别如下。

        Golang:

    package main
    
    import (
    	"fmt"
    	"time"
    )
    
    const cNumMax = 999999999
    
    func main() {
    	sign := 1.0
    	pi := 0.0
    	t1 := time.Now()
    	for i := 1; i < cNumMax+2; i += 2 {
    		pi += (1.0 / float64(i)) * sign
    		sign = -sign
    	}
    	pi *= 4
    	t2 := time.Now()
    	fmt.Printf("PI = %f; Time = %d
    ", pi, t2.Sub(t1)/time.Millisecond)
    }  

        C++:

    #include "stdafx.h"
    
    #include <stdio.h>
    #include <time.h>
    
    // Approximates pi with the Leibniz series 4 * sum((-1)^k / (2k+1)) over the
    // odd denominators 1..cNumMax and prints the value plus the elapsed time in
    // milliseconds. Always returns 0.
    int _tmain(int argc, _TCHAR* argv[])
    {
        const int cNumMax = 999999999;
        double sign = 1.0;
        double pi = 0;

        clock_t t1 = clock();
        for (int i = 1; i < cNumMax + 2; i += 2)
        {
            pi += (1.0 / (double)i) * sign;
            sign = -sign;
        }
        pi *= 4;
        clock_t t2 = clock();

        // clock() counts ticks, not milliseconds: scale by CLOCKS_PER_SEC so the
        // printed unit is correct on any runtime, and pass a long to match %ld.
        long elapsedMs = (long)((double)(t2 - t1) * 1000.0 / CLOCKS_PER_SEC);
        printf("PI = %lf; Time = %ld\n", pi, elapsedMs);

        return 0;
    }
    

        Delphi:

    program PiCalcer;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      System.SysUtils, System.DateUtils;
    
    const
      cNumMax = 999999999;
    
    // Program state. Sign alternates between +1 and -1; Pi accumulates the series.
    // Note that the local Pi hides the RTL's System.Pi inside this program.
    var
      Sign: Double = 1.0;
      Pi  : Double = 0.0;
      I   : Integer;
      // Timestamps taken with Now; stored as Double rather than TDateTime.
      T1  : Double;
      T2  : Double;
      S   : string;
    
    // Sums the Leibniz series over the odd denominators 1..cNumMax, multiplies by 4
    // and prints the result with the elapsed time in milliseconds.
    begin
      T1 := Now;
      I := 1;
      // A while loop is used because the index advances by 2 each iteration.
      while I < cNumMax + 2 do
      begin
        Pi := Pi + (1.0 / I) * Sign;
        Sign := -Sign;
        I := I + 2;
      end;
      Pi := Pi * 4;
      T2 := Now;
      S := Format('PI = %.6f; Time = %d', [Pi, MilliSecondsBetween(T2, T1)]);
      Writeln(S);
      // Wait for Enter so the console window stays open.
      Readln;
    end.
    

        分别执行 10 次,结果如下。

        Golang:2038 2028 2036 2024 2034 2015 2034 2018 2024 2018,平均:2026.9;

        C++     :2041 2052 2062 2036 2033 2049 2039 2026 2037 2038,平均:2041.3;

        Delphi :2594 2572 2574 2584 2574 2564 2575 2575 2571 2563,平均:2574.6。

        Golang 的结果居然很不错,比 VC 还略快一点(平均差距不到 1%),而 Delphi,大家都懂,优化向来不是它的“强项”。

        然后是个质数生成例子。

        Golang:

    package main
    
    import (
    	"fmt"
    	"time"
    )
    
    const cNumMax = 10000000
    
    // main counts the primes in [2, cNumMax] by a naive marking pass and prints
    // the elapsed time (as a time.Duration) together with the count.
    func main() {
    	t1 := time.Now()
    
    	// nums[i] holds i while i is still a prime candidate and 0 once it has
    	// been marked composite. Indices 0 and 1 keep their zero value and are
    	// never inspected.
    	var nums [cNumMax + 1]int
    	var i, j int
    	for i = 2; i < cNumMax+1; i++ {
    		nums[i] = i
    	}
    	// Zero every multiple j*i (j >= 2) of every i, whether or not i itself
    	// is prime; no square-root cut-off is applied.
    	for i = 2; i < cNumMax+1; i++ {
    		j = 2
    		for j*i < cNumMax+1 {
    			nums[j*i] = 0
    			j++
    		}
    	}
    	// Whatever is still non-zero was never hit as a multiple, i.e. is prime.
    	cnt := 0
    	for i = 2; i < cNumMax+1; i++ {
    		if nums[i] != 0 {
    			cnt++
    		}
    	}
    
    	t2 := time.Now()
    	// t2.Sub(t1) is printed through Println, so it is formatted as a Duration.
    	fmt.Println("Time:", t2.Sub(t1), " Count:", cnt)
    }  

        C++:

    #include "stdafx.h"
    
    #include <stdlib.h>
    #include <time.h>
    
    const int cNumMax = 10000000;
    
    // Counts the primes in [2, cNumMax] by a naive marking pass and prints the
    // elapsed time in milliseconds together with the count.
    // Returns 0 on success, 1 if the work buffer cannot be allocated.
    int _tmain(int argc, _TCHAR* argv[])
    {
        clock_t t1 = clock();

        // nums[i] holds i while i is still a prime candidate, 0 once marked
        // composite. Slots 0 and 1 are left uninitialised and never read.
        int *nums = (int*)malloc(sizeof(int) * (cNumMax + 1));
        if (nums == NULL)
        {
            fprintf(stderr, "Out of memory\n");
            return 1;
        }

        int i;
        for (i = 2; i < cNumMax + 1; i++)
        {
            nums[i] = i;
        }

        // Zero every multiple j*i (j >= 2) of every i, prime or not.
        int j;
        for (i = 2; i < cNumMax + 1; i++)
        {
            j = 2;
            while (j * i < cNumMax + 1)
            {
                nums[j * i] = 0;
                j++;
            }
        }

        int cnt = 0;
        for (i = 2; i < cNumMax + 1; i++)
        {
            if (nums[i] != 0)
            {
                cnt++;
            }
        }

        // The release of the buffer is deliberately inside the timed region,
        // as in the original measurement.
        free(nums);

        clock_t t2 = clock();
        // clock() counts ticks: scale by CLOCKS_PER_SEC to get milliseconds and
        // pass a long so the argument matches %ld.
        long elapsedMs = (long)((double)(t2 - t1) * 1000.0 / CLOCKS_PER_SEC);
        printf("Time: %ldms; Count: %d\n", elapsedMs, cnt);

        return 0;
    }

        Delphi:

    program PrimeSieve;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      System.SysUtils, System.DateUtils;
    
    const
      cNumMax = 10000000;
    
    // Program state: T1/T2 are Now timestamps (stored as Double), Nums is the
    // candidate table indexed by the number itself.
    var
      T1, T2: Double;
      I, J  : Integer;
      Cnt   : Integer;
      Nums  : array of Integer;
    
    // Counts the primes in [2, cNumMax] by a naive marking pass and prints the
    // count with the elapsed time in milliseconds.
    begin
      T1 := Now;
    
      // Nums[I] holds I while I is still a candidate, 0 once marked composite.
      SetLength(Nums, cNumMax + 1);
      for I := 2 to cNumMax do
        Nums[I] := I;
    
      // Zero every multiple J*I (J >= 2) of every I, prime or not.
      for I := 2 to cNumMax do
      begin
        J := 2;
    
        while J * I < cNumMax + 1 do
        begin
          Nums[J * I] := 0;
          Inc(J);
        end;
      end;
    
      Cnt := 0;
      for I := 2 to cNumMax do
      begin
        if Nums[I] <> 0 then
          Inc(Cnt);
      end;
    
      // Releasing the array is part of the timed region.
      SetLength(Nums, 0);
    
      T2 := Now;
    
      Writeln(Format('Cnt = %d; Time = %d', [Cnt, MilliSecondsBetween(T2, T1)]));
      // Wait for Enter so the console window stays open.
      Readln;
    end.  

        同样分别执行 10 次,结果如下。

        Golang:959 957 959 953 961 951 948 956 956 956,平均:955.6;

        C++     :965 965 967 953 961 964 963 960 956 956,平均:961;

        Delphi : 973 976 973 982 981 970 977 979 971 977,平均:975.9;

        仍然,Golang 看上去最快,而 Delphi 则很正常地居末。

        所以我忍不住想要来一个能展现 Delphi 优点的例子,这个例子几乎毫无疑问,和字符串操作(及内存管理器)相关,所以有如下字符串串接的示例(其中涉及到了譬如 IntToStr / itoa 这样的函数调用,我自己实现了个 C++ 版的 IntToStr)。

        Golang:

    package main
    
    import (
    	"bytes"
    	"fmt"
    	"strconv"
    	"time"
    )
    
    const cNumMax = 1000000
    
    // testViaBuffer concatenates the decimal representations of 0..cNumMax-1
    // by appending each one to a bytes.Buffer, and returns the result as a string.
    func testViaBuffer() string {
    	var buf bytes.Buffer
    	for i := 0; i < cNumMax; i++ {
    		buf.WriteString(strconv.Itoa(i))
    	}
    	return buf.String()
    }
    
    // testViaNormal builds the same string as testViaBuffer using plain
    // string += in the loop. The repeated concatenation is intentional: it is the
    // slow path this benchmark sets out to measure.
    func testViaNormal() string {
    	var ret string
    	for i := 0; i < cNumMax; i++ {
    		ret += strconv.Itoa(i)
    	}
    	return ret
    }
    
    func main() {
    	fmt.Println("Test via bytes.Buffer...")
    	t1 := time.Now()
    	s := testViaBuffer()
    	t2 := time.Now()
    	fmt.Printf("Result: %s...(Length = %d); Time: %dms
    ", s[2000:2005], len(s), t2.Sub(t1)/time.Millisecond)
    
    	/*
    		fmt.Println("Test via normal way...")
    		t1 = time.Now()
    		s = testViaNormal()
    		t2 = time.Now()
    		fmt.Printf("Result: %s...(Length = %d); Time: %dms
    ", s[2000:2005], len(s), t2.Sub(t1)/time.Millisecond)
    	*/
    }  

        C++:

    #include "stdafx.h"
    
    #include <time.h>
    #include <stdarg.h>
    #include <string>
    #include <iostream>
    using namespace std;
    
    const int cNumMax = 1000000;
    
    // Formats a wide printf-style pattern with an already-started va_list and
    // returns the text as a wstring. Returns an empty string when the measured
    // length is not positive.
    wstring FormatV(const wchar_t* pwcFormat, va_list argList)
    {
        wstring ws;
        // First pass: measure the required length only.
        int nLen = _vscwprintf(pwcFormat, argList);
        if (nLen > 0)
        {
            ws.resize(nLen);
            // Second pass: format directly into the string's storage; the size
            // argument is nLen + 1 to leave room for the terminator.
            // NOTE(review): argList is traversed twice without va_copy -- this
            // appears to rely on MSVC's va_list behaviour; confirm before porting.
            vswprintf_s(&ws[0], nLen + 1, pwcFormat, argList);
        }
        return ws;
    }
    
    // Variadic front end for FormatV: collects the arguments that follow
    // pwcFormat into a va_list and returns the formatted wstring.
    wstring __cdecl Format(const wchar_t* pwcFormat, ...)
    {
        va_list argList;
        va_start(argList, pwcFormat);
        wstring ws = FormatV(pwcFormat, argList);    
        va_end(argList);
        return ws;
    }
    
    // Narrow-character counterpart of FormatV: formats a printf-style pattern
    // with an already-started va_list and returns the text as a std::string.
    // Returns an empty string when the measured length is not positive.
    string FormatVA(const char* pcFormat, va_list argList)
    {
        string s;
        // First pass: measure the required length only.
        int nLen = _vscprintf(pcFormat, argList);
        if (nLen > 0)
        {
            s.resize(nLen);
            // Second pass: format directly into the string's storage; the size
            // argument is nLen + 1 to leave room for the terminator.
            // NOTE(review): argList is traversed twice without va_copy -- this
            // appears to rely on MSVC's va_list behaviour; confirm before porting.
            vsprintf_s(&s[0], nLen + 1, pcFormat, argList);
        }
        return s;
    }
    
    // Variadic front end for FormatVA: collects the arguments that follow
    // pcFormat into a va_list and returns the formatted std::string.
    string __cdecl FormatA(const char* pcFormat, ...)
    {
        va_list argList;
        va_start(argList, pcFormat);
        string s = FormatVA(pcFormat, argList);
        va_end(argList);
        return s;
    }
    
    // Converts nValue to its decimal wide-string form by routing it through the
    // printf-style Format helper with the L"%d" pattern.
    wstring IntToStr(int nValue)
    {
        return Format(L"%d", nValue);
    }
    
    // Converts nValue to its decimal narrow-string form by routing it through the
    // printf-style FormatA helper with the "%d" pattern.
    string IntToStrA(int nValue)
    {
        return FormatA("%d", nValue);
    }
    
    // Benchmark body (wide): appends the decimal form of every integer in
    // [0, cNumMax) to one wstring using operator+= and returns it.
    wstring testW()
    {
        wstring ret = L"";
        for (int i = 0; i < cNumMax; i++)
        {
            ret += IntToStr(i);
        }
        return ret;
    }
    
    // Benchmark body (narrow): appends the decimal form of every integer in
    // [0, cNumMax) to one std::string using operator+= and returns it.
    string test()
    {
        string ret = "";
        for (int i = 0; i < cNumMax; i++)
        {
            ret += IntToStrA(i);
        }
        return ret;
    }
    
    // Runs the narrow and then the wide concatenation benchmark. For each it
    // prints a five-character sample starting at offset 2000, the total size and
    // the clock() tick difference (labelled "ms"). Always returns 0.
    int _tmain(int argc, _TCHAR* argv[])
    {
        cout << "Starting test with a loop num of " << cNumMax << endl;
        clock_t t1 = clock();
        string s = test();
        clock_t t2 = clock();   
        cout << "Result: " << s.substr(2000, 5) << "..." << "; Size: " << s.size() << "; Time: " << t2 - t1 << "ms" << endl;
    
        cout << endl;
    
        cout << "Starting test for WSTRING with a loop num of " << cNumMax << endl;
        t1 = clock();
        wstring ws = testW();
        t2 = clock();   
        // The wide result goes through wcout; the narrow literals are passed to it as-is.
        wcout << "Result: " << ws.substr(2000, 5) << "..." << "; Size: " << ws.size() << "; Time: " << t2 - t1 << "ms" << endl;
    
        return 0;
    }  

        Delphi:

    program StrPerformanceTest;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      System.SysUtils, System.DateUtils;
    
    const
      cNumMax = 1000000;
    
    // Concatenates the decimal forms of 0..cNumMax-1 by appending each one to a
    // TStringBuilder and returns the accumulated text.
    function TestViaStringBuilder: string;
    var
      SB: TStringBuilder;
      I : Integer;
    begin
      SB := TStringBuilder.Create;
      try
        for I := 0 to cNumMax - 1 do
          SB.Append(IntToStr(I));
        Result := SB.ToString;
      finally
        // Release the builder even if Append or ToString raises.
        SB.Free;
      end;
    end;
    
    // Concatenates the decimal forms of 0..cNumMax-1 with the plain + operator,
    // re-assigning Result on every iteration, and returns the accumulated text.
    function TestViaNormal: string;
    var
      I : Integer;
    begin
      Result := '';
      for I := 0 to cNumMax - 1 do
        Result := Result + IntToStr(I);
    end;
    
    // Program state: T1/T2 are Now timestamps (stored as Double), S receives each
    // benchmark's result.
    var
      T1: Double;
      T2: Double;
      S : string;
    // Times TestViaStringBuilder and then TestViaNormal. For each it prints a
    // five-character sample, the result length and the elapsed milliseconds.
    begin
      Writeln('Starting test with a loop num of ', cNumMax, '...');
      T1 := Now;
      S := TestViaStringBuilder;
      T2 := Now;
      // Copy is 1-based, so position 2001 is the character at zero-based offset 2000.
      Writeln(Format('Test via TStringBuilder result: %s...(Length = %d); Time: %dms', [Copy(S, 2001, 5), Length(S), MilliSecondsBetween(T2, T1)]));
    
      T1 := Now;
      S := TestViaNormal;
      T2 := Now;
      Writeln(Format('Test via normal-way(+=) result: %s...(Length = %d); Time: %dms', [Copy(S, 2001, 5), Length(S), MilliSecondsBetween(T2, T1)]));
      // Wait for Enter so the console window stays open.
      Readln;
    end.
    

     分别执行 10 次。悲剧的是,Golang 里的字符串 += 操作实在太慢了,我实在不想等下去,所以只给出了其官方推荐的使用 bytes.Buffer 的结果。而在这个例子中,Delphi 使用 TStringBuilder 并未显示出什么优化(FastMM 实在太强悍了!),所以我也只给出了普通的串接结果(AnsiString 和 string 都是 Delphi 的原生类型,有着类同的内存布局,效率上应没有什么差别,所以这里只测试了 string)。

        Golang                    :141 148 134 119 133 123 145 127 122 132,平均:132.4;

        C++(std::string)   :384 400 384 385 389 391 389 384 390 383,平均:387.9;

        C++(std::wstring) :519 521 522 521 519 522 518 519 518 518,平均:519.7;

        Delphi(string)       :41 41 41 41 41 41 41 41 44 41,平均:41.3;

        果然,Delphi 大幅领先,当然这主要归功于 FastMM,这个开源的 Pascal 家族的内存管理器实在太强大了!

        当然这个测试对 C++ 并不公平,因为 Golang 的写法并非普通的串接,只是我不知道 STL 或 Boost 里有无类似 StringBuilder 这样的利器呢?

        最后是个数据密集计算型的例子。

        Golang:

    package main
    
    import (
    	"fmt"
    	"time"
    )
    
    const cSize int = 30
    
    type mymatrix [cSize][cSize]int
    
    // mkmatrix fills the top-left rows x cols region of mx, in row-major order,
    // with the consecutive integers 1, 2, 3, ...
    func mkmatrix(rows, cols int, mx *mymatrix) {
    	// Convert the counts into inclusive upper indices for the <= loops below.
    	rows--
    	cols--
    	count := 1
    	for r := 0; r <= rows; r++ {
    		for c := 0; c <= cols; c++ {
    			mx[r][c] = count
    			count++
    		}
    	}
    }
    
    // multmatrix stores the matrix product m1 x m2 into mm for the top-left
    // rows x cols region. cols bounds both the column index j and the summation
    // index k, so the inner dimension is taken to be cols as well.
    func multmatrix(rows, cols int, m1, m2 *mymatrix, mm *mymatrix) {
    	// Convert the counts into inclusive upper indices for the <= loops below.
    	rows--
    	cols--
    	for i := 0; i <= rows; i++ {
    		for j := 0; j <= cols; j++ {
    			val := 0
    			for k := 0; k <= cols; k++ {
    				val += m1[i][k] * m2[k][j]
    				// The running sum is written back on every k step; only the
    				// last write (the full dot product) survives. This store is
    				// part of the measured workload.
    				mm[i][j] = val
    			}
    		}
    	}
    }
    
    // main builds two identical cSize x cSize matrices, multiplies them
    // repeatedly while timing the loop, then prints a few sample entries of the
    // product and the elapsed time (as a time.Duration).
    func main() {
    	var m1, m2, mm mymatrix
    	mkmatrix(cSize, cSize, &m1)
    	mkmatrix(cSize, cSize, &m2)
    	t0 := time.Now()
    	// The <= bound makes this 100001 multiplications, not 100000.
    	for i := 0; i <= 100000; i++ {
    		multmatrix(cSize, cSize, &m1, &m2, &mm)
    	}
    	t := time.Since(t0)
    	fmt.Println(mm[0][0], mm[2][3], mm[3][2], mm[4][4], mm[29][29])
    	fmt.Println("tick = ", t)
    }  

        C++:

    #include "stdafx.h"
    
    #include <time.h>
    #include <iostream>
    
    using namespace std;
    
    const int MATRIX_SIZE = 30;
    
    int Matrix[MATRIX_SIZE][MATRIX_SIZE];
    
    // Fills the top-left rows x cols region of mx, in row-major order, with the
    // consecutive integers 1, 2, 3, ...
    void MakeMatrix(int rows, int cols, int mx[MATRIX_SIZE][MATRIX_SIZE])
    {
    	// Convert the counts into inclusive upper indices for the <= loops below.
    	rows--;
    	cols--;
    	int count = 1;
    	for (int r = 0; r <= rows; r++)
    	{
    		for (int c = 0; c <= cols; c++)
    		{
    			mx[r][c] = count;
    			count++;
    		}
    	}
    }
    
    // Stores the matrix product m1 x m2 into mx for the top-left rows x cols
    // region. cols bounds both the column index j and the summation index k.
    void MatrixMult(int rows, int cols, const int m1[MATRIX_SIZE][MATRIX_SIZE], const int m2[MATRIX_SIZE][MATRIX_SIZE], int mx[MATRIX_SIZE][MATRIX_SIZE])
    {
    	// Convert the counts into inclusive upper indices for the <= loops below.
    	rows--;
    	cols--;
    
    	int val;
    	for (int i = 0; i <= rows; i++)
    	{
    		for (int j = 0; j <= cols; j++)
    		{
    			val = 0;
    			for (int k = 0; k <= cols; k++)
    			{
    				val += m1[i][k] * m2[k][j];
    				// The running sum is written back on every k step; only the
    				// last write (the full dot product) survives.
    				mx[i][j] = val;
    			}
    		}
    	}
    }
    
    // Builds two identical MATRIX_SIZE x MATRIX_SIZE matrices, multiplies them
    // repeatedly while timing the loop, then prints four sample entries of the
    // product and the clock() tick difference (labelled "ms"). Always returns 0.
    int _tmain(int argc, _TCHAR* argv[])
    {
    	int num = 100000;
    
    	int m1[MATRIX_SIZE][MATRIX_SIZE], m2[MATRIX_SIZE][MATRIX_SIZE], mx[MATRIX_SIZE][MATRIX_SIZE];
    	MakeMatrix(MATRIX_SIZE, MATRIX_SIZE, m1);
    	MakeMatrix(MATRIX_SIZE, MATRIX_SIZE, m2);
    
    	clock_t t1 = clock();
    	// The <= bound makes this num + 1 multiplications.
    	for (int i = 0; i <= num; i++)
    	{
    		MatrixMult(MATRIX_SIZE, MATRIX_SIZE, m1, m2, mx);
    	}
    	clock_t t2 = clock();
    	cout << mx[0][0] << " " << mx[2][3] << " " << mx[3][2] << " " << mx[4][4] << endl;
    	cout << t2 - t1 << " ms" << endl;
    
    	return 0;
    }

        Delphi:

    program Project1;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      System.SysUtils, System.DateUtils;
    
    const
      cSize = 30;
    
    type
      TMatrix = array[0..cSize - 1, 0..cSize - 1] of Integer;
    
    // Fills the top-left Rows x Cols region of Mx, in row-major order, with the
    // consecutive integers 1, 2, 3, ...
    procedure MakeMatrix(Rows, Cols: Integer; var Mx: TMatrix);
    var
      R, C, Count: Integer;
    begin
      // Convert the counts into inclusive upper indices for the for loops below.
      Dec(Rows);
      Dec(Cols);
      Count := 1;
      for R := 0 to Rows do
        for C := 0 to Cols do
        begin
          Mx[R, C] := Count;
          Inc(Count);
        end;
    end;
    
    // Stores the matrix product M1 x M2 into Mx for the top-left Rows x Cols
    // region. Cols bounds both the column index J and the summation index K.
    // The procedure is declared inline.
    procedure MatrixMult(Rows, Cols: Integer; const M1, M2: TMatrix; var Mx: TMatrix); inline;
    var
      I, J, K, Val: Integer;
    begin
      // Convert the counts into inclusive upper indices for the for loops below.
      Dec(Rows);
      Dec(Cols);
      for I := 0 to Rows do
        for J := 0 to Cols do
        begin
          Val := 0;
          for K := 0 to Cols do
            Inc(Val, M1[I, K] * M2[K, J]);
          // Written once, after the K loop has finished. The Go and C++ versions
          // in this article store inside the inner loop instead.
          Mx[I, J] := Val;
        end;
    end;
    
    // Program state: the two inputs, the product, and Now timestamps stored as Double.
    var
      Num, I    : Integer;
      M1, M2, Mx: TMatrix;
      T1, T2    : Double;
    
    // Builds two identical cSize x cSize matrices, multiplies them repeatedly
    // while timing the loop, then prints sample entries of the product and the
    // elapsed milliseconds.
    begin
      Num := 100000;
      MakeMatrix(cSize, cSize, M1);
      MakeMatrix(cSize, cSize, M2);
      T1 := Now;
      // 0 to Num is inclusive, so this performs Num + 1 multiplications.
      for I := 0 to Num do
        MatrixMult(cSize, cSize, M1, M2, Mx);
      T2 := Now;
      WriteLn(Mx[0, 0], ' ', Mx[2, 3], ' ', Mx[3, 2], ' ', Mx[4, 4], ' ', mx[29, 29]);
      WriteLn(' C = ', MilliSecondsBetween(T2, T1), ' ms');
    end.  

        分别执行 10 次后结果如下。

        Golang:8757 8790 8713 8748 8737 8744 8752 8752 8746 8754,平均:8749.3;

        C++     :1723 1735 1714 1707 1713 1725 1708 1723 1720 1725,平均:1719.3;

        Delphi :2384 2362 2359 2389 2362 2351 2340 2352 2356 2352,平均:2360.7;

        在这样的密集运算例子里,Golang 的表现实在很差,Golang 的编译器优化还有很长的路要走。而 Delphi 则不出意外,不温不火,勉强也还算能接受吧。

        至此,或许大致可以这样初步评断,Golang 在大部分应用场景下在效率方面是满足要求的,而若涉及到密集运算,当前比较好的方法应该是要通过 CGo 了。考虑到 Golang 强大的 goroutine 和 channel、丰富的标准库(譬如网络方面)、精简的语法和非常快速的编译速度(几乎媲美 Delphi),后端开发尝试下 Golang 应是比较可行的,而也确实有不少早已用 Golang 作后端开发的项目实例了。

        注:关于 Golang 的语言语法及并发方面的特性,过段时间再浅叙。

        经由 Colin 同学建议,测试字符串串接中使用的自实现版 IntToStr 效率不行,对 C++ 很不公平,于是我用回了 _itoa_s 和 _itow_s 这俩库函数,如下:

    #include "stdafx.h"
    
    #include <time.h>
    #include <stdarg.h>
    #include <string>
    #include <iostream>
    using namespace std;
    
    const int cNumMax = 1000000;
    
    // Benchmark body (wide): appends the decimal form of every integer in
    // [0, cNumMax) to one wstring, converting through a reused stack buffer.
    wstring testW()
    {
        wstring ret = L"";
        // Ten wide characters; the largest value converted is cNumMax - 1.
        wchar_t ws[10];
        for (int i = 0; i < cNumMax; i++)
        {
            // Three-argument form: the literal 10 is the radix; the buffer size
            // is presumably deduced from the array type (MSVC template overload).
            _itow_s(i, ws, 10);
            ret += ws;
        }
        return ret;
    }
    
    // Benchmark body (narrow): appends the decimal form of every integer in
    // [0, cNumMax) to one std::string, converting through a reused stack buffer.
    string test()
    {
        string ret = "";
        // Ten characters; the largest value converted is cNumMax - 1.
        char s[10];
        for (int i = 0; i < cNumMax; i++)
        {
            // Three-argument form: the literal 10 is the radix; the buffer size
            // is presumably deduced from the array type (MSVC template overload).
            _itoa_s(i, s, 10);
            ret += s;
        }
        return ret;
    }
    
    // Runs the narrow and then the wide concatenation benchmark. For each it
    // prints a five-character sample starting at offset 2000, the total size and
    // the clock() tick difference (labelled "ms"). Always returns 0.
    int _tmain(int argc, _TCHAR* argv[])
    {
        cout << "Starting test with a loop num of " << cNumMax << endl;
        clock_t t1 = clock();
        string s = test();
        clock_t t2 = clock();   
        cout << "Result: " << s.substr(2000, 5) << "..." << "; Size: " << s.size() << "; Time: " << t2 - t1 << "ms" << endl;
    
        cout << endl;
    
        cout << "Starting test for WSTRING with a loop num of " << cNumMax << endl;
        t1 = clock();
        wstring ws = testW();
        t2 = clock();   
        // The wide result goes through wcout; the narrow literals are passed to it as-is.
        wcout << "Result: " << ws.substr(2000, 5) << "..." << "; Size: " << ws.size() << "; Time: " << t2 - t1 << "ms" << endl;
    
        return 0;
    }
    

      测试 10 次,效率果然大幅提升,平均大约分别是:std::string - 70ms、std::wstring - 75ms,相当快速!不过耗时仍比 Delphi(约 41ms)多出 70% 左右(换言之,Delphi 大约快 40%)。

  • 相关阅读:
    Java编译期和运行期
    深入理解重载和重写及与之相关的多态性 Overloading and Overriding(转)
    Java编译期优化与运行期优化技术浅析
    JAVA反射
    JSP笔记(二)
    JSP笔记(一)
    字符串之String类
    JAVA的Random类介绍
    (转)详细分析css float 属性
    协议与委托
  • 原文地址:https://www.cnblogs.com/ecofast/p/4043873.html
Copyright © 2020-2023  润新知