▶ 书中第十三章的程序,主要讲了汇编语言和 C/C++ 相互调用的方法
● 代码,数组求和的几种优化
1 int arraySum(int array[], int count) ; O0 2 { 3 010716D0 push ebp 4 010716D1 mov ebp,esp 5 010716D3 sub esp,0D8h 6 010716D9 push ebx 7 010716DA push esi 8 010716DB push edi 9 010716DC lea edi,[ebp-0D8h] 10 010716E2 mov ecx,36h 11 010716E7 mov eax,0CCCCCCCCh 12 010716EC rep stos dword ptr es:[edi] ; 栈顶 13 int i; 14 int sum = 0; 15 010716EE mov dword ptr [sum],0 16 17 for (i = 0; i < count; i++) 18 010716F5 mov dword ptr [i],0 19 010716FC jmp arraySum+37h (01071707h) ; 跳转循环判断 20 010716FE mov eax,dword ptr [i] ; ┓ 21 01071701 add eax,1 ; ┣ 取出 i 来加一再放回去 22 01071704 mov dword ptr [i],eax ; ┛ 23 01071707 mov eax,dword ptr [i] ; ┓ 24 0107170A cmp eax,dword ptr [count] ; ┣ 循环判断 25 0107170D jge arraySum+50h (01071720h) ; ┛ 26 sum += array[i]; 27 0107170F mov eax,dword ptr [i] ; 循环主体,各变量取出来,加完了 sum 放回去 28 01071712 mov ecx,dword ptr [array] 29 01071715 mov edx,dword ptr [sum] 30 01071718 add edx,dword ptr [ecx+eax*4] 31 0107171B mov dword ptr [sum],edx 32 0107171E jmp arraySum+2Eh (010716FEh) 33 34 return sum; 35 01071720 mov eax,dword ptr [sum] ; 返回值放入 eax 36 } 37 01071723 pop edi 38 01071724 pop esi 39 01071725 pop ebx 40 01071726 mov esp,ebp 41 01071728 pop ebp 42 01071729 ret
1 int arraySum(int array[], int count) ; O1 2 { 3 00E51743 push ebp ; 优化了寄存器和栈 4 00E51744 mov ebp,esp 5 int i; 6 int sum = 0; 7 00E51746 xor eax,eax 8 00E51748 mov ecx,eax ; ecx 存储 i 9 10 for (i = 0; i < count; i++) 11 00E5174A cmp dword ptr [count],eax ; ┳ 数组长 ≤ 0 则退出 12 00E5174D jle arraySum+18h (0E5175Bh) ; ┛ 13 00E5174F mov edx,dword ptr [array] ; ┓ 14 sum += array[i]; ; ┣ 循环主体 15 00E51752 add eax,dword ptr [edx+ecx*4] ; ┛ 16 00E51755 inc ecx ; ┓ 17 00E51756 cmp ecx,dword ptr [count] ; ┣ i++ 和循环判断 18 00E51759 jl arraySum+0Fh (0E51752h) ; ┛ 19 20 return sum; 21 } 22 00E5175B pop ebp 23 00E5175C ret 24 int array[10] = { 1,2,3,4,5,6,7,8,9,10 }; ; 主函数的部分 25 26 int sum = arraySum(array, 10); 27 return 0; 28 00E5175D xor eax,eax 29 } 30 00E5175F ret
1 int arraySum(int array[], int count) ; O2 / Ox,用了XMM,没啃完 2 { 3 000E1760 push ebp 4 000E1761 mov ebp,esp 5 000E1763 push ecx 6 7 for (i = 0; i < count; i++) 8 000E1764 mov edx,dword ptr [array] 9 000E1767 xor ecx,ecx 10 000E1769 push ebx 11 000E176A push esi 12 000E176B mov esi,dword ptr [count] 13 000E176E xor eax,eax 14 000E1770 mov dword ptr [ebp-4],ecx 15 000E1773 push edi 16 000E1774 test esi,esi 17 000E1776 jle arraySum+7Bh (0E17DBh) 18 000E1778 cmp esi,8 19 000E177B jb arraySum+7Bh (0E17DBh) 20 int i; 21 int sum = 0; 22 000E177D mov ecx,esi 23 000E177F and ecx,80000007h 24 000E1785 jns arraySum+2Ch (0E178Ch) 25 000E1787 dec ecx 26 000E1788 or ecx,0FFFFFFF8h 27 000E178B inc ecx 28 000E178C mov edi,esi 29 000E178E xorps xmm2,xmm2 30 000E1791 sub edi,ecx 31 000E1793 xorps xmm1,xmm1 32 000E1796 nop word ptr [eax+eax] 33 sum += array[i]; 34 000E17A0 movups xmm0,xmmword ptr [edx+eax*4] 35 000E17A4 paddd xmm2,xmm0 36 000E17A8 movups xmm0,xmmword ptr [edx+eax*4+10h] 37 000E17AD add eax,8 38 000E17B0 paddd xmm1,xmm0 39 000E17B4 cmp eax,edi 40 000E17B6 jl arraySum+40h (0E17A0h) 41 int i; 42 int sum = 0; 43 000E17B8 paddd xmm1,xmm2 44 000E17BC movaps xmm0,xmm1 45 000E17BF psrldq xmm0,8 46 000E17C4 paddd xmm1,xmm0 47 000E17C8 movups xmm0,xmm1 48 000E17CB psrldq xmm0,4 49 000E17D0 paddd xmm1,xmm0 50 000E17D4 movd ecx,xmm1 51 000E17D8 mov dword ptr [sum],ecx 52 53 for (i = 0; i < count; i++) 54 000E17DB xor edi,edi 55 000E17DD xor ebx,ebx 56 000E17DF cmp eax,esi 57 000E17E1 jge arraySum+0B4h (0E1814h) 58 000E17E3 mov ecx,esi 59 000E17E5 sub ecx,eax 60 000E17E7 cmp ecx,2 61 000E17EA jl arraySum+9Eh (0E17FEh) 62 000E17EC lea ecx,[esi-1] 63 000E17EF nop 64 sum += array[i]; 65 000E17F0 add edi,dword ptr [edx+eax*4] 66 000E17F3 add ebx,dword ptr [edx+eax*4+4] 67 000E17F7 add eax,2 68 000E17FA cmp eax,ecx 69 000E17FC jl arraySum+90h (0E17F0h) 70 71 for (i = 0; i < count; i++) 72 000E17FE cmp eax,esi 73 sum += array[i]; 74 000E1800 mov esi,dword ptr [sum] 75 000E1803 jge 
arraySum+0A8h (0E1808h) 76 000E1805 add esi,dword ptr [edx+eax*4] 77 000E1808 lea eax,[ebx+edi] 78 000E180B pop edi 79 000E180C add eax,esi 80 81 return sum; 82 } 83 000E180E pop esi 84 000E180F pop ebx 85 000E1810 mov esp,ebp 86 000E1812 pop ebp 87 000E1813 ret 88 000E1814 pop edi 89 000E1815 pop esi 90 000E1816 mov eax,ecx 91 000E1818 pop ebx 92 000E1819 mov esp,ebp 93 000E181B pop ebp 94 000E181C ret
● C 内嵌汇编(代码段),检查对象大小
1 #include <iostream> 2 3 #pragma warning (disable:4101) // 关闭警告 "unreferenced local variables" 4 5 int main() 6 { 7 struct Package 8 { 9 int intInStruct; // 4 10 float floatInStruct; // 4 11 long doubleInStruct; // 4 12 } myPackage; 13 14 char myChar; 15 bool myBool; 16 short myShort; 17 int myInt; 18 long myLong; 19 float myFloat; 20 double myDouble; 21 long double myLongDouble; 22 long myLongArray[10]; 23 24 __asm // 内联汇编代码 25 { 26 mov eax, myPackage.doubleInStruct;// 地址 27 28 mov eax, LENGTH myInt; // 1 29 mov eax, LENGTH myLongArray; // 10 30 31 mov eax, TYPE myChar; // 1 32 mov eax, TYPE myBool; // 1 33 mov eax, TYPE myShort; // 2 34 mov eax, TYPE myInt; // 4 35 mov eax, TYPE myLong; // 4 36 mov eax, TYPE myFloat; // 4 37 mov eax, TYPE myDouble; // 8 38 mov eax, TYPE myLongDouble; // 8 39 40 mov eax, TYPE myPackage; // 12 41 mov eax, TYPE myLongArray; // 4 42 43 mov eax, LENGTH myPackage; // 1 44 mov eax, LENGTH myLongArray; // 10 45 46 mov eax, SIZE myPackage; // 12,SIZE 返回 LENGTH * TYPE 47 mov eax, SIZE myLongArray; // 40 48 } 49 return 0; 50 }
● C++ 内嵌汇编(单独的函数),加密字符串
1 #include <iostream> 2 #include <string> 3 4 using namespace std; 5 6 void TranslateBuffer(char * buf, unsigned count, unsigned char eChar) 7 { 8 __asm 9 { 10 mov esi, buf // esi 指向待处理的内存 11 mov ecx, count 12 mov al, eChar 13 L1: 14 xor[esi], al 15 inc esi 16 Loop L1 17 } 18 } 19 20 int main(int argcount, char * args[]) 21 { 22 string buffer; 23 unsigned char encryptCode; 24 25 cout << "Input the string: "; // 需要加密的字符串 26 cin >> buffer; 27 cout << " Encryption code [0-255]? "; // 秘钥 28 cin >> encryptCode; 29 30 TranslateBuffer(&buffer[0], buffer.length(), encryptCode); 31 32 cout << "Output: " << buffer; 33 getchar(); 34 getchar(); 35 return 0; 36 }
● C++ 内嵌汇编(独立文件),线性查找。要点:单独使用 ml.exe 编译 index2.asm 生成 index2.obj,在工程中添加为资源文件。输出结果:没开优化,效率奇低(C++ 7193 ms,ASM 34852 ms)
// index.h
// Linear-search routines with C linkage, so the C++ implementation (index)
// and the assembly implementation (index2) are called the same way.
#ifndef INDEX_H
#define INDEX_H

#ifdef __cplusplus
extern "C"
{
#endif

// Both functions look for the first element of array[0 .. count-1] equal to
// n and return its zero-based position, or -1 when no element matches.
long index( long n, unsigned array[], unsigned count);
long index2(long n, unsigned array[], unsigned count);

#ifdef __cplusplus
}
#endif

#endif // INDEX_H
1 // index.cpp 2 #include "index.h" 3 4 long index(long searchVal, unsigned array[], unsigned count) 5 { 6 for (unsigned i = 0; i < count; i++) 7 { 8 if (array[i] == searchVal) 9 return i; 10 } 11 return -1; 12 }
; index2.asm
; Linear search over an array of DWORDs, callable from C/C++ as
;   long index2(long srchVal, unsigned array[], unsigned count);
; Returns in EAX the zero-based position of the first element equal to
; srchVal, or -1 when no element matches (including when count is 0).
.586
.model flat, C
index2 PROTO, srchVal:DWORD, arrayPtr:PTR DWORD, count:DWORD

.code

index2 PROC uses ecx esi edi, srchVal:DWORD, arrayPtr:PTR DWORD, count:DWORD
    NOT_FOUND = -1

    mov   eax, srchVal              ; value to look for
    mov   ecx, count                ; elements left to examine
    mov   esi, arrayPtr             ; base address of the array
    mov   edi, 0                    ; current array index
    jecxz notFound                  ; empty array: LOOP would otherwise wrap
                                    ; ECX from 0 to 0FFFFFFFFh and scan far
                                    ; past the end of the array

L1:
    cmp   [esi + edi * 4], eax
    je    found
    inc   edi
    loop  L1                        ; ecx = ecx - 1, repeat while ecx != 0

notFound:
    mov   eax, NOT_FOUND
    jmp   short exit

found:
    mov   eax, edi                  ; return the matching index

exit:
    ret
index2 ENDP
END
1 // main.cpp 2 #include <iostream> 3 #include <time.h> 4 #include "index.h" 5 6 using namespace std; 7 8 int main() 9 { 10 const unsigned ARRAY_SIZE = 100000, LOOP_SIZE = 1000000; // 数组大小和重复次数 11 const unsigned searchVal = rand(); 12 clock_t startTime, endTime; 13 unsigned array[ARRAY_SIZE]; 14 long count = 0; 15 16 for (unsigned i = 0; i < ARRAY_SIZE; i++) 17 array[i] = rand(); 18 19 startTime = clock(); // 分别使用 C++ 和 ASM 计算同一个函数 20 for (unsigned loop = 0; loop < LOOP_SIZE; loop++) 21 count = index(searchVal, array, ARRAY_SIZE); 22 endTime = clock(); 23 cout << "Elapsed time: " << endTime - startTime << " ms, " << ((count == -1) ? "not" : "") << "found" << endl; 24 25 startTime = clock(); 26 for (unsigned loop = 0; loop < LOOP_SIZE; loop++) 27 count = index2(searchVal, array, ARRAY_SIZE); 28 endTime = clock(); 29 cout << "Elapsed time: " << endTime - startTime << " ms, " << ((count == -1) ? "not" : "") << "found" << endl; 30 31 getchar(); 32 return 0; 33 }