在C语言中,如果我们要访问一个数组的某个下标对应的元素,通常的写法是a[i]。但写成i[a]也完全合法: C标准把下标表达式E1[E2]定义为(*((E1)+(E2))),而指针与整数的加法满足交换律,所以a[i]与i[a]是同一个表达式。从汇编的角度看,写成i[a]同样一点问题都没有。
下面通过代码给出证明。
o foo1.c
1 int main(int argc, char *argv[]) 2 { 3 unsigned int a[] = {1, 2, 3}; 4 unsigned int n = sizeof (a) / sizeof (int); 5 6 unsigned int sum = 0; 7 for (unsigned int i = 0; i < n; i++) 8 sum += a[i]; 9 10 return sum; 11 }
o foo2.c
1 int main(int argc, char *argv[]) 2 { 3 unsigned int a[] = {1, 2, 3}; 4 unsigned int n = sizeof (a) / sizeof (int); 5 6 unsigned int sum = 0; 7 for (unsigned int i = 0; i < n; i++) 8 sum += i[a]; 9 10 return sum; 11 }
o foo3.c
1 int main(int argc, char *argv[]) 2 { 3 unsigned int a[] = {1, 2, 3}; 4 unsigned int n = sizeof (a) / sizeof (int); 5 6 unsigned int sum = 0; 7 for (unsigned int i = 0; i < n; i++) 8 sum += *(i+a); 9 10 return sum; 11 }
o 编译和运行
1 $ gcc -g -Wall -std=gnu99 -m32 -o foo1 foo1.c 2 $ gcc -g -Wall -std=gnu99 -m32 -o foo2 foo2.c 3 $ gcc -g -Wall -std=gnu99 -m32 -o foo3 foo3.c 4 $ ./foo1; echo $? 5 6 6 $ ./foo2; echo $? 7 6 8 $ ./foo3; echo $? 9 6
o 反汇编后diff
1) foo1.gdb.out
1 (gdb) disas /m main 2 Dump of assembler code for function main: 3 2 { 4 0x080483ed <+0>: push ebp 5 0x080483ee <+1>: mov ebp,esp 6 0x080483f0 <+3>: sub esp,0x20 7 8 3 unsigned int a[] = {1, 2, 3}; 9 0x080483f3 <+6>: mov DWORD PTR [ebp-0xc],0x1 10 0x080483fa <+13>: mov DWORD PTR [ebp-0x8],0x2 11 0x08048401 <+20>: mov DWORD PTR [ebp-0x4],0x3 12 13 4 unsigned int n = sizeof (a) / sizeof (int); 14 0x08048408 <+27>: mov DWORD PTR [ebp-0x10],0x3 15 16 5 17 6 unsigned int sum = 0; 18 0x0804840f <+34>: mov DWORD PTR [ebp-0x18],0x0 19 20 7 for (unsigned int i = 0; i < n; i++) 21 0x08048416 <+41>: mov DWORD PTR [ebp-0x14],0x0 22 0x0804841d <+48>: jmp 0x804842d <main+64> 23 0x08048429 <+60>: add DWORD PTR [ebp-0x14],0x1 24 0x0804842d <+64>: mov eax,DWORD PTR [ebp-0x14] 25 0x08048430 <+67>: cmp eax,DWORD PTR [ebp-0x10] 26 0x08048433 <+70>: jb 0x804841f <main+50> 27 28 8 sum += a[i]; 29 0x0804841f <+50>: mov eax,DWORD PTR [ebp-0x14] 30 0x08048422 <+53>: mov eax,DWORD PTR [ebp+eax*4-0xc] 31 0x08048426 <+57>: add DWORD PTR [ebp-0x18],eax 32 33 9 34 10 return sum; 35 0x08048435 <+72>: mov eax,DWORD PTR [ebp-0x18] 36 37 11 } 38 0x08048438 <+75>: leave 39 0x08048439 <+76>: ret 40 41 End of assembler dump.
2) foo2.gdb.out
1 (gdb) disas /m main 2 Dump of assembler code for function main: 3 2 { 4 0x080483ed <+0>: push ebp 5 0x080483ee <+1>: mov ebp,esp 6 0x080483f0 <+3>: sub esp,0x20 7 8 3 unsigned int a[] = {1, 2, 3}; 9 0x080483f3 <+6>: mov DWORD PTR [ebp-0xc],0x1 10 0x080483fa <+13>: mov DWORD PTR [ebp-0x8],0x2 11 0x08048401 <+20>: mov DWORD PTR [ebp-0x4],0x3 12 13 4 unsigned int n = sizeof (a) / sizeof (int); 14 0x08048408 <+27>: mov DWORD PTR [ebp-0x10],0x3 15 16 5 17 6 unsigned int sum = 0; 18 0x0804840f <+34>: mov DWORD PTR [ebp-0x18],0x0 19 20 7 for (unsigned int i = 0; i < n; i++) 21 0x08048416 <+41>: mov DWORD PTR [ebp-0x14],0x0 22 0x0804841d <+48>: jmp 0x804842d <main+64> 23 0x08048429 <+60>: add DWORD PTR [ebp-0x14],0x1 24 0x0804842d <+64>: mov eax,DWORD PTR [ebp-0x14] 25 0x08048430 <+67>: cmp eax,DWORD PTR [ebp-0x10] 26 0x08048433 <+70>: jb 0x804841f <main+50> 27 28 8 sum += i[a]; 29 0x0804841f <+50>: mov eax,DWORD PTR [ebp-0x14] 30 0x08048422 <+53>: mov eax,DWORD PTR [ebp+eax*4-0xc] 31 0x08048426 <+57>: add DWORD PTR [ebp-0x18],eax 32 33 9 34 10 return sum; 35 0x08048435 <+72>: mov eax,DWORD PTR [ebp-0x18] 36 37 11 } 38 0x08048438 <+75>: leave 39 0x08048439 <+76>: ret 40 41 End of assembler dump.
3) foo3.gdb.out
1 (gdb) disas /m main 2 Dump of assembler code for function main: 3 2 { 4 0x080483ed <+0>: push ebp 5 0x080483ee <+1>: mov ebp,esp 6 0x080483f0 <+3>: sub esp,0x20 7 8 3 unsigned int a[] = {1, 2, 3}; 9 0x080483f3 <+6>: mov DWORD PTR [ebp-0xc],0x1 10 0x080483fa <+13>: mov DWORD PTR [ebp-0x8],0x2 11 0x08048401 <+20>: mov DWORD PTR [ebp-0x4],0x3 12 13 4 unsigned int n = sizeof (a) / sizeof (int); 14 0x08048408 <+27>: mov DWORD PTR [ebp-0x10],0x3 15 16 5 17 6 unsigned int sum = 0; 18 0x0804840f <+34>: mov DWORD PTR [ebp-0x18],0x0 19 20 7 for (unsigned int i = 0; i < n; i++) 21 0x08048416 <+41>: mov DWORD PTR [ebp-0x14],0x0 22 0x0804841d <+48>: jmp 0x8048437 <main+74> 23 0x08048433 <+70>: add DWORD PTR [ebp-0x14],0x1 24 0x08048437 <+74>: mov eax,DWORD PTR [ebp-0x14] 25 0x0804843a <+77>: cmp eax,DWORD PTR [ebp-0x10] 26 0x0804843d <+80>: jb 0x804841f <main+50> 27 28 8 sum += *(i+a); 29 0x0804841f <+50>: mov eax,DWORD PTR [ebp-0x14] 30 0x08048422 <+53>: lea edx,[eax*4+0x0] 31 0x08048429 <+60>: lea eax,[ebp-0xc] 32 0x0804842c <+63>: add eax,edx 33 0x0804842e <+65>: mov eax,DWORD PTR [eax] 34 0x08048430 <+67>: add DWORD PTR [ebp-0x18],eax 35 36 9 37 10 return sum; 38 0x0804843f <+82>: mov eax,DWORD PTR [ebp-0x18] 39 40 11 } 41 0x08048442 <+85>: leave 42 0x08048443 <+86>: ret 43 44 End of assembler dump.
4) a[i] vs. i[a]: foo1.gdb.out与foo2.gdb.out除了第8行的源码文本(a[i]与i[a])不同之外,所有指令和地址完全相同。
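5) i[a] vs. *(i+a): 在未开启优化的情况下, foo3把取元素的过程拆成了lea edx,[eax*4+0x0] / lea eax,[ebp-0xc] / add eax,edx / mov eax,DWORD PTR [eax]四条指令, 而foo2只用了一条mov eax,DWORD PTR [ebp+eax*4-0xc]; 两者访问的都是同一个地址ebp-0xc+i*4, 其余指令除了后续地址整体后移之外完全相同。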
结论: a[i]==i[a]==*(i+a)==*(a+i)
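分析: 下标表达式a[i]按定义就是*(a+i)。在表达式中,数组名a会被转换成指向首元素的指针(作为sizeof或&的操作数时除外,所以代码中的sizeof (a)得到的仍是整个数组的大小),也就是一段连续内存的首地址。获取某个元素a[i]不过是在a对应的首地址上偏移i个元素,找到对应的内存地址后从中取出其中的内容即可; 由于a+i与i+a是同一个地址,a[i]与i[a]自然等价。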
PS: 我在面试别人的过程中,如果求职的工程师说他懂汇编,我一般会问这样的问题,"能否在C代码中使用i[a]去访问数组a的第i个元素?"。无论对方说能与不能,我都很乐意进一步问"为什么能/不能?"从而挖掘出其对汇编及编译过程的理解深度。 通常,优秀的程序员能回答得富有计算机思维(即使他判定为不能, 比如"我觉得不能,编译器应该不支持这种怪诞的用法..."),而那些机械的程序员一般会选择放弃思考为什么能/不能。