在编程中有时会把多个函数串在一起依次调用,以达到特定的目的,在这里我们把这样的调用方式形象地称为函数链调用。函数链中的函数有些是独立的,有些则只用在函数组合中,不会单独调用。对象的连续配置和组合是函数链调用比较常用的场合;去除语法糖之后,LINQ 也是函数链的一种运用。下面通过即时编译中的一个例子,来看看函数链的用法。
几种热门的语言C#、java、js都采用了即时编译的方式,即时编译需要有相应的汇编类库供调用,以便把VM代码转换成本地机器代码。
Apple 的汇编类库是比较常用的,被 Firefox、WebKit 用于 JavaScript 的即时编译,下面的代码是 Apple 汇编类库的一般调用方式:
// Sample of the call style described in the surrounding text: one member
// call on `masm` per instruction.
// `masm`, `r` and the register names are declared outside this excerpt.
// NOTE(review): operand order (source vs. destination) cannot be read off this
// excerpt -- confirm against the MacroAssembler API before relying on it.
masm.push(ecx);
masm.move(ebp, edx);
masm.add32(0x12 ,edx);
masm.push(edx);
// Memory operands have to be wrapped in an explicit Address object.
masm.load32(MacroAssembler::Address(edx),edx);
masm.push(edx);
masm.load32(MacroAssembler::Address(edx),edx);
masm.add32(edx,r);
masm.call(MacroAssembler::Address(r));
masm.pop(ecx);
下面再看看chrome中v8的调用习惯:
#define __ masm()-> __ mov(ebx, Operand(esp, kSavedRegistersAreaSize)); __ Set(ecx, Immediate(0)); __ lea(edx, Operand(esp, kSavedRegistersAreaSize + 1 * kPointerSize)); __ sub(edx, Operand(ebp)); __ neg(edx); Label pop_loop; __ bind(&pop_loop); __ pop(Operand(edx, 0)); #undef __
与前面的调用方式差别不大,只是通过宏代换使得汇编调用看得更直观,并且遵循了宏定义用过即取消定义的习惯。
从上面的代码可以看出,采用普通的函数调用方式时,大部分的汇编码调用还是比较整洁,但涉及到内存寻址的部分显得有些不太直观,对于 mov [ebx + 2 * ecx + 0x1000],eax 这样的语句,写起来会有些复杂。下面我们试着看看有没有更直观的方式来表现。
在这里我们可以看到,函数与真实汇编之间存在一定程度的失配:汇编语言本身是描述性的,具有较强的组合能力,而用单个函数去模拟这样的能力,往往有点力不从心,这样失配的结果就引起功能的简化和简洁性的减弱。利用多个函数一起协同的能力,函数链可以用于解决这样的失配问题,使得调用代码书写得像汇编一样简洁。
下面的代码是一些准备工作,定义了汇编要用到的一些结构,如寄存器、地址、操作还有卷标。寄存器和卷标的代码都非常简单,操作和地址的代码复杂一些,主要是定义了一些操作符的重载,这些函数体现了函数链中函数的特点:要么返回自身,要么返回新对象,以备后续调用。另外还有一些宏定义,这些宏都比较简单。为简单起见,在这里程序并不作实际的本地代码转化工作,只保证书写的代码能编译通过。具体的代码如下:
// Building blocks for an assembly-like embedded DSL written as function chains.
// Nothing here emits machine code; the types only record what was written.
//
// Operands are taken by const reference and copied into the object that keeps
// them. The register macros (eax, ebx, ...) and address expressions produce
// temporaries, so this is what makes the DSL valid standard C++ and keeps the
// recorded operands readable after the statement that built them has ended.

// Common base of every operand node (register, address, immediate).
struct TNode
{
};

struct TOp;

// Label placeholder. It carries no state; it only allows a label to be written
// in front of an instruction (`L1 _(mov ...)`) or after a jmp (`_(jmp L1)`).
struct TLabel
{
    // Label in front of a named instruction: hands the instruction back unchanged.
    TOp & operator () (TOp & r) { return r; }

    // Label in front of a temporary instruction, which is what the ncode_* macros
    // produce. The reference is valid until the end of the full expression.
    TOp & operator () (TOp && r) { return r; }
};

// Immediate integer operand.
struct TInt : TNode
{
    int val;

    TInt(int v) : val(v) {}
};

// Register operand, identified by its number.
struct TReg : TNode
{
    int reg;

    TReg(int r) : reg(r) {}
    TReg() : reg(0) {}

    // Comparisons look at the register number only.
    bool operator != (const TReg & l) const { return reg != l.reg; }
    bool operator == (const TReg & l) const { return reg == l.reg; }
    bool operator >  (const TReg & l) const { return reg >  l.reg; }
    bool operator <  (const TReg & l) const { return reg <  l.reg; }
};

// Memory operand of the form [base + scale * index + direct].
// `base` / `index` are null when that part is absent. When set through
// setBase() / setIndex() they point at copies stored inside this object, so a
// TAdr never refers to a register temporary that has already been destroyed.
struct TAdr : TNode
{
    int   typ;
    TReg* base;
    int   scale;
    TReg* index;
    int   direct;

    TAdr() : typ(0), base(nullptr), scale(0), index(nullptr), direct(0) {}

    // Copies re-point base/index at the new object's own register copies.
    TAdr(const TAdr & other)
        : TNode(other), typ(0), base(nullptr), scale(0), index(nullptr), direct(0)
    {
        copyFrom(other);
    }

    TAdr & operator = (const TAdr & other)
    {
        copyFrom(other);
        return *this;
    }

    // Stores a private copy of `r` and makes `base` point at it.
    void setBase(const TReg & r)
    {
        baseCopy = r;
        base = &baseCopy;
    }

    // Stores a private copy of `r` and makes `index` point at it.
    void setIndex(const TReg & r)
    {
        indexCopy = r;
        index = &indexCopy;
    }

private:
    void copyFrom(const TAdr & other)
    {
        typ    = other.typ;
        scale  = other.scale;
        direct = other.direct;
        if (other.base)  setBase(*other.base);   else base  = nullptr;
        if (other.index) setIndex(*other.index); else index = nullptr;
    }

    TReg baseCopy;
    TReg indexCopy;
};

// Manual heap helpers, kept for source compatibility. The operators below no
// longer allocate through them; whoever calls alloc*() owns the result and must
// hand it back to free().
struct TAlloc
{
    static TAdr* allocAdr() { return new TAdr; }
    static TReg* allocReg() { return new TReg(); }
    static void free(TAdr* p) { delete p; }
    static void free(TReg* p) { delete p; }
};

// base + index
inline TAdr operator + (const TReg & l, const TReg & r)
{
    TAdr adr;
    adr.setBase(l);
    adr.setIndex(r);
    return adr;
}

// scale * index
inline TAdr operator * (int l, const TReg & r)
{
    TAdr adr;
    adr.scale = l;
    adr.setIndex(r);
    return adr;
}

// base + displacement
inline TAdr operator + (const TReg & r, int l)
{
    TAdr adr;
    adr.setBase(r);
    adr.direct = l;
    return adr;
}

// address + displacement. The displacement accumulates, so `ebx + 4 + 8`
// yields 12 instead of silently dropping the 4.
inline TAdr operator + (const TAdr & adr, int l)
{
    TAdr out(adr);
    out.direct += l;
    return out;
}

// base + (scale * index [+ displacement]).
// Only index, scale and displacement are taken from `r`; a base already present
// in `r` is not carried over.
inline TAdr operator + (const TReg & l, const TAdr & r)
{
    TAdr adr;
    adr.setBase(l);
    if (r.index)
        adr.setIndex(*r.index);
    adr.scale  = r.scale;
    adr.direct = r.direct;
    return adr;
}

// One instruction: an opcode plus up to two operands.
// Operands are attached with call syntax `(operand)` or bracket syntax
// `[operand]`. The first one attached becomes `left`, every later one replaces
// `right`. Both syntaxes record the operand the same way; the brackets only
// mimic the look of a memory operand.
// `left` / `right` point at copies held inside this TOp and stay valid for as
// long as the TOp itself lives.
struct TOp
{
    int    op;
    TNode* left;
    TNode* right;

    TOp(int _op) : op(_op), left(nullptr), right(nullptr) {}

    TOp(const TOp & other) : op(other.op), left(nullptr), right(nullptr)
    {
        copyOperands(other);
    }

    TOp & operator = (const TOp & other)
    {
        if (this != &other)
        {
            op = other.op;
            copyOperands(other);
        }
        return *this;
    }

    TOp & operator () (const TReg & r) { return attach(r); }
    TOp & operator () (const TAdr & r) { return attach(r); }
    TOp & operator () (const TInt & r) { return attach(r); }
    TOp & operator () (int r)          { return attach(TInt(r)); }

    TOp & operator [] (const TAdr & r) { return attach(r); }
    TOp & operator [] (const TReg & r) { return attach(r); }
    TOp & operator [] (int r)          { return attach(TInt(r)); }

    // `jmp + label`: the label is accepted but not recorded.
    TOp & operator + (TLabel) { return *this; }

private:
    // In-object storage for one operand of any supported kind.
    struct Slot
    {
        enum Kind { kNone, kReg, kAdr, kInt };

        Kind kind;
        TReg reg;
        TAdr adr;
        TInt imm;

        Slot() : kind(kNone), imm(0) {}

        TNode* store(const TReg & r) { kind = kReg; reg = r; return &reg; }
        TNode* store(const TAdr & a) { kind = kAdr; adr = a; return &adr; }
        TNode* store(const TInt & i) { kind = kInt; imm = i; return &imm; }

        // Address of the active member, or null while the slot is empty.
        const TNode* node() const
        {
            switch (kind)
            {
            case kReg: return &reg;
            case kAdr: return &adr;
            case kInt: return &imm;
            default:   return nullptr;
            }
        }

        TNode* node()
        {
            return const_cast<TNode*>(static_cast<const Slot &>(*this).node());
        }
    };

    // Copies `v` into the next slot and publishes it through left/right.
    template <typename Operand>
    TOp & attach(const Operand & v)
    {
        if (left)
            right = second.store(v);
        else
            left = first.store(v);
        return *this;
    }

    // Copies both slots and makes left/right refer to this object's copies.
    void copyOperands(const TOp & other)
    {
        first  = other.first;
        second = other.second;
        left   = rebind(other.left, other);
        right  = rebind(other.right, other);
    }

    // Maps a pointer into `other`'s slots onto the matching slot of *this.
    // Pointers that were assigned from outside are passed through unchanged.
    TNode* rebind(TNode* p, const TOp & other)
    {
        if (p != nullptr && p == other.first.node())
            return first.node();
        if (p != nullptr && p == other.second.node())
            return second.node();
        return p;
    }

    Slot first;
    Slot second;
};

// Opcode numbers understood by TOp.
struct TOpcode
{
    static const unsigned char mov  = 1;
    static const unsigned char add  = 2;
    static const unsigned char sub  = 3;
    static const unsigned char mul  = 4;
    static const unsigned char div  = 5;
    static const unsigned char jmp  = 6;
    static const unsigned char push = 7;
    static const unsigned char pop  = 8;
    static const unsigned char call = 9;
    static const unsigned char ret  = 10;
};

// One macro per mnemonic: each creates a temporary TOp for that opcode.
#define ncode_mov (TOp(TOpcode::mov))
#define ncode_add (TOp(TOpcode::add))
#define ncode_sub (TOp(TOpcode::sub))
#define ncode_mul (TOp(TOpcode::mul))
#define ncode_div (TOp(TOpcode::div))
#define ncode_push (TOp(TOpcode::push))
#define ncode_pop (TOp(TOpcode::pop))
// jmp ends with `+` so that the label written after it becomes `TOp + TLabel`.
#define ncode_jmp (TOp(TOpcode::jmp)) +
#define ncode_call (TOp(TOpcode::call))
#define ncode_ret (TOp(TOpcode::ret))

// _(mnemonic operand1, operand2): pastes the mnemonic onto `ncode_` and places
// the operands after it. The comma between the operands is consumed as the
// macro argument separator, so the operands end up juxtaposed.
#define _(x,...) ncode_##x __VA_ARGS__

// Register names; each use creates a temporary TReg with the given number.
#define eax (TReg(0))
#define ecx (TReg(1))
#define edx (TReg(2))
#define ebx (TReg(3))
#define esp (TReg(4))
#define ebp (TReg(5))
#define esi (TReg(6))
#define edi (TReg(7))
通过上面的准备,现在可以书写汇编调用代码了:
// Demo entry point: writes an assembly-looking listing with the `_()` macro.
// Every `_(...)` statement is an ordinary C++ expression built from TOp
// operator calls; no machine code is generated or executed.
int _tmain(int argc, _TCHAR* argv[])
{
    // Labels are plain objects; writing one in front of an instruction applies
    // its call operator to that instruction.
    TLabel L1,L2;

    _(push ebp);
    _(mov ebp,esp);
    _(push esi);
    _(push edi);

    _(mov ebx, eax);
    // Bracketed operands use TOp's operator[] together with the TReg/TAdr
    // arithmetic operators, so they read like memory operands.
    _(mov eax,[ebx + 2 * ecx]);
    _(mov [ebx + 2 * ecx + 0x1000],eax);

    L1 _(mov eax,[eax]);
    L2 _(mov eax,[0x1234]);

    _(call eax);
    // ncode_jmp ends with `+`, so the label is combined with the TOp through
    // TOp's operator+.
    _(jmp L1);
    _(jmp L2);

    _(pop edi);
    _(pop esi);
    _(mov esp,ebp);
    _(pop ebp);
    _(ret );

    // Ends the process with exit status 1.
    exit(1);
}
是不是看起来像嵌入式汇编代码,但只是形似而已,这里是函数调用,而嵌入式汇编码是执行码。现在看起来是否更直观,YY一下。
现在再看看宏展开后的实际代码,是不是都是一些函数链调用?
// Macro-expanded form of the demo: every statement is a temporary TOp followed
// by a chain of operator() / operator[] / operator+ calls.
// NOTE(review): the entry point is spelled wmain here; presumably this is what
// _tmain maps to in a Unicode build -- confirm against <tchar.h>.
int wmain(int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    (TOp(TOpcode::push)) (TReg(5)) ;
    (TOp(TOpcode::mov)) (TReg(5)) (TReg(4));
    (TOp(TOpcode::push)) (TReg(6)) ;
    (TOp(TOpcode::push)) (TReg(7)) ;

    (TOp(TOpcode::mov)) (TReg(3)) (TReg(0));
    // The bracket contents are TReg/TAdr operator expressions; their result is
    // passed to TOp::operator[].
    (TOp(TOpcode::mov)) (TReg(0)) [(TReg(3)) + 2 * (TReg(1))];
    (TOp(TOpcode::mov)) [(TReg(3)) + 2 * (TReg(1)) + 0x1000] (TReg(0));

    // `L1 (TOp(...))` calls TLabel::operator() on the instruction; the rest of
    // the chain is applied to what that call returns.
    L1 (TOp(TOpcode::mov)) (TReg(0)) [(TReg(0))];
    L2 (TOp(TOpcode::mov)) (TReg(0)) [0x1234];

    (TOp(TOpcode::call)) (TReg(0)) ;
    (TOp(TOpcode::jmp)) + L1 ;
    (TOp(TOpcode::jmp)) + L2 ;

    (TOp(TOpcode::pop)) (TReg(7)) ;
    (TOp(TOpcode::pop)) (TReg(6)) ;
    (TOp(TOpcode::mov)) (TReg(4)) (TReg(5));
    (TOp(TOpcode::pop)) (TReg(5)) ;
    (TOp(TOpcode::ret)) ;

    // Ends the process with exit status 1.
    exit(1);
}
有头晕的感觉吧?正好应了一点,简单的背后是复杂。
下面是完整的示例代码:
#include <stdio.h> #include <stdlib.h> #include <tchar.h> struct TNode { }; struct TOp; struct TLabel { inline TOp & operator () (TOp & r){return r;} }; struct TInt : TNode { int val; TInt(int v):val(v){} }; struct TReg : TNode { int reg; TReg(int r):reg(r){} TReg():reg(0){} inline bool operator != (TReg & l){ return this->reg != l.reg;} inline bool operator == (TReg & l){ return this->reg == l.reg;} inline bool operator > (TReg & l){ return this->reg > l.reg;} inline bool operator < (TReg & l){ return this->reg < l.reg;} }; struct TAdr : TNode { int typ; TReg* base; int scale; TReg* index; int direct; TAdr():typ(0),base(NULL),scale(0),index(NULL),direct(0){} }; struct TAlloc { static TAdr* allocAdr() { return new TAdr; } static TReg* allocReg() { return new TReg(); } static void free(TAdr* p) { delete p; } static void free(TReg* p) { delete p; } }; inline TAdr & operator + (TReg & l,TReg & r) { TAdr* adr = TAlloc::allocAdr(); adr->base = &l; adr->index = &r; return *adr; }; inline TAdr & operator * (int l,TReg & r) { TAdr* adr = TAlloc::allocAdr(); adr->scale = l; adr->index = &r; return *adr; }; inline TAdr & operator + (TReg & r,int l) { TAdr* adr = TAlloc::allocAdr(); adr->base = &r; adr->direct = l; return *adr; }; inline TAdr & operator + (TAdr & adr,int l) { adr.direct = l; return adr; }; inline TAdr & operator + (TReg & l,TAdr & r) { TAdr* adr = TAlloc::allocAdr(); adr->base = &l; adr->index = r.index; adr->scale = r.scale; adr->direct += r.direct; return *adr; }; struct TOp { int op; TNode* left; TNode* right; TOp(int _op):op(_op),left(NULL),right(NULL){} inline TOp & operator () (TReg & r) { if(left) right = &r; else left = &r; return *this; }; inline TOp & operator () (TAdr & r) { if(left) right = &r; else left = &r; return *this; }; inline TOp & operator () (TInt & r) { if(left) right = &r; else left = &r; return *this; }; inline TOp & operator () (int r) { if(left) right = &TInt(r); else left = &TInt(r); return *this; }; inline TOp & operator [] (TAdr 
& r) { if(left) right = &r; else left = &r; return *this; }; inline TOp & operator [] (TReg & r) { if(left) right = &r; else left = &r; return *this; }; inline TOp & operator [] (int r) { if(left) right = &TInt(r); else left = &TInt(r); return *this; }; inline TOp & operator + (TLabel r) { return *this; } }; struct TOpcode { static const unsigned char mov = 1; static const unsigned char add = 2; static const unsigned char sub = 3; static const unsigned char mul = 4; static const unsigned char div = 5; static const unsigned char jmp = 6; static const unsigned char push = 7; static const unsigned char pop = 8; static const unsigned char call = 9; static const unsigned char ret = 10; }; #define ncode_mov (TOp(TOpcode::mov)) #define ncode_add (TOp(TOpcode::add)) #define ncode_sub (TOp(TOpcode::sub)) #define ncode_mul (TOp(TOpcode::mul)) #define ncode_div (TOp(TOpcode::div)) #define ncode_push (TOp(TOpcode::push)) #define ncode_pop (TOp(TOpcode::pop)) #define ncode_jmp (TOp(TOpcode::jmp)) + #define ncode_call (TOp(TOpcode::call)) #define ncode_ret (TOp(TOpcode::ret)) #define _(x,...) ncode_##x __VA_ARGS__ #define eax (TReg(0)) #define ecx (TReg(1)) #define edx (TReg(2)) #define ebx (TReg(3)) #define esp (TReg(4)) #define ebp (TReg(5)) #define esi (TReg(6)) #define edi (TReg(7)) int _tmain(int argc, _TCHAR* argv[]) { TLabel L1,L2; _(push ebp); _(mov ebp,esp); _(push esi); _(push edi); _(mov ebx, eax); _(mov eax,[ebx + 2 * ecx]); _(mov [ebx + 2 * ecx + 0x1000],eax); L1 _(mov eax,[eax]); L2 _(mov eax,[0x1234]); _(call eax); _(jmp L1); _(jmp L2); _(pop edi); _(pop esi); _(mov esp,ebp); _(pop ebp); _(ret ); exit(1); }
-----复杂,并不会因奥卡姆剃刀而减少。