简单分析下php中的分支背后的实现
<?php
if($a == 1){
echo "a";
}else{
echo "b";
}
1.语法分析
unticked_statement:
'{' inner_statement_list '}'
| T_IF parenthesis_expr { zend_do_if_cond(&$2, &$1 TSRMLS_CC); } statement { zend_do_if_after_statement(&$1, 1 TSRMLS_CC); }
elseif_list else_single { zend_do_if_end(TSRMLS_C); }
parenthesis_expr:
'(' expr ')' { $$ = $2; }
| '(' yield_expr ')' { $$ = $2; }
;
elseif_list:
/* empty */
| elseif_list T_ELSEIF parenthesis_expr { zend_do_if_cond(&$3, &$2 TSRMLS_CC); } statement { zend_do_if_after_statement(&$2, 0 TSRMLS_CC); }
;
%token T_ELSEIF "elseif (T_ELSEIF)"
else_single:
/* empty */
| T_ELSE statement
;
%token T_ELSE "else (T_ELSE)"
expr_without_variable:
| expr T_IS_EQUAL expr { zend_do_binary_op(ZEND_IS_EQUAL, &$$, &$1, &$3 TSRMLS_CC); }
对于上面的php代码来说
if 匹配 T_IF
$a == 1 匹配 parenthesis_expr , 同时语法分析器要执行 zend_do_if_cond
op为zend_is_equal
void zend_do_binary_op(zend_uchar op, znode *result, const znode *op1, const znode *op2 TSRMLS_DC) /* {{{ */ { zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC); opline->opcode = op; opline->result_type = IS_TMP_VAR; opline->result.var = get_temporary_variable(CG(active_op_array)); SET_NODE(opline->op1, op1); SET_NODE(opline->op2, op2); GET_NODE(result, opline->result); }
static int ZEND_FASTCALL ZEND_IS_EQUAL_SPEC_CV_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS) { USE_OPLINE zend_free_op free_op2; zval *result = &EX_T(opline->result.var).tmp_var; SAVE_OPLINE(); ZVAL_BOOL(result, fast_equal_function(result, _get_zval_ptr_cv_BP_VAR_R(execute_data, opline->op1.var TSRMLS_CC), _get_zval_ptr_tmp(opline->op2.var, execute_data, &free_op2 TSRMLS_CC) TSRMLS_CC)); zval_dtor(free_op2.var); CHECK_EXCEPTION(); ZEND_VM_NEXT_OPCODE(); }
echo "a" 匹配 statement, 同时语法分析器要执行 zend_do_if_after_statement
else 匹配 T_ELSE, 同时语法分析器再执行zend_do_if_end
由于是先分析 $a==1, 那么它对应的opcode的opline_num 假设为1,发现if ($a == 1) 匹配BNF后,执行下面的函数,得到新的opline_num (这里为2)
,同时得到新的opline, 设置opcode为ZEND_JMPZ, 将op1设置为$a==1对应的opcode, 将op2设置为unused, 为什么呢?后来看代码才知道,
当当前分支不成立时,是要跳转的,在执行下面的函数时,还不知道要跳转的opline_num, 只有当分析完statement后,才知道这个跳转opline_num
void zend_do_if_cond(const znode *cond, znode *closing_bracket_token TSRMLS_DC) /* {{{ */
{
int if_cond_op_number = get_next_op_number(CG(active_op_array));
zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
opline->opcode = ZEND_JMPZ;
SET_NODE(opline->op1, cond);
closing_bracket_token->u.op.opline_num = if_cond_op_number;
SET_UNUSED(opline->op2);
INC_BPC(CG(active_op_array));
}
echo "a"; 这个匹配statement 就不说了,然后执行zend_do_if_after_statement,这里也要获得一个新的opline以及opline_num(这里为3),
设置opcode为ZEND_JMP, 意思为无条件跳转,同时设置opcodes的数组里第2个元素的属性op2.opline_num设置为5,为什么不是4,是因为本身的无条件跳转ZEND_JMP也算一个opcode
void zend_do_if_after_statement(const znode *closing_bracket_token, unsigned char initialize TSRMLS_DC) /* {{{ */
{
int if_end_op_number = get_next_op_number(CG(active_op_array));
zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
zend_llist *jmp_list_ptr;
opline->opcode = ZEND_JMP;
/* save for backpatching */
if (initialize) {
zend_llist jmp_list;
zend_llist_init(&jmp_list, sizeof(int), NULL, 0);
zend_stack_push(&CG(bp_stack), (void *) &jmp_list, sizeof(zend_llist));
}
zend_stack_top(&CG(bp_stack), (void **) &jmp_list_ptr);
zend_llist_add_element(jmp_list_ptr, &if_end_op_number);
CG(active_op_array)->opcodes[closing_bracket_token->u.op.opline_num].op2.opline_num = if_end_op_number+1;
SET_UNUSED(opline->op1);
SET_UNUSED(opline->op2);
}
else { echo "b"; } 匹配 T_ELSE statement后,马上执行 zend_do_if_end,得到新的opline_num,注意:现在的opcodes数组里有4个opcode了
第一个是$a==1对应的opcode, opline=1
第二个是ZEND_JMPZ 当 $a!=1时的opcode, opline=5
第三个是 echo "a";对应的opcode, opline =3
第四个是ZEND_JMP对应的opcode, opline=6
第五个是echo "b";对应的opcode opline=5
所以ZEND_JMP 对应的是 else {echo "b";}之后的opcode了
void zend_do_if_end(TSRMLS_D) /* {{{ */ { int next_op_number = get_next_op_number(CG(active_op_array)); zend_llist *jmp_list_ptr; zend_llist_element *le; zend_stack_top(&CG(bp_stack), (void **) &jmp_list_ptr); for (le=jmp_list_ptr->head; le; le = le->next) { CG(active_op_array)->opcodes[*((int *) le->data)].op1.opline_num = next_op_number; } zend_llist_destroy(jmp_list_ptr); zend_stack_del_top(&CG(bp_stack)); DEC_BPC(CG(active_op_array)); }
pass_two函数,处理op_array中的各个opline, 上面unused掉的op2,在这里又重新赋值,就是当分支不成立时,要跳转的opcode所在的行号
ZEND_API int pass_two(zend_op_array *op_array TSRMLS_DC)
{
zend_op *opline, *end;
if (op_array->type!=ZEND_USER_FUNCTION && op_array->type!=ZEND_EVAL_CODE) {
return 0;
}
。。。。
opline = op_array->opcodes;
end = opline + op_array->last;
while (opline < end) {
if (opline->op1_type == IS_CONST) {
opline->op1.zv = &op_array->literals[opline->op1.constant].constant;
}
if (opline->op2_type == IS_CONST) {
opline->op2.zv = &op_array->literals[opline->op2.constant].constant;
}
switch (opline->opcode) {
case ZEND_GOTO:
if (Z_TYPE_P(opline->op2.zv) != IS_LONG) {
zend_resolve_goto_label(op_array, opline, 1 TSRMLS_CC);
}
/* break omitted intentionally */
case ZEND_JMP:
case ZEND_FAST_CALL:
opline->op1.jmp_addr = &op_array->opcodes[opline->op1.opline_num];
break;
case ZEND_JMPZ:
case ZEND_JMPNZ:
case ZEND_JMPZ_EX:
case ZEND_JMPNZ_EX:
case ZEND_JMP_SET:
case ZEND_JMP_SET_VAR:
opline->op2.jmp_addr = &op_array->opcodes[opline->op2.opline_num];
break;
case ZEND_RETURN:
case ZEND_RETURN_BY_REF:
if (op_array->fn_flags & ZEND_ACC_GENERATOR) {
if (opline->op1_type != IS_CONST || Z_TYPE_P(opline->op1.zv) != IS_NULL) {
CG(zend_lineno) = opline->lineno;
zend_error(E_COMPILE_ERROR, "Generators cannot return values using "return"");
}
opline->opcode = ZEND_GENERATOR_RETURN;
}
break;
}
ZEND_VM_SET_OPCODE_HANDLER(opline);
opline++;
}
op_array->fn_flags |= ZEND_ACC_DONE_PASS_TWO;
return 0;
}
static int ZEND_FASTCALL ZEND_JMPZ_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
{
USE_OPLINE
zval *val;
int ret;
SAVE_OPLINE();
//opline->op1为 if后面的表达式的值
val = opline->op1.zv;
if (IS_CONST == IS_TMP_VAR && EXPECTED(Z_TYPE_P(val) == IS_BOOL)) {
ret = Z_LVAL_P(val);
} else {
//判断val是否为1 true 0 false,如果为1,执行下一条opcode,如果为0,进行opcode的跳转
ret = i_zend_is_true(val);
if (UNEXPECTED(EG(exception) != NULL)) {
HANDLE_EXCEPTION();
}
}
if (!ret) {
#if DEBUG_ZEND>=2
printf("Conditional jmp to %d
", opline->op2.opline_num);
#endif
ZEND_VM_SET_OPCODE(opline->op2.jmp_addr);
ZEND_VM_CONTINUE();
}
ZEND_VM_NEXT_OPCODE();
}
关于i_zend_is_true的实现
static zend_always_inline int i_zend_is_true(zval *op)
{
int result;
switch (Z_TYPE_P(op)) {
case IS_NULL:
result = 0;
break;
case IS_LONG:
case IS_BOOL:
case IS_RESOURCE:
result = (Z_LVAL_P(op)?1:0);
break;
case IS_DOUBLE:
result = (Z_DVAL_P(op) ? 1 : 0);
break;
case IS_STRING:
if (Z_STRLEN_P(op) == 0
|| (Z_STRLEN_P(op)==1 && Z_STRVAL_P(op)[0]=='0')) {
result = 0;
} else {
result = 1;
}
break;
case IS_ARRAY:
result = (zend_hash_num_elements(Z_ARRVAL_P(op))?1:0);
break;
case IS_OBJECT:
if(IS_ZEND_STD_OBJECT(*op)) {
TSRMLS_FETCH();
if (Z_OBJ_HT_P(op)->cast_object) {
zval tmp;
if (Z_OBJ_HT_P(op)->cast_object(op, &tmp, IS_BOOL TSRMLS_CC) == SUCCESS) {
result = Z_LVAL(tmp);
break;
}
} else if (Z_OBJ_HT_P(op)->get) {
zval *tmp = Z_OBJ_HT_P(op)->get(op TSRMLS_CC);
if(Z_TYPE_P(tmp) != IS_OBJECT) {
/* for safety - avoid loop */
convert_to_boolean(tmp);
result = Z_LVAL_P(tmp);
zval_ptr_dtor(&tmp);
break;
}
}
}
result = 1;
break;
default:
result = 0;
break;
}
return result;
}
<?php if($a == 1){ echo "a"; }else{ echo "b"; } echo "c";
1. zend_is_equsl
op1.zv $a
op1_type cv
op2.zv 1
op2_type const
result.zv 0/1
result_type tmp_var
2. zend_jmpz
op1.zv 上面的0/1
op2 暂时没有
3.zend_echo
4.zend_jmp 下一个跳转,这里可以知道jmpz的跳转地址了,就是当前 opline_num+1, 于是 zend_jmpz的 op2.opline_num=5
5.zend_echo
这时知道zend_jmp的跳转地址 ,是5, 即 zend_jmp 的op1.opline_num为5