Lua函数执行流程及函数延续点问题的研究


第一：Lua函数调用的总体流程

在保护性调用的情况下，lua中函数调用的流程如下，非保护性调用的流程更加简单，请追踪lua_call函数


int docall (lua_State *L, int narg, int nres) 
|
—— int lua_pcallk (lua_State *L, int nargs, int nresults, int errfunc ...)
   |
   —— luaD_pcall (lua_State *L, Pfunc func, void *u,ptrdiff_t old_top, ptrdiff_t ef) 
      |
      -- luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud)  
         |
         -- void f_call (lua_State *L, void *ud) 
            |
            -- void luaD_call (lua_State *L, StkId func, int nResults, int allowyield) 
               |
               -- int luaD_precall (lua_State *L, StkId func, int nresults)
                  |
                  -- luaD_poscall
                     |
                     ---...

第二：函数调用的方式和异常处理

可以看到 luaD_rawrunprotected 函数调用的实际上是 f_call，真正要执行的函数在 f_call 中被调用，封装这一层的意义就是为了实现保护性调用。保护性调用的情况下，lua虚拟机使用 lua_longjmp 为函数实现堆栈续传功能，也就是当错误发生的时候，在Lua内部能够最终跳转回调用点继续向下执行。所有使用 luaD_rawrunprotected 函数的调用都不会因为错误直接导致程序退出，而是回到调用点，然后将状态返回给外层逻辑处理。

/*
** Protected call.
** Installs a fresh 'lua_longjmp' record as the current error handler,
** runs 'f(L, ud)' inside LUAI_TRY, then restores the previous handler
** and the saved C-call counter.
** Returns 'lj.status': LUA_OK as initialised here unless something
** changed it while 'f' was running (presumably the code that raises
** the error -- not visible in this excerpt).
*/
int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) {
  unsigned short oldnCcalls = L->nCcalls;  /* restored after the call */
  struct lua_longjmp lj;
  lj.status = LUA_OK;
  lj.previous = L->errorJmp;  /* chain new error handler */
  L->errorJmp = &lj;
  LUAI_TRY(L, &lj,
    (*f)(L, ud);  /* if 'f' raises an error, control comes back here and continues below */
  );
  L->errorJmp = lj.previous;  /* restore old error handler */
  L->nCcalls = oldnCcalls;
  return lj.status;
}

对于Lua而言，只有 LUA_YIELD 被视为可恢复的“异常”（参见宏定义 #define errorstatus(s) ((s) > LUA_YIELD)），其他大于 LUA_YIELD 的状态都会被当作真正的错误来处理。

其实对于调用一个函数，无论是lua函数还是c函数，都可以使用 lua_pcall（lua_call）：这种方式的调用我们可以看到，在调用到 luaD_call 这个流程时，allowyield 传的是0，也就是说是不允许挂起的，因此如果你在函数中使用了 yield 相关的函数试图挂起程序，在 lua_yieldk 中会报错：attempt to yield from outside a coroutine。因此是不是可以这样理解：如果你需要在函数中 yield，就不能通过 lua_pcall 和 lua_call 的形式发起函数调用。当然还有一种形式是使用 lua_resume 发起函数调用：我们知道 resume 的功能是唤醒一个挂起的线程（coroutine），当第一次调用的时候它只是简单地执行函数体，只有在之前有过 yield 挂起的记录之后再次调用 resume 才具备恢复线程的功能。这种方式是允许函数让出线程（yield挂起）的，下面会介绍到。

/*
** Yields the running thread 'L', leaving 'nresults' values on the stack.
** 'k' is an optional continuation function and 'ctx' the context value
** stored alongside it.
** If 'L->nny' is non-zero the yield is refused with a runtime error.
** Otherwise the thread status becomes LUA_YIELD; for a C function the
** continuation is recorded and LUA_YIELD is thrown, while inside a hook
** (a Lua 'ci') the function simply returns 0.
*/
LUA_API int lua_yieldk (lua_State *L, int nresults, lua_KContext ctx,
                        lua_KFunction k) {
  CallInfo *ci = L->ci;
  luai_userstateyield(L, nresults);
  lua_lock(L);
  api_checknelems(L, nresults);
  if (L->nny > 0) {  /* yielding is currently not allowed */
    if (L != G(L)->mainthread)
      luaG_runerror(L, "attempt to yield across a C-call boundary");
    else
      luaG_runerror(L, "attempt to yield from outside a coroutine");  /* 'L' is the main thread: this is the error that gets reported */
  }
  L->status = LUA_YIELD;
  ci->extra = savestack(L, ci->func);  /* save current 'func' */
  if (isLua(ci)) {  /* inside a hook? */
    api_check(L, k == NULL, "hooks cannot continue after yielding");
  }
  else {
    if ((ci->u.c.k = k) != NULL)  /* is there a continuation? */
      ci->u.c.ctx = ctx;  /* save context */
    ci->func = L->top - nresults - 1;  /* protect stack below results */
    luaD_throw(L, LUA_YIELD);  /* the code below is the hook-only path */
  }
  lua_assert(ci->callstatus & CIST_HOOKED);  /* must be inside a hook */
  lua_unlock(L);
  return 0;  /* return to 'luaD_hook' */
}

归纳一下上面的内容：如果你调用的是不会挂起线程的函数体或者函数块，使用lua_pcall（lua_call）以及lua_resume都能够正常执行函数，如果函数体中含有挂起线程的流程，必须使用lua_resume发起函数调用。

第三：函数调用的核心函数

luaD_precall 是函数调用的前半部分，luaD_poscall 顾名思义对应函数调用的后半部分。如果调用的是C函数，那么在 luaD_precall 中调整好堆栈后就直接调用了，然后紧接着调用 luaD_poscall，函数调用就算结束了；然而如果是lua函数，需要交给lua虚拟机去执行指令，因此 luaD_precall 只是将堆栈调整妥当，等到 lvm 执行完毕之后再执行 luaD_poscall，调整返回值。

 /*
 ** First half of a function call.
 ** 'func' is the stack slot holding the value being called and 'nresults'
 ** the number of results the caller wants.
 ** Returns 1 when the callee was a C function (it has already been run
 ** and 'luaD_poscall' has already been applied) and 0 when the callee is
 ** a Lua function whose CallInfo has only been prepared here.
 ** Any other value is retried through its '__call' metamethod.
 */
 int luaD_precall (lua_State *L, StkId func, int nresults) {
   lua_CFunction f;
   CallInfo *ci;
   int n;  /* number of arguments (Lua) or returns (C) */
   ptrdiff_t funcr = savestack(L, func);  /* offset form of 'func', used to re-derive it below */
   switch (ttype(func)) {
     case LUA_TLCF:  /* light C function */
       f = fvalue(func);
       goto Cfunc;
     case LUA_TCCL: {  /* C closure */
       f = clCvalue(func)->f;
      Cfunc:
       luaC_checkGC(L);  /* stack grow uses memory */
       luaD_checkstack(L, LUA_MINSTACK);  /* ensure minimum stack size */
       ci = next_ci(L);  /* now 'enter' new function: new CallInfo, call data recorded below */
       ci->nresults = nresults;
       ci->func = restorestack(L, funcr);
       ci->top = L->top + LUA_MINSTACK;
       lua_assert(ci->top <= L->stack_last);
       ci->callstatus = 0;
       if (L->hookmask & LUA_MASKCALL)
         luaD_hook(L, LUA_HOOKCALL, -1);
       lua_unlock(L);
       n = (*f)(L);  /* do the actual call: C functions and C closures run right here */
       lua_lock(L);
       api_checknelems(L, n);
       luaD_poscall(L, L->top - n, n);  /* move the 'n' results into place */
       return 1;
     }
     case LUA_TLCL: {  /* Lua function: prepare its call */
       StkId base;
       Proto *p = clLvalue(func)->p;
       n = cast_int(L->top - func) - 1;  /* number of real arguments */
       luaC_checkGC(L);  /* stack grow uses memory */
       luaD_checkstack(L, p->maxstacksize);
       /* fewer actual arguments than declared parameters: pad with nil
          (later parameters end up closer to the top of the stack) */
       for (; n < p->numparams; n++)
         setnilvalue(L->top++);  /* complete missing arguments */
       if (!p->is_vararg) {  /* fixed-arity function (no '...' in its definition) */
         func = restorestack(L, funcr);
         base = func + 1;
       }
       else {  /* vararg function ('...' in its definition) */
         base = adjust_varargs(L, p, n);
         func = restorestack(L, funcr);  /* previous call can change stack */
       }
       ci = next_ci(L);  /* now 'enter' new function */
       ci->nresults = nresults;
       ci->func = func;
       ci->u.l.base = base;
       ci->top = base + p->maxstacksize;
       lua_assert(ci->top <= L->stack_last);
       ci->u.l.savedpc = p->code;  /* starting point */
       ci->callstatus = CIST_LUA;
       L->top = ci->top;
       if (L->hookmask & LUA_MASKCALL)
         callhook(L, ci);
       return 0;
     }
     default: {  /* not a function */
       /* Metatable-driven call ("call" event, func(args)): triggered when
          Lua tries to call a value that is not a function. The value's
          '__call' metamethod is looked up; if found it is called with
          'func' as its first argument, followed by the original
          arguments (args) in order. */
       luaD_checkstack(L, 1);  /* ensure space for metamethod */
       func = restorestack(L, funcr);  /* previous call may change stack */
       tryfuncTM(L, func);  /* try to get '__call' metamethod */
       return luaD_precall(L, func, nresults);  /* now it must be a function */
     }
   }
 }

　　luaD_poscall 主要是调整函数调用后的堆栈，特别是调整返回值和函数调用链，代码描述还是挺清楚的。

/*
** Second half of a function call.
** 'firstResult' points at the first value produced by the callee and
** 'nres' is how many values it produced.
** The results are copied down so that they start at the slot that held
** the called function, missing ones are filled with nil up to the
** number the caller asked for ('ci->nresults'), 'L->ci' is moved back to
** the caller's CallInfo and 'L->top' is set just past the last result.
** Returns 'wanted - LUA_MULTRET'.
*/
int luaD_poscall (lua_State *L, StkId firstResult, int nres) {
  StkId res;
  int wanted, i;
  CallInfo *ci = L->ci;
  if (L->hookmask & (LUA_MASKRET | LUA_MASKLINE)) {
    if (L->hookmask & LUA_MASKRET) {
      ptrdiff_t fr = savestack(L, firstResult);  /* hook may change stack */
      luaD_hook(L, LUA_HOOKRET, -1);
      firstResult = restorestack(L, fr);  /* re-derive the pointer after the hook */
    }
    L->oldpc = ci->previous->u.l.savedpc;  /* 'oldpc' for caller function */
  }
  res = ci->func;  /* res == final position of 1st result */
  wanted = ci->nresults;
  L->ci = ci->previous;  /* back to caller */
  /* move results to correct place */
  for (i = wanted; i != 0 && nres-- > 0; i--)
    setobjs2s(L, res++, firstResult++);
  while (i-- > 0)  /* caller wanted more values than were produced */
    setnilvalue(res++);
  L->top = res;
  return (wanted - LUA_MULTRET);  /* 0 iff wanted == LUA_MULTRET */
}

第四：关于续传函数的使用

上面提到了lua中函数调用的异常处理依赖于 longjmp 进行异常恢复，但是如果调用链是在c函数中挂起的，那么再次使用 lua_resume 试图恢复调用栈的时候，C 中的堆栈已经丢失了。通俗点讲就是：你在一个函数A中 yield，函数B中第一次 resume 开始执行A函数，当遇到 yield 的时候调用流程被打断，线程被挂起；当你再次调用 resume 的时候，你希望的是回到A函数中继续执行A在 yield 调用下面的代码段，但是这个是做不到的，因为C的堆栈在Lua虚拟机中已经无从查找了！因此lua提供了续点函数来间接处理这个难题：你可以在 lua_pcallk 或者 lua_callk 中传入一个k函数，也就是续点函数，当你的调用中某个 yield 被 resume 唤醒的时候，虽然并不能够回到这个C函数中继续执行，但是它会回到你提供的k函数，让你把它作为一个中间的跳板做一些事情！这就是续点函数。需要注意，含有 yield 的调用不能在最外层通过 lua_pcallk 和 lua_callk 发起，原因还是上面提到的那个问题：最外层的函数调用如果不是用 lua_resume 发起的话，就会出现上面提到的错误。其实这个也好理解，因为你的函数中含有 yield 相关的代码段，因此你的 function 需要是 allowyield 的，但是在最外层通过 lua_pcallk 和 lua_callk 实际调用的都是 luaD_call 不允许 yield（allowyield 为0）的版本。

/*
** Unprotected call of the function sitting below its 'nargs' arguments
** on the stack, asking for 'nresults' results.
** 'k' is an optional continuation and 'ctx' its context value.
** The yieldable form of 'luaD_call' is used only when a continuation
** was supplied and 'L->nny' is 0; in every other case the non-yieldable
** form is used.
*/
LUA_API void lua_callk (lua_State *L, int nargs, int nresults,
                        lua_KContext ctx, lua_KFunction k) {
  StkId func;
  lua_lock(L);
  api_check(L, k == NULL || !isLua(L->ci),
    "cannot use continuations inside hooks");
  api_checknelems(L, nargs+1);
  api_check(L, L->status == LUA_OK, "cannot do calls on non-normal thread");
  checkresults(L, nargs, nresults);
  func = L->top - (nargs+1);  /* the function lies just below its arguments */
  if (k != NULL && L->nny == 0) {  /* need to prepare continuation? */
    L->ci->u.c.k = k;  /* save continuation */
    L->ci->u.c.ctx = ctx;  /* save context */
    luaD_call(L, func, nresults, 1);  /* do the call */  /* yieldable version */
  }
  else  /* no continuation or no yieldable */
    luaD_call(L, func, nresults, 0);  /* just do the call */   /* non-yieldable version */
  adjustresults(L, nresults);
  lua_unlock(L);
}

　　也许大家会有疑问：我传入了k函数，为什么调用的不是 yield 版本？原因就在于 L->nny 这个值在 lua_State 初始化的时候就不是0而是1，因此总会进入 noyield 的版本。而用 lua_resume 发起函数调用的时候，在 lua_resume 这个函数一开始就将 L->nny 重置为0，所以在 lua_resume 的外层保护下，lua_pcallk 和 lua_callk 能够顺利进入 yield 版本。

/*
** Protected call with optional continuation.
** On entry the function and its arguments are already in place; per the
** original note the stack reads, from the top downwards:
** p3, p2, p1, func, errfunc.
** 'errfunc' is the stack index of the message handler (0 for none),
** 'k' an optional continuation and 'ctx' its context value.
** Without a continuation, or when 'L->nny' > 0, the call goes through
** 'luaD_pcall'; otherwise the continuation and the data needed for
** error recovery are stored in the current CallInfo and the yieldable
** form of 'luaD_call' is used.
** Returns the status from 'luaD_pcall', or LUA_OK on the second path.
*/
LUA_API int lua_pcallk (lua_State *L, int nargs, int nresults, int errfunc,
                        lua_KContext ctx, lua_KFunction k) {
  struct CallS c;
  int status;
  ptrdiff_t func;  /* saved stack offset of the message handler (0 = none) */
  lua_lock(L);
  api_check(L, k == NULL || !isLua(L->ci),
    "cannot use continuations inside hooks");
  api_checknelems(L, nargs+1);
  api_check(L, L->status == LUA_OK, "cannot do calls on non-normal thread");
  checkresults(L, nargs, nresults);
  if (errfunc == 0)
    func = 0;
  else {
    StkId o = index2addr(L, errfunc);
    api_checkstackindex(L, errfunc, o);
    func = savestack(L, o);
  }
  c.func = L->top - (nargs+1);  /* function to be called */  /* points at the function's stack slot */
  if (k == NULL || L->nny > 0) {  /* no continuation or no yieldable? */
    c.nresults = nresults;  /* do a 'conventional' protected call */
    status = luaD_pcall(L, f_call, &c, savestack(L, c.func), func); /* f_call is handed to luaD_pcall */
  }
  else {  /* prepare continuation (call is already protected by 'resume') */
    CallInfo *ci = L->ci;
    ci->u.c.k = k;  /* save continuation */
    ci->u.c.ctx = ctx;  /* save context */
    /* save information for error recovery */
    ci->extra = savestack(L, c.func);
    ci->u.c.old_errfunc = L->errfunc;
    L->errfunc = func;
    setoah(ci->callstatus, L->allowhook);  /* save value of 'allowhook' */
    ci->callstatus |= CIST_YPCALL;  /* function can do error recovery */
    luaD_call(L, c.func, nresults, 1);  /* do the call */
    ci->callstatus &= ~CIST_YPCALL;
    L->errfunc = ci->u.c.old_errfunc;
    status = LUA_OK;  /* if it is here, there were no errors */
  }
  adjustresults(L, nresults);
  lua_unlock(L);
  return status;
}

　　下面是一段测试代码，用于验证上面的结论。其中被注释掉的部分无法正常运行，因为它在最外层直接使用 lua_callk 或 lua_pcallk 发起函数调用。

#include <stdio.h>
#include <string.h>
#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>
#include <dlfcn.h>
#include <math.h>

/*
 * Continuation function handed to lua_pcallk by mytest().
 * It only prints a fixed marker line and returns 0 (no Lua results);
 * its three parameters are not used.
 */
static int cont(lua_State *L, int status, lua_KContext ctx) {
  (void)L;
  (void)status;
  (void)ctx;
  printf("error occurred!!\n");
  return 0;
}

/* C function that yields straight away, handing zero values to lua_yield. */
static int pcall_test(lua_State *L) {
  const int nresults = 0;  /* nothing is passed along with the yield */
  return lua_yield(L, nresults);
}

/*
 * C function used as the coroutine body in main().
 * Prints a marker, then calls pcall_test through lua_pcallk with 'cont'
 * as the continuation (0 arguments, 0 results, no message handler).
 * The code after lua_pcallk only runs when the call came back without
 * yielding: on failure the single value lua_pcallk left on the stack is
 * returned, on success nothing is returned.
 */
static int mytest(lua_State *L) {
  printf("mytest\n");
  lua_pushcfunction(L, pcall_test);
  int ret = lua_pcallk(L, 0, 0, 0, 0, cont);
  if (ret != LUA_OK) {
    return 1;  /* hand the error object on the stack to the caller */
  }
  return 0;  /* 0 results were requested, so there is nothing to return */
}

/*
 * Test driver: runs mytest() through lua_resume, and resumes it a second
 * time if the first run ended in a yield.
 * Returns 0 on success and 1 when the state cannot be created or a
 * resume reports an error (the error value is printed first).
 */
int main(void) {
  lua_State *L = luaL_newstate();
  if (L == NULL) {
    fprintf(stderr, "cannot create Lua state\n");
    return 1;
  }
  luaL_openlibs(L);
  lua_pushcfunction(L, mytest);

  /*
   * Counter-example, intentionally left disabled: starting the call at
   * the outermost level with lua_callk / lua_pcallk instead of
   * lua_resume does not work.
   *
   *   lua_pushcfunction(L, pcall_test);
   *   int ret = lua_pcallk(L, 0, 0, 0, 0, cont);
   *   if (ret != 0) {
   *     const char *err = luaL_checkstring(L, -1);
   *     // err : attempt to yield from outside a coroutine
   *     printf("%s\n", err);
   *   }
   */

  int ret = lua_resume(L, NULL, 0);  /* first run of mytest */
  if (ret == LUA_YIELD) {
    /* the thread is suspended: wake it up again */
    ret = lua_resume(L, NULL, 0);
  }

  int failed = (ret != LUA_OK) && (ret != LUA_YIELD);
  if (failed) {
    /* lua_tostring yields NULL for a non-string value instead of raising
       an error, which would be unprotected here */
    const char *err = lua_tostring(L, -1);
    printf("%s\n", err != NULL ? err : "(error object is not a string)");
  }

  lua_close(L);  /* released on every path */
  return failed ? 1 : 0;
}

相关阅读:
成本直降50% | 阿里云发布云原生网关，开启下一代网关新进程
 阿里云容器服务全面升级为 ACK Anywhere，让云的边界拓展至企业需要的每个场景
 云拨测助力节卡机器人全面优化海外网站性能
 如何加速云原生数据应用？这个开源项目备受关注
 课程升级 | 极速构建知识体系，即学即用 Serverless
ECS 选款利器！PTS助您快速上云！
Morphling：云原生部署 AI ，如何把降本做到极致？
报名领奖｜云栖大会，10月19-22日杭州不见不散！
Dubbo3.0｜阿里巴巴服务框架三位一体的选择与实践
 告别Kafka Stream，让轻量级流处理更加简单
原文地址：https://www.cnblogs.com/biyeqingfeng/p/4978714.html