• 《lua设计与实现》第3章 字符串


    3.1 概述

        字符串在Lua中是不可变的数据。每当使用不存在的字符串时,就会创建一份新的数据,创建之后是不可更改的。

    3.2 字符串实现

    //luaconf.h:595
    /*
    ** Union whose only purpose is its alignment requirement: it is as strictly
    ** aligned as the strictest of double, void * and long (8 bytes on typical
    ** platforms -- the exact value is platform dependent).
    */
    #define LUAI_USER_ALIGNMENT_T    union { double u; void *s; long l; }
    
    //limits.h:47
    // 用于8字节对齐
    typedef LUAI_USER_ALIGNMENT_T L_Umaxalign;
    
    //lobject.h:199
    /*
    ** Header of a Lua string object. The union with `dummy` exists only to
    ** force alignment; the real fields live in `tsv`. newlstr copies the
    ** character data into the memory directly after this header (at ts+1).
    */
    typedef union TString {
      L_Umaxalign dummy;  /* ensures maximum alignment for strings */
      struct {
        CommonHeader;
        lu_byte reserved;  /* 0 for ordinary strings; luaX_init stores (keyword index + 1) for reserved words */
        unsigned int hash;  /* hash value computed in luaS_newlstr and stored by newlstr */
        size_t len;  /* number of characters, not counting the trailing '\0' */
      } tsv;
    } TString;
    
    
    //lstate.h:68
    /*
    ** State shared through G(L). Only the string-table member is shown in
    ** this excerpt.
    */
    typedef struct global_State {
      stringtable strt;  /* hash table for strings */
      /* ... remaining fields omitted in this excerpt ... */
    } global_State;
    
    //lstate.h:38
    /*
    ** Hash table holding every interned string.
    */
    typedef struct stringtable {
      GCObject **hash;  /* bucket array; each slot heads a chain of TString objects linked through `next` */
      lu_int32 nuse;   /* number of elements */
      int size;  /* number of buckets in `hash` */
    } stringtable;
    
    //为了避免数据(TString)量太大导致查找退化成线性操作,需要重新散列:
    //lstring.c:22
    /*
    ** Rebuild the global string table with `newsize` buckets.
    ** Every string chained in the old buckets is relinked into the bucket
    ** selected by its stored hash, then the old bucket array is released.
    ** Does nothing while the collector is in the GCSsweepstring state.
    */
    void luaS_resize (lua_State *L, int newsize) {
      GCObject **newhash;
      stringtable *tb;
      int i;
      if (G(L)->gcstate == GCSsweepstring)
        return;  /* cannot resize during GC traverse */
      newhash = luaM_newvector(L, newsize, GCObject *);
      tb = &G(L)->strt;
      for (i=0; i<newsize; i++) newhash[i] = NULL;  /* start with empty buckets */
      /* rehash: move every node into its bucket in the new array */
      for (i=0; i<tb->size; i++) {
        GCObject *p = tb->hash[i];
        while (p) {  /* for each node in the list */
          GCObject *next = p->gch.next;  /* save next */
          unsigned int h = gco2ts(p)->hash;
          int h1 = lmod(h, newsize);  /* new position */
          /* sanity check: lmod() must agree with a plain modulo */
          lua_assert(cast_int(h%newsize) == lmod(h, newsize));
          p->gch.next = newhash[h1];  /* chain it */
          newhash[h1] = p;
          p = next;
        }
      }
      /* release the old bucket array; the element type matches the one used
         when the array was allocated (GCObject *) */
      luaM_freearray(L, tb->hash, tb->size, GCObject *);
      tb->size = newsize;
      tb->hash = newhash;
    }

         有两处关于luaS_resize函数的调用:

    //lgc.c:431
    //这里会进行检查,如果此时桶的数量太大(利用率不到1/4 且大于 MINSTRTABSIZE * 2),
    //则会将散列桶数组减少为原来的一半
    /*
    ** Shrink over-sized global structures.
    ** - String table: halved when it has more than MINSTRTABSIZE*2 buckets
    **   and fewer than size/4 strings in use.
    ** - Scratch buffer: halved when it is larger than LUA_MINBUFFER*2.
    */
    static void checkSizes (lua_State *L) {
      global_State *g = G(L);
      /* string hash table too sparse? */
      if (g->strt.size > MINSTRTABSIZE*2 &&
          g->strt.nuse < cast(lu_int32, g->strt.size/4)) {
        luaS_resize(L, g->strt.size/2);
      }
      /* buffer too big? */
      if (luaZ_sizebuffer(&g->buff) > LUA_MINBUFFER*2) {
        size_t half = luaZ_sizebuffer(&g->buff) / 2;
        luaZ_resizebuffer(L, &g->buff, half);
      }
    }
    
    //lstring.c:75
    //查找字符串:如果已存在于字符串表中则直接返回,否则分配一个新的字符串
    /*
    ** Return the interned string for the `l` bytes at `str`.
    ** The hash is seeded with the length and samples the bytes from the end
    ** with a stride of (l>>5)+1, so long strings are not hashed in full.
    ** If an equal string is already chained in the matching bucket it is
    ** returned (and un-marked as dead if necessary); otherwise a new one is
    ** created through newlstr.
    */
    TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
      GCObject *node;
      unsigned int h = cast(unsigned int, l);  /* seed */
      size_t step = (l>>5)+1;  /* stride between sampled characters */
      size_t pos = l;
      /* compute hash */
      while (pos >= step) {
        h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[pos-1]));
        pos -= step;
      }
      /* scan the bucket chain for an identical string */
      node = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
      while (node != NULL) {
        TString *ts = rawgco2ts(node);
        if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) {
          /* string may be dead: flip its color so it is kept */
          if (isdead(G(L), node)) changewhite(node);
          return ts;  /* already interned */
        }
        node = node->gch.next;
      }
      return newlstr(L, str, l, h);  /* not found: create it */
    }
    
    //lstring.c:50
    //分配一个新的字符串
    /*
    ** Allocate a new string object holding a copy of the `l` bytes at `str`
    ** and link it into the global string table.
    **   h: hash of the string, as computed by the caller.
    ** Calls luaM_toobig when the requested size would overflow. Returns the
    ** new TString; the table is doubled when it holds more strings than
    ** buckets.
    */
    static TString *newlstr (lua_State *L, const char *str, size_t l,
                                           unsigned int h) {
      TString *ts;
      stringtable *tb;
      /* header + characters + terminator must fit in a size_t */
      if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
        luaM_toobig(L);
      ts = cast(TString *, luaM_malloc(L, (l+1)*sizeof(char)+sizeof(TString)));
      ts->tsv.len = l;
      ts->tsv.hash = h;
      ts->tsv.marked = luaC_white(G(L));
      ts->tsv.tt = LUA_TSTRING;
      ts->tsv.reserved = 0;  /* not a reserved word */
      memcpy(ts+1, str, l*sizeof(char));  /* characters live right after the header */
      ((char *)(ts+1))[l] = '\0';  /* ending 0 */
      tb = &G(L)->strt;
      h = lmod(h, tb->size);  /* from here on `h` is the bucket index */
      ts->tsv.next = tb->hash[h];  /* chain new entry */
      tb->hash[h] = obj2gco(ts);
      tb->nuse++;
      if (tb->nuse > cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
        luaS_resize(L, tb->size*2);  /* too crowded */
      return ts;
    }

        TString结构体中的字段reserved用于标识该字符串是不是保留字:0表示普通字符串,非0表示保留字(其值为该保留字在luaX_tokens中的下标加1)。

    //llex.c:37
    /* ORDER RESERVED */
    /*
    ** Printable text of every token, NULL-terminated. The reserved words
    ** come first; the remaining entries are the other terminal symbols.
    */
    const char *const luaX_tokens [] = {
        /* reserved words */
        "and", "break", "do",
        "else", "elseif", "end",
        "false", "for", "function",
        "if", "in", "local",
        "nil", "not", "or",
        "repeat", "return", "then",
        "true", "until", "while",
        /* operators */
        "..", "...",
        "==", ">=", "<=", "~=",
        /* token classes */
        "<number>", "<name>", "<string>", "<eof>",
        /* end marker */
        NULL
    };
    
    //llex.h:24
    /*
    ** Token codes, numbered consecutively from FIRST_RESERVED. The order must
    ** stay in step with the luaX_tokens table.
    */
    enum RESERVED {
      /* terminal symbols denoted by reserved words */
      TK_AND = FIRST_RESERVED,
      TK_BREAK, TK_DO, TK_ELSE, TK_ELSEIF,
      TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
      TK_IF, TK_IN, TK_LOCAL, TK_NIL,
      TK_NOT, TK_OR, TK_REPEAT, TK_RETURN,
      TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
      /* other terminal symbols */
      TK_CONCAT, TK_DOTS,
      TK_EQ, TK_GE, TK_LE, TK_NE,
      TK_NUMBER, TK_NAME, TK_STRING, TK_EOS
    };
    
    //lstring.h:20
    /*
    ** Intern a NUL-terminated C string: forwards to luaS_newlstr with
    ** strlen(s) as the length. Note that `s` is expanded twice.
    */
    #define luaS_new(L, s)    (luaS_newlstr(L, s, strlen(s)))
    
    //lgc.h:60
    /* bit index within an object's `marked` field; set by luaS_fix */
    #define FIXEDBIT  5
    
    //lstring.h:24
    /*
    ** Set FIXEDBIT in the `marked` field of string `s`. luaX_init applies it
    ** to the reserved words so that they are never collected.
    */
    #define luaS_fix(s) l_setbit((s)->tsv.marked, FIXEDBIT)
    
    //llex.h:36
    /* number of reserved words: the enum entries from FIRST_RESERVED up to
       and including TK_WHILE */
    #define NUM_RESERVED    (cast(int, TK_WHILE-FIRST_RESERVED+1))
    
    //llex.c:64
    //最开始新建保留字,并对reserved赋值
    /*
    ** Intern the first NUM_RESERVED entries of luaX_tokens, pin each one with
    ** luaS_fix, and tag it with its 1-based position in the `reserved` field.
    */
    void luaX_init (lua_State *L) {
      int idx = 0;
      while (idx < NUM_RESERVED) {
        const char *word = luaX_tokens[idx];
        TString *ts = luaS_new(L, word);
        luaS_fix(ts);  /* reserved words are never collected */
        lua_assert(strlen(word)+1 <= TOKEN_LEN);
        ts->tsv.reserved = cast_byte(idx+1);  /* reserved word */
        idx++;
      }
    }
  • 相关阅读:
    Excel操作快速填充和快速分析
    Excel操作用excel实现爬取网站上表格数据
    Excel操作条件格式的使用
    Excel操作数据验证规范单元格输入
    转载:PostgreSQL 高质量学习交流网站及链接推荐
    转载:PBKDF2加密算法
    idea 离线发布项目到linux
    2022 Google Kick Start Round A
    Codeforces Round #764 (Div. 3) 题解AG
    109个shell实用脚本
  • 原文地址:https://www.cnblogs.com/yyqng/p/14296103.html
Copyright © 2020-2023  润新知