3.1 概述
字符串在Lua中是不可变的数据,并且会被内部化(intern)保存在全局字符串表中。每当用到一个尚未存在于该表中的字符串时,就会创建一份新的数据;创建之后其内容不可更改,内容相同的字符串共享同一份数据。
3.2 字符串实现
//luaconf.h:595 // 用于8字节对齐 #define LUAI_USER_ALIGNMENT_T union { double u; void *s; long l; } //limits.h:47 // 用于8字节对齐typedef LUAI_USER_ALIGNMENT_T L_Umaxalign; //lobject.h:199 typedef union TString { L_Umaxalign dummy; /* ensures maximum alignment for strings */ struct { CommonHeader; lu_byte reserved; //1:系统保留,不会在GC阶段回收 unsigned int hash; size_t len; } tsv; } TString; //lstate.h:68 typedef struct global_State { stringtable strt; /* hash table for strings */ //...... } global_State; //lstate.h:38 typedef struct stringtable { GCObject **hash; //哈希桶,每个槽又是一个GCObject *,数据TString使用链式存储 lu_int32 nuse; /* number of elements */ int size; } stringtable; //为了避免数据(TString)量太大导致查找退化成线性操作,需要重新散列: //lstring.c:22 void luaS_resize (lua_State *L, int newsize) { GCObject **newhash; stringtable *tb; int i; if (G(L)->gcstate == GCSsweepstring) return; /* cannot resize during GC traverse */ newhash = luaM_newvector(L, newsize, GCObject *); tb = &G(L)->strt; for (i=0; i<newsize; i++) newhash[i] = NULL; /* rehash 重新散列*/ for (i=0; i<tb->size; i++) { GCObject *p = tb->hash[i]; while (p) { /* for each node in the list */ GCObject *next = p->gch.next; /* save next */ unsigned int h = gco2ts(p)->hash; int h1 = lmod(h, newsize); /* new position */ lua_assert(cast_int(h%newsize) == lmod(h, newsize)); p->gch.next = newhash[h1]; /* chain it */ newhash[h1] = p; p = next; } } luaM_freearray(L, tb->hash, tb->size, TString *); //释放旧的散列桶 tb->size = newsize; tb->hash = newhash; }
有两处关于luaS_resize函数的调用:
//lgc.c:431 //这里会进行检查,如果此时桶的数量太大(利用率不到1/4 且大于 MINSTRTABSIZE * 2), //则会将散列桶数组减少为原来的一半 static void checkSizes (lua_State *L) { global_State *g = G(L); /* check size of string hash */ if (g->strt.nuse < cast(lu_int32, g->strt.size/4) && g->strt.size > MINSTRTABSIZE*2) luaS_resize(L, g->strt.size/2); /* table is too big */ /* check size of buffer */ if (luaZ_sizebuffer(&g->buff) > LUA_MINBUFFER*2) { /* buffer too big? */ size_t newsize = luaZ_sizebuffer(&g->buff) / 2; luaZ_resizebuffer(L, &g->buff, newsize); } } //lstring.c:75 //分配一个新的字符串 TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { GCObject *o; unsigned int h = cast(unsigned int, l); /* seed */ size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */ size_t l1; for (l1=l; l1>=step; l1-=step) /* compute hash */ h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)]; o != NULL; o = o->gch.next) { TString *ts = rawgco2ts(o); if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) { /* string may be dead: 需要更改为不需要在GC阶段回收*/ if (isdead(G(L), o)) changewhite(o); return ts; //该字符串已经存在,直接返回结果 } } return newlstr(L, str, l, h); /* not found: 分配新的字符串*/ } //lstring.c:50 //分配一个新的字符串 static TString *newlstr (lua_State *L, const char *str, size_t l, unsigned int h) { TString *ts; stringtable *tb; if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char)) luaM_toobig(L); ts = cast(TString *, luaM_malloc(L, (l+1)*sizeof(char)+sizeof(TString))); ts->tsv.len = l; ts->tsv.hash = h; ts->tsv.marked = luaC_white(G(L)); ts->tsv.tt = LUA_TSTRING; ts->tsv.reserved = 0; memcpy(ts+1, str, l*sizeof(char)); ((char *)(ts+1))[l] = '