计划每天花1小时学习Redis 源码。在博客上做个记录。
1 typedef struct dict { 2 dictType *type; 3 void *privdata; 4 dictht ht[2]; 5 int rehashidx; /* rehashing not in progress if rehashidx == -1 */ 6 int iterators; /* number of iterators currently running */ 7 } dict;
1 typedef struct dictht { 2 dictEntry **table; 3 unsigned long size; 4 unsigned long sizemask; 5 unsigned long used; 6 } dictht;
/*
 * One key/value node. Nodes that fall into the same bucket are chained
 * together through 'next'.
 */
typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;                        /* value: a pointer or a 64-bit integer */
    struct dictEntry *next;     /* next node in the same bucket, or NULL */
} dictEntry;
一个字典有两个哈希表, 冲突后采用了链地址法,很好理解。
/* Field accessors for a dictEntry pointer 'he'. */
#define dictGetKey(he)                ((he)->key)
#define dictGetVal(he)                ((he)->v.val)
#define dictGetSignedIntegerVal(he)   ((he)->v.s64)
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)
1 /* And a case insensitive hash function (based on djb hash) */ 2 unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) { 3 unsigned int hash = (unsigned int)dict_hash_function_seed; 4 5 while (len--) 6 hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */ 7 return hash; 8 }
The magic of number 33 (why it works better than many other constants, prime or not) has never been adequately explained.
1 dict *dictCreate(dictType *type, void *privDataPtr){ 2 dict *d = malloc(sizeof(*d)); 3 _dictInit(d,type,privDataPtr); 4 return d; 5 } 6 7 int _dictInit(dict *d, dictType *type, void *privDataPtr){ 8 _dictReset(&d->ht[0]); 9 _dictReset(&d->ht[1]); 10 11 d->type = type; 12 d->privdata = privDataPtr; 13 d->rehashidx = -1; 14 d->iterators = 0; 15 16 return DICT_OK; 17 } 18 19 static void _dictReset(dictht *ht){ 20 ht->table = NULL; 21 ht->size = 0; 22 ht->sizemask = 0; 23 ht->used = 0; 24 }
对字典进行紧缩处理,让「已用节点数 / 哈希表大小」的比率尽量接近 1:
1 int dictResize(dict *d){ 2 int minimal; 3 4 if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR; 5 6 minimal = d->ht[0].used; 7 8 if (minimal < DICT_HT_INITIAL_SIZE) 9 minimal = DICT_HT_INITIAL_SIZE; 10 11 return dictExpand(d, minimal); 12 } 13 14 #define dictIsRehashing(ht) ((ht)->rehashidx != -1) 15 #define DICT_HT_INITIAL_SIZE 4
1 static unsigned long _dictNextPower(unsigned long size){ 2 unsigned long i = DICT_HT_INITIAL_SIZE; 3 4 if (size >= LONG_MAX) return LONG_MAX; 5 while(1) { 6 if (i >= size) 7 return i; 8 i *= 2; 9 } 10 } 11 12 int dictExpand(dict *d, unsigned long size){ 13 dictht n; /* the new hash table */ 14 15 unsigned long realsize = _dictNextPower(size); 16 17 /* the size is invalid if it is smaller than the number of 18 * elements already inside the hash table */ 19 if (dictIsRehashing(d) || d->ht[0].used > size) 20 return DICT_ERR; 21 22 /* Allocate the new hash table and initialize all pointers to NULL */ 23 n.size = realsize; 24 n.sizemask = realsize-1; 25 n.table = zcalloc(realsize*sizeof(dictEntry*)); 26 n.used = 0; 27 28 /* Is this the first initialization? If so it's not really a rehashing 29 * we just set the first hash table so that it can accept keys. */ 30 if (d->ht[0].table == NULL) { 31 d->ht[0] = n; 32 return DICT_OK; 33 } 34 35 /* Prepare a second hash table for incremental rehashing */ 36 d->ht[1] = n; 37 d->rehashidx = 0; 38 39 return DICT_OK; 40 }
新建了一个哈希表n,size是扩展后的size,ht[0].table 为空说明这是第一次初始化,不是扩展,直接赋值。
ht[0].table 不为空,说明这是一次扩展,把 n 赋给 ht[1],rehash 标志 rehashidx 也被设为 0。
1 /* Add an element to the target hash table */ 2 int dictAdd(dict *d, void *key, void *val){ 3 dictEntry *entry = dictAddRaw(d,key); 4 5 if (!entry) return DICT_ERR; 6 dictSetVal(d, entry, val); 7 return DICT_OK; 8 }
1 dictEntry *dictAddRaw(dict *d, void *key){ 2 int index; 3 dictEntry *entry; 4 dictht *ht; 5 6 if (dictIsRehashing(d)) _dictRehashStep(d); 7 8 /* Get the index of the new element, or -1 if 9 * the element already exists. */ 10 if ((index = _dictKeyIndex(d, key)) == -1) 11 return NULL; 12 13 /* Allocate the memory and store the new entry */ 14 ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0]; 15 entry = zmalloc(sizeof(*entry)); 16 entry->next = ht->table[index]; 17 ht->table[index] = entry; 18 ht->used++; 19 20 /* Set the hash entry fields. */ 21 dictSetKey(d, entry, key); 22 return entry; 23 }
1 /* Returns the index of a free slot that can be populated with 2 * an hash entry for the given 'key'. 3 * If the key already exists, -1 is returned. 4 * 5 * Note that if we are in the process of rehashing the hash table, the 6 * index is always returned in the context of the second (new) hash table. */ 7 static int _dictKeyIndex(dict *d, const void *key){ 8 unsigned int h, idx, table; 9 dictEntry *he; 10 11 /* Expand the hash table if needed */ 12 if (_dictExpandIfNeeded(d) == DICT_ERR) 13 return -1; 14 /* Compute the key hash value */ 15 h = dictHashKey(d, key); 16 for (table = 0; table <= 1; table++) { 17 idx = h & d->ht[table].sizemask; 18 /* Search if this slot does not already contain the given key */ 19 he = d->ht[table].table[idx]; 20 while(he) { 21 if (dictCompareKeys(d, key, he->key)) 22 return -1; 23 he = he->next; 24 } 25 if (!dictIsRehashing(d)) break; 26 } 27 return idx; 28 }
1 /* Expand the hash table if needed */ 2 static int _dictExpandIfNeeded(dict *d){ 3 /* Incremental rehashing already in progress. Return. */ 4 if (dictIsRehashing(d)) return DICT_OK; 5 6 /* If the hash table is empty expand it to the initial size. */ 7 if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); 8 9 /* If we reached the 1:1 ratio, and we are allowed to resize the hash 10 * table (global setting) or we should avoid it but the ratio between 11 * elements/buckets is over the "safe" threshold, we resize doubling 12 * the number of buckets. */ 13 if (d->ht[0].used >= d->ht[0].size && 14 (dict_can_resize || 15 d->ht[0].used/d->ht[0].size > dict_force_resize_ratio)) 16 { 17 return dictExpand(d, d->ht[0].used*2); 18 } 19 return DICT_OK; 20 }
dict_can_resize是个全局变量。dict_force_resize_ratio = 5.
/* Using dictEnableResize() / dictDisableResize() we make possible to * enable/disable resizing of the hash table as needed. This is very important * for Redis, as we use copy-on-write and don't want to move too much memory * around when there is a child performing saving operations. * * Note that even when dict_can_resize is set to 0, not all resizes are * prevented: an hash table is still allowed to grow if the ratio between * the number of elements and the buckets > dict_force_resize_ratio. */
1 void dictEnableResize(void) { 2 dict_can_resize = 1; 3 } 4 5 void dictDisableResize(void) { 6 dict_can_resize = 0; 7 }
字典的 rehash 操作实际上就是执行以下任务:
- 创建一个比 ht[0]->table 更大的 ht[1]->table ;
- 将 ht[0]->table 中的所有键值对迁移到 ht[1]->table ;
- 将原有 ht[0] 的数据清空,并将 ht[1] 替换为新的 ht[0] ;
经过以上步骤之后, 程序就在不改变原有键值对数据的基础上, 增大了哈希表的大小。
1 int dictRehash(dict *d, int n) { 2 if (!dictIsRehashing(d)) return 0; 3 4 while(n--) { 5 dictEntry *de, *nextde; 6 7 /* Check if we already rehashed the whole table... */ 8 if (d->ht[0].used == 0) { 9 zfree(d->ht[0].table); 10 d->ht[0] = d->ht[1]; 11 _dictReset(&d->ht[1]); 12 d->rehashidx = -1; 13 return 0; 14 } 15 16 /* Note that rehashidx can't overflow as we are sure there are more 17 * elements because ht[0].used != 0 */ 18 assert(d->ht[0].size > (unsigned)d->rehashidx); 19 while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++; 20 de = d->ht[0].table[d->rehashidx]; 21 /* Move all the keys in this bucket from the old to the new hash HT */ 22 while(de) { 23 unsigned int h; 24 25 nextde = de->next; 26 /* Get the index in the new hash table */ 27 h = dictHashKey(d, de->key) & d->ht[1].sizemask; 28 de->next = d->ht[1].table[h]; 29 d->ht[1].table[h] = de; 30 d->ht[0].used--; 31 d->ht[1].used++; 32 de = nextde; 33 } 34 d->ht[0].table[d->rehashidx] = NULL; 35 d->rehashidx++; 36 } 37 return 1; 38 }
执行过程中,ht[0]中的元素如果都已经转到了ht[1]中,即ht[0].used == 0,停止执行,释放ht[0].table指向的空间,ht[1]变为ht[0],将rehashidx置为-1。
1 341 adlist.c 2 93 adlist.h 3 810 dict.c 4 173 dict.h 5 732 sds.c 6 99 sds.h 7 2248 total
主要参考了《Redis 设计与实现》 。谢谢90后作者了。