• redis源码学习_字典


    redis中字典有以下要点:

    (1)它是一种保存键值对的结构,对于hash冲突的处理采用了头插法的链式存储来解决。

    (2)关于rehash:扩展就是取第一个大于等于used * 2的2 ^ n的数作为新的hash表大小;缩紧就是取第一个大于等于used的2 ^ n的数作为新的hash表大小。后面会介绍到dict结构体中是有dictht ht[2]这个成员变量的,为什么是2个呢?就是为了做rehash时交替使用的。那么何时扩展,何时缩紧呢?有个负载因子的概念(负载因子 = used / size,注意这里面的used也是包括了链式存储解决冲突时候的元素个数),没有执行BGSAVE或者BGREWRITEAOF时负载因子大于等于1就会扩展,小于0.1就会缩紧。rehash采用渐进式的方式进行,在此过程中对于字典的删、改、查会在2个ht上面进行,而增只会在新的ht上进行。

    主要关注dict.c和dict.h。

    首先先看一下结构体dictht:

     1 /* This is our hash table structure. Every dictionary has two of this as we
     2  * implement incremental rehashing, for the old to the new table. */
     3 typedef struct dictht {
     4     //每个具体table[i]中的节点数据类型是dictEntry 结构表示, 每个 dictEntry 结构都保存着一个键值对:
     5     dictEntry **table;
     6 
     7     // 哈希表大小
     8     unsigned long size;
     9     
    10     // 哈希表大小掩码,用于计算索引值,总是等于 size - 1
    11     unsigned long sizemask; 
    12 
    13     // 该哈希表已有节点的数量
    14     unsigned long used;
    15 
    16 } dictht;

    再看结构体dictEntry:

     1 typedef struct dictEntry { 
     2     // 键
     3     void *key;
     4     
     5     // 值(可以是指针、uint64_t 或 int64_t)
     6     union {
     7         void *val;
     8         uint64_t u64;
     9         int64_t s64;
    10     } v;
    11 
    12     
    13     //next 属性是指向另一个哈希表节点的指针, 这个指针可以将多个索引值相同的键值对连接在一起, 以此来解决键冲突(collision)的问题。
    14     struct dictEntry *next;
    15 
    16 } dictEntry;

    对于k1和k0计算出的索引值相同(即发生键冲突)的情况,见下图:

     再来看结构体dict:

     1 typedef struct dict {//dictCreate创建和初始化
     2     // 类型特定函数,实现多态
     3     dictType *type;
     4 
     5     // 私有数据 
     6     void *privdata;
     7 
     8     // 哈希表
     9     dictht ht[2];
    10 
    11     // rehash 索引
    12     int rehashidx; /* rehashing not in progress if rehashidx == -1 */
    13 
    14     // 目前正在运行的安全迭代器的数量
    15     int iterators; /* number of iterators currently running */
    16 
    17 } dict; 

    再看结构体dictType:

     1 typedef struct dictType {
     2     unsigned int (*hashFunction)(const void *key);
     3 
     4     void *(*keyDup)(void *privdata, const void *key);
     5 
     6     void *(*valDup)(void *privdata, const void *obj);
     7 
     8     int (*keyCompare)(void *privdata, const void *key1, const void *key2);
     9 
    10     void (*keyDestructor)(void *privdata, void *key);
    11     
    12     void (*valDestructor)(void *privdata, void *obj);
    13 
    14 } dictType;

    里面就是各种函数指针。

     

    dictCreate:创建字典,比较简单,不过多解释

     1 /* Create a new hash table */
     2 dict *dictCreate(dictType *type,
     3         void *privDataPtr)
     4 {
     5     dict *d = zmalloc(sizeof(*d));
     6 
     7     _dictInit(d,type,privDataPtr);
     8 
     9     return d;
    10 }

    _dictInit:初始化操作

     1 /* Initialize the hash table */
     2 int _dictInit(dict *d, dictType *type,
     3         void *privDataPtr)
     4 {
     5     // 初始化两个哈希表的各项属性值
     6     // 但暂时还不分配内存给哈希表数组
     7     _dictReset(&d->ht[0]);
     8     _dictReset(&d->ht[1]);
     9 
    10     // 设置类型特定函数
    11     d->type = type;
    12 
    13     // 设置私有数据
    14     d->privdata = privDataPtr;
    15 
    16     // 设置哈希表 rehash 状态
    17     d->rehashidx = -1;
    18 
    19     // 设置字典的安全迭代器数量
    20     d->iterators = 0;
    21 
    22     return DICT_OK;
    23 }

    _dictReset:赋初值

    1 static void _dictReset(dictht *ht)
    2 {
    3     ht->table = NULL;
    4     ht->size = 0;
    5     ht->sizemask = 0;
    6     ht->used = 0;
    7 }

     dictAdd:添加k-v到字典中,调用关系比较乱,我们来看一下

    dictAdd

      -->dictAddRaw

        -->_dictRehashStep

          -->dictRehash

        -->_dictKeyIndex

          -->_dictExpandIfNeeded

            -->dictExpand

              -->_dictNextPower

        -->dictSetKey

      -->dictSetVal

     有了调用关系就好办了,下面来分别分析以上函数都做了什么。

    _dictRehashStep:分步rehash包裹函数

    1 static void _dictRehashStep(dict *d) {
    2     if (d->iterators == 0) dictRehash(d,1);
    3 }

    dictRehash:分步rehash

     1 int dictRehash(dict *d, int n) { 
     2     // 只可以在 rehash 进行中时执行
     3     if (!dictIsRehashing(d)) return 0;
     4 
     5     // 进行 N 步迁移
     6     while(n--) {
     7         dictEntry *de, *nextde;
     8 
     9         /* Check if we already rehashed the whole table... */
    10         // 如果 0 号哈希表为空,那么表示 rehash 执行完毕
    11         if (d->ht[0].used == 0) {
    12             zfree(d->ht[0].table);
    13             d->ht[0] = d->ht[1];
    14             _dictReset(&d->ht[1]);
    15             d->rehashidx = -1;
    16             return 0;
    17         }
    18 
    19         /* Note that rehashidx can't overflow as we are sure there are more
    20          * elements because ht[0].used != 0 */
    21         assert(d->ht[0].size > (unsigned)d->rehashidx);
    22 
    23         // 略过数组中为空的索引,找到下一个非空索引
    24         while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
    25 
    26         // 指向该索引的链表表头节点
    27         de = d->ht[0].table[d->rehashidx];
    28         /* Move all the keys in this bucket from the old to the new hash HT */
    29         while(de) { 
    30             unsigned int h;
    31 
    32             nextde = de->next;
    33 
    34             /* Get the index in the new hash table */
    35             h = dictHashKey(d, de->key) & d->ht[1].sizemask;
    36 
    37             de->next = d->ht[1].table[h];
    38             d->ht[1].table[h] = de;
    39 
    40             // 更新计数器
    41             d->ht[0].used--;
    42             d->ht[1].used++;
    43 
    44             // 继续处理下个节点
    45             de = nextde;
    46         }
    47         // 将刚迁移完的哈希表索引的指针设为空
    48         d->ht[0].table[d->rehashidx] = NULL;
    49         // 更新 rehash 索引
    50         d->rehashidx++;
    51     }
    52 
    53     return 1;
    54 }

    dictExpand:扩展函数。标识是否正在rehash的rehashidx也是在这里设置的:当0号哈希表非空时,它会被置为0(不再是-1),表示rehash开始。

     1 /* Expand or create the hash table */
     2 int dictExpand(dict *d, unsigned long size)
     3 {
     4     // 新哈希表
     5     dictht n; /* the new hash table */
     6 
     7     unsigned long realsize = _dictNextPower(size);
     8 
     9     /* the size is invalid if it is smaller than the number of
    10      * elements already inside the hash table */
    11     if (dictIsRehashing(d) || d->ht[0].used > size)
    12         return DICT_ERR;
    13 
    14     /* Allocate the new hash table and initialize all pointers to NULL */
    15     n.size = realsize;
    16     n.sizemask = realsize-1;
    17     n.table = zcalloc(realsize*sizeof(dictEntry*));
    18     n.used = 0;
    19  
    20     //下面是要区分2种情况的,需要注意了
    21 
    22     /* Is this the first initialization? If so it's not really a rehashing
    23      * we just set the first hash table so that it can accept keys. */
    24     // 如果 0 号哈希表为空,那么这是一次初始化:
    25     if (d->ht[0].table == NULL) {
    26         d->ht[0] = n;
    27         return DICT_OK;
    28     }
    29 
    30     /* Prepare a second hash table for incremental rehashing */
    31     // 如果 0 号哈希表非空,那么这是一次 rehash :
    32     // 程序将新哈希表设置为 1 号哈希表,
    33     // 并将字典的 rehash 标识打开,让程序可以开始对字典进行 rehash
    34     d->ht[1] = n;
    35     d->rehashidx = 0;
    36     return DICT_OK;
    37 
    38 }

    dictAddRaw:添加新的键到字典

     1 /* Low level add. This function adds the entry but instead of setting
     2  * a value returns the dictEntry structure to the user, that will make
     3  * sure to fill the value field as he wishes.
     4  *
     5  * This function is also directly exposed to user API to be called
     6  * mainly in order to store non-pointers inside the hash value, example:
     7  *
     8  * entry = dictAddRaw(dict,mykey);
     9  * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
    10  *
    11  * Return values:
    12  *
    13  * If key already exists NULL is returned.
    14  * If key was added, the hash entry is returned to be manipulated by the caller.
    15  */
    16 dictEntry *dictAddRaw(dict *d, void *key)
    17 {
    18     int index;
    19     dictEntry *entry;
    20     dictht *ht;
    21 
    22     if (dictIsRehashing(d)) _dictRehashStep(d);
    23 
    24     /* Get the index of the new element, or -1 if
    25      * the element already exists. */
    26     if ((index = _dictKeyIndex(d, key)) == -1)
    27         return NULL;
    28 
    29     /* Allocate the memory and store the new entry */
    30     ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    31     entry = zmalloc(sizeof(*entry));
    32     entry->next = ht->table[index];
    33     ht->table[index] = entry;
    34     ht->used++;
    35 
    36     /* Set the hash entry fields. */
    37     dictSetKey(d, entry, key);
    38 
    39     return entry;
    40 }

    dictAdd:添加新的键值对到字典

     1 /* Add an element to the target hash table */   
     2 int dictAdd(dict *d, void *key, void *val)
     3 {
     4     dictEntry *entry = dictAddRaw(d,key);
     5 
     6     // 键已存在,添加失败
     7     if (!entry) return DICT_ERR;
     8 
     9     // 键不存在,设置节点的值
    10     dictSetVal(d, entry, val);
    11 
    12     return DICT_OK;
    13 }

    dictReplace:更新键值对,原来没有就添加,原来有就更新值

     1 /* Add an element, discarding the old if the key already exists.
     2  * Return 1 if the key was added from scratch, 0 if there was already an element with such key and dictReplace() just performed a value update operation. 
     3  */ 
     4 int dictReplace(dict *d, void *key, void *val)
     5 {
     6     dictEntry *entry, auxentry;
     7 
     8     /* Try to add the element. If the key
     9      * does not exist dictAdd will succeed. */
    10     if (dictAdd(d, key, val) == DICT_OK)
    11         return 1;
    12 
    13     /* It already exists, get the entry */
    14     entry = dictFind(d, key);
    15    
    16     auxentry = *entry;
    17 
    18     dictSetVal(d, entry, val);
    19 
    20     dictFreeVal(d, &auxentry);
    21 
    22     return 0;
    23 }

     dictGenericDelete:删除指定key对应的一个元素

     1 /* Search and remove an element */
     2 static int dictGenericDelete(dict *d, const void *key, int nofree)
     3 {
     4     unsigned int h, idx;
     5     dictEntry *he, *prevHe;
     6     int table;
     7 
     8     if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
     9 
    10     if (dictIsRehashing(d)) _dictRehashStep(d);
    11 
    12     // 计算哈希值
    13     h = dictHashKey(d, key);
    14 
    15     // 遍历哈希表
    16     for (table = 0; table <= 1; table++) {
    17 
    18         // 计算索引值 
    19         idx = h & d->ht[table].sizemask;
    20         // 指向该索引上的链表,就是链表的第一个元素
    21         he = d->ht[table].table[idx];
    22         prevHe = NULL;
    23 
    24         while(he) {
    25         
    26             if (dictCompareKeys(d, key, he->key)) {
    27                
    28                 /* Unlink the element from the list */
    29                 // 就是删除一个就得了
    30                 if (prevHe)
    31                     prevHe->next = he->next;
    32                 else
    33                     d->ht[table].table[idx] = he->next;
    34 
    35                 if (!nofree) {
    36                     dictFreeKey(d, he);
    37                     dictFreeVal(d, he);
    38                 }
    39                 
    40                 zfree(he);
    41 
    42                 d->ht[table].used--;
    43 
    44                 return DICT_OK;
    45             }
    46 
    47             prevHe = he;
    48             he = he->next;
    49         }
    50 
    51         if (!dictIsRehashing(d)) break;
    52     }
    53 
    54     return DICT_ERR; /* not found */
    55 }

     _dictClear:释放hash

     1 /* Destroy an entire dictionary */
     2 int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
     3     unsigned long i;
     4 
     5     /* Free all the elements */
     6     for (i = 0; i < ht->size && ht->used > 0; i++) {
     7         dictEntry *he, *nextHe;
     8 
     9         if (callback && (i & 65535) == 0) callback(d->privdata); 
    10 
    11         // 跳过空索引
    12         if ((he = ht->table[i]) == NULL) continue;
    13 
    14         // 遍历整个链表
    15         while(he) {
    16             nextHe = he->next;
    17             dictFreeKey(d, he);
    18             dictFreeVal(d, he);
    19             zfree(he);
    20 
    21             ht->used--;
    22 
    23             he = nextHe;
    24         }
    25     }
    26 
    27     /* Free the table and the allocated cache structure */
    28     zfree(ht->table);
    29 
    30     /* Re-initialize the table */
    31     _dictReset(ht);
    32 
    33     return DICT_OK; /* never fails */
    34 }

     关于dictGetIterator、dictGetSafeIterator、dictNext、dictReleaseIterator具体应用暂时分析一下:

    首先先看用到这几个函数的地方吧

     1 void keysCommand(redisClient *c) {
     2     dictIterator *di;
     3     dictEntry *de;
     4 
     5     sds pattern = c->argv[1]->ptr;
     6 
     7     int plen = sdslen(pattern), allkeys;
     8     unsigned long numkeys = 0;
     9     void *replylen = addDeferredMultiBulkLength(c);
    10 
    11     /* 首先先搞一个迭代器出来 */
    12     di = dictGetSafeIterator(c->db->dict);
    13     allkeys = (pattern[0] == '*' && pattern[1] == '\0');
    14     
    15     /* 开始遍历迭代器 */
    16     while((de = dictNext(di)) != NULL) {
    17         sds key = dictGetKey(de);
    18         robj *keyobj;
    19 
    20         if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) {
    21 
    22             keyobj = createStringObject(key,sdslen(key));
    23 
    24             if (expireIfNeeded(c->db,keyobj) == 0) {
    25                 addReplyBulk(c,keyobj);
    26                 numkeys++;
    27             }
    28 
    29             decrRefCount(keyobj);
    30         }
    31     }
    32 
    33     /* 释放迭代器 */          
    34     dictReleaseIterator(di);
    35 
    36     setDeferredMultiBulkLength(c,replylen,numkeys);
    37 }

    有了上面的应用场景,我们再来看上面几个函数的实现就好办了。

    先看dictIterator结构体:

     1 /* If safe is set to 1 this is a safe iterator, that means, you can call
     2  * dictAdd, dictFind, and other functions against the dictionary even while
     3  * iterating. Otherwise it is a non safe iterator, and only dictNext()
     4  * should be called while iterating. */
     5 typedef struct dictIterator {
     6         
     7     // 被迭代的字典
     8     dict *d;
     9 
    10     // table :正在被迭代的哈希表号码,值可以是 0 或 1 。  dictht ht[2];中的哪一个
    11     // index :迭代器当前所指向的哈希表索引位置。  对应具体的桶槽位号
    12     // safe :标识这个迭代器是否安全
    13     int table, index, safe;
    14 
    15     // entry :当前迭代到的节点的指针
    16     // nextEntry :当前迭代节点的下一个节点
    17     dictEntry *entry, *nextEntry;
    18 
    19     long long fingerprint; /* unsafe iterator fingerprint for misuse detection */
    20 } dictIterator;

    dictGetIterator:创建迭代器

     1 dictIterator *dictGetIterator(dict *d)
     2 {
     3     dictIterator *iter = zmalloc(sizeof(*iter));
     4 
     5     iter->d = d;
     6     iter->table = 0;
     7     iter->index = -1;
     8     iter->safe = 0;
     9     iter->entry = NULL;
    10     iter->nextEntry = NULL;
    11 
    12     return iter;
    13 }

    dictNext:迭代,遍历桶中的所有节点,第一次调用该函数的时候直接获取具体桶的首元素,否则使用nextEntry。这个函数比较难理解……

     1 dictEntry *dictNext(dictIterator *iter)
     2 {
     3     while (1) {
     4 
     5         // 进入这个分支有两种可能:
     6         // 1) 这是迭代器第一次运行
     7         // 2) 当前索引链表中的节点已经迭代完(NULL 为链表的表尾)
     8         if (iter->entry == NULL) {
     9 
    10             // 指向被迭代的哈希表
    11             dictht *ht = &iter->d->ht[iter->table];
    12 
    13             // 初次迭代时执行
    14             if (iter->index == -1 && iter->table == 0) {
    15                 // 如果是安全迭代器,那么更新安全迭代器计数器
    16                 if (iter->safe)
    17                     iter->d->iterators++;
    18                 // 如果是不安全迭代器,那么计算指纹
    19                 else
    20                     iter->fingerprint = dictFingerprint(iter->d);
    21             }
    22             // 更新索引
    23             iter->index++; //默认值为-1,自加后刚好为0,即第一个桶
    24 
    25             // 如果迭代器的当前索引大于等于当前被迭代的哈希表的大小
    26             // 那么说明这个哈希表已经迭代完毕
    27             if (iter->index >= (signed) ht->size) {
    28                 // 如果正在 rehash 的话,那么说明 1 号哈希表也正在使用中
    29                 // 那么继续对 1 号哈希表进行迭代
    30                 if (dictIsRehashing(iter->d) && iter->table == 0) {
    31                     iter->table++;
    32                     iter->index = 0;
    33                     ht = &iter->d->ht[1];
    34                 // 如果没有 rehash ,那么说明迭代已经完成
    35                 } else {
    36                     break;
    37                 }
    38             }
    39 
    40             // 如果进行到这里,说明这个哈希表并未迭代完
    41             // 更新节点指针,指向下个索引链表的表头节点
    42             iter->entry = ht->table[iter->index];
    43         } else {
    44             // 执行到这里,说明程序正在迭代某个链表
    45             // 将节点指针指向链表的下个节点
    46             iter->entry = iter->nextEntry;
    47         }
    48 
    49         if (iter->entry) {
    50             /* We need to save the 'next' here, the iterator user
    51              * may delete the entry we are returning. */
    52             iter->nextEntry = iter->entry->next;
    53             return iter->entry;
    54         }
    55     }
    56 
    57     // 迭代完毕
    58     return NULL;
    59 }

    dictReleaseIterator:释放迭代器,没有什么好说的了

     1 void dictReleaseIterator(dictIterator *iter)
     2 {
     3 
     4     if (!(iter->index == -1 && iter->table == 0)) {
     5         // 释放安全迭代器时,安全迭代器计数器减一
     6         if (iter->safe)
     7             iter->d->iterators--;
     8         // 释放不安全迭代器时,验证指纹是否有变化
     9         else
    10             assert(iter->fingerprint == dictFingerprint(iter->d));
    11     }
    12     zfree(iter);
    13 }

    关于迭代器里面,安全和非安全迭代器还不是特别明白;另外dictScan暂时没有深入研究,后续再深入研究吧。

  • 相关阅读:
    MongoDB查询语句 (增、删、改、查)
    MongoDB简单查询语句
    jquery Select Change事件
    c# 远程监控(4) 接收端 RTP包重组 分屏显示
    c# 远程监控(3) RTP协议 RTP.NET.DLL
    c# 远程监控(1) 大纲
    c# 远程监控(2) 摄像头调研及模拟
    TortoiseGit记住用户名和密码
    winform ListView和DataGridView实现分页
    制作符合平台的CodeSmith代码生产模版
  • 原文地址:https://www.cnblogs.com/abc-begin/p/7535379.html
Copyright © 2020-2023  润新知