• redis 5.0.2 源码阅读——字典dict


    redis中字典相关的文件为:dict.h与dict.c

    与其说是一个字典,倒不如说是一个哈希表。

    一、数据结构

    1.1 dictEntry结构体

     1 /**
     2  * dictEntry是一个kv对的单向链表,其中v是一个联合体,支持数字,或者是指向一块内存的指针。
     3  */
     4 typedef struct dictEntry {
     5     void *key;
     6     union {
     7         void *val;
     8         uint64_t u64;
     9         int64_t s64;
    10         double d;
    11     } v;
    12     struct dictEntry *next;
    13 } dictEntry;

    具体结构形如

     1 /*
     2 +---------------+
     3 |void *key      |
     4 +---------------+
     5 |union{...} v   |
     6 +---------------+
     7 |dictEntry *next|---+
     8 +---------------+   |
     9                     |
    10 +---------------+ <-+
    11 |void *key      |
    12 +---------------+
    13 |union{...} v   |
    14 +---------------+
    15 |dictEntry *next|
    16 +---------------+
    17 */

    为了节约篇幅,后续用以下结构表示

    1 /*
    2 +---+  +---+
    3 |K|V|->|K|V|->NULL
    4 +---+  +---+
    5 */

    1.2 dictht结构体

     1 /**
     2  * This is our hash table structure. Every dictionary has two of this as we
     3  * implement incremental rehashing, for the old to the new table.
     4  * 这是我们的哈希表结构。 每个字典都有两个这样的,因为我们实现了增量重新散列,从旧表到新表。
     5  * 使用开链法解决冲突问题
     6  *
     7  * 其中,table指向大小为sizeof(dictEntry*) * size的一片内存空间,每个dictEntry*可以视为一个bucket,
     8  * 每个bucket下挂着一个dictEntry单向链表。
     9  * size的值始终为2的幂,而sizemask的值始终为size-1,其作用是决定kv对要挂在哪个bucket上。
    10  * 举个例子,size=4时,sizemask=3,其二进制为 0011,若通过hash函数计算出来key对应的hash值hash_value为5,
    11  * 二进制为0101,则通过位运算 sizemask & hash_value = 0011 & 0101 = 0001,十进制为1,则将会挂在idx = 1的bucket上。
    12  */
    13 typedef struct dictht {
    14     //dictEntry*类型的数组
    15     dictEntry **table;
    16     //dictEntry*数组的长度
    17     unsigned long size;
    18     /**
    19      * 这样写可能更容易理解
    20      * const unsigned long size = 4;
    21      * dictEntry *table[size];
    22      */
    23 
    24     //sizemask,始终为size-1
    25     unsigned long sizemask;
    26 
    27     //当前总dictEntry数量
    28     unsigned long used;
    29 } dictht;

    dictht是一个hash table,整体结构大致为

     1 /*
     2 +----------------------+   +---> +-----------------+  +---+
     3 |dictEntry **table     |---+     |dictEntry *bucket|->|K|V|->NULL
     4 +----------------------+         +-----------------+  +---+
     5 |unsigned long size = 4|         |dictEntry *bucket|->NULL
     6 +----------------------+         +-----------------+
     7 |unsigned long sizemask|         |dictEntry *bucket|->NULL
     8 +----------------------+         +-----------------+
     9 |unsigned long used    |         |dictEntry *bucket|->NULL
    10 +----------------------+         +-----------------+
    11 */

    1.3 dictType结构体

     1 /**
     2  * dictType用于自定义一些操作的方法,如hash函数、拷贝key、拷贝value、比较key、销毁key、销毁value。
     3  */
     4 typedef struct dictType {
     5     uint64_t (*hashFunction)(const void *key);
     6     void *(*keyDup)(void *privdata, const void *key);
     7     void *(*valDup)(void *privdata, const void *obj);
     8     int (*keyCompare)(void *privdata, const void *key1, const void *key2);
     9     void (*keyDestructor)(void *privdata, void *key);
    10     void (*valDestructor)(void *privdata, void *obj);
    11 } dictType;

    1.4 dict结构体

     1 typedef struct dict {
     2     dictType *type;
     3     //type中函数的传入参数
     4     void *privdata;
     5     dictht ht[2];
     6     /**
     7      * rehashidx,是与ht[2]配合实现渐进式rehash操作的。若使用一步到位的方式,
     8      * 当key的数量非常大的时候,rehashing期间,是会卡死所有操作的。
     9      */
    10     long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    11     /**
    12      * iterators,是用于记录当前使用的迭代器数量,与rehashing操作有关。
    13      */
    14     unsigned long iterators; /* number of iterators currently running */
    15 } dict;

    之前提到的dictType与dictht都是dict的成员变量。除此之外,还有privdata,是在创建dict的时候调用者传入,用于特定操作时回传给函数的。如

     1 /**
     2  * 利用宏定义实现函数的调用
     3  * 依次是设置有符号整型值、设置无符号整型值、设置double类型的值、
     4  *      释放key、设置key、判断key是否相等
     5  */
     6 #define dictSetSignedIntegerVal(entry, _val_) \
     7     do { (entry)->v.s64 = _val_; } while(0)
     8 
     9 #define dictSetUnsignedIntegerVal(entry, _val_) \
    10     do { (entry)->v.u64 = _val_; } while(0)
    11 
    12 #define dictSetDoubleVal(entry, _val_) \
    13     do { (entry)->v.d = _val_; } while(0)
    14 
    15 #define dictFreeKey(d, entry) \
    16     if ((d)->type->keyDestructor) \
    17         (d)->type->keyDestructor((d)->privdata, (entry)->key)
    18 
    19 #define dictSetKey(d, entry, _key_) do { \
    20     if ((d)->type->keyDup) \
    21         (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
    22     else \
    23         (entry)->key = (_key_); \
    24 } while(0)
    25 
    26 #define dictCompareKeys(d, key1, key2) \
    27     (((d)->type->keyCompare) ? \
    28         (d)->type->keyCompare((d)->privdata, key1, key2) : \
    29         (key1) == (key2))

    1.5 迭代器

    iterators,是用于记录当前使用的安全迭代器数量,与rehashing操作有关。

     1 /**
     2  * If safe is set to 1 this is a safe iterator, that means, you can call
     3  * dictAdd, dictFind, and other functions against the dictionary even while
     4  * iterating. Otherwise it is a non safe iterator, and only dictNext()
     5  * should be called while iterating.
     6  * 如果是个安全的迭代器,即safe == 1,则在迭代中可以调用dictAdd、dictFind等方法,否则只能调用dictNext。
     7  * index表示,ht[table]对应的bucket的idx。
     8  */
     9 typedef struct dictIterator {
    10     dict *d;
    11     long index;
    12     int table, safe;
    13     dictEntry *entry, *nextEntry;
    14     /* unsafe iterator fingerprint for misuse detection. */
    15     long long fingerprint;
    16 } dictIterator;

    整体结构如下:

     1 /*
     2 +---------+    /+-----------+   +-->+----------+  +---+
     3 |dictType*|   / |dictEntry**|---+   |dictEntry*|->|K|V|->NULL
     4 +---------+  /  +-----------+       +----------+  +---+
     5 |privdata | /   |size       |       |dictEntry*|->NULL
     6 +---------+/    +-----------+       +----------+
     7 |ht[2]    |     |sizemask   |       |dictEntry*|->NULL
     8 +---------+    +-----------+       +----------+
     9 |rehashidx|    |used       |       |dictEntry*|->NULL
    10 +---------+    +-----------+       +----------+
    11 |iterators|   
    12 +---------+    +-----------+
    13                 |dictEntry**|-->NULL
    14                 +-----------+
    15                 |size       |
    16                 +-----------+
    17                 |sizemask   |
    18                 +-----------+
    19                 |used       |
    20                 +-----------+
    21 */

    二、创建

    2.1 创建和复位函数

     1 /**
     2  * Reset a hash table already initialized with ht_init().
     3  * NOTE: This function should only be called by ht_destroy().
     4  * 重置一个使用ht_init函数初始化的哈希表
     5  * 只能通过ht_destroy函数调用
     6  */
     7 
     8 static void _dictReset(dictht *ht)
     9 {
    10     ht->table = NULL;
    11     ht->size = 0;
    12     ht->sizemask = 0;
    13     ht->used = 0;
    14 }
    15 
    16 /* Create a new hash table 创建一个新的哈希表*/
    17 dict *dictCreate(dictType *type,
    18         void *privDataPtr)
    19 {
    20     //分配内存空间
    21     dict *d = zmalloc(sizeof(*d));
    22 
    23     //对哈希表进行初始化
    24     _dictInit(d,type,privDataPtr);
    25     return d;
    26 }
    27 
    28 /* Initialize the hash table 哈希表的初始化*/
    29 int _dictInit(dict *d, dictType *type,
    30         void *privDataPtr)
    31 {
    32     _dictReset(&d->ht[0]);
    33     _dictReset(&d->ht[1]);
    34     d->type = type;
    35     d->privdata = privDataPtr;
    36     d->rehashidx = -1;
    37     d->iterators = 0;
    38     return DICT_OK;
    39 }

      可以调用dictCreate创建一个空的dict,它会分配好dict的空间,并初始化所有成员变量。在这里把privdata传入并保存。搜了一下整个redis源码的dictCreate调用,看到传入的值全为NULL。目前的理解暂时不清楚这个变量是什么时候赋值的。

    2.2 结构图

    初始化后的dict结构如下:

     1 /*
     2 +------------+    /+-----------+
     3 |dictType*   |   / |dictEntry**|-->NULL
     4 +------------+  /  +-----------+
     5 |privdata    | /   |size=0     |
     6 +------------+/    +-----------+
     7 |ht[2]       |     |sizemask=0 |
     8 +------------+    +-----------+
     9 |rehashidx=-1|    |used=0     |
    10 +------------+    +-----------+
    11 |iterators=0 |   
    12 +------------+    +-----------+
    13                    |dictEntry**|-->NULL
    14                    +-----------+
    15                    |size=0     |
    16                    +-----------+
    17                    |sizemask=0 |
    18                    +-----------+
    19                    |used=0     |
    20                    +-----------+
    21 */

    刚创建好的dict是存不了任何数据的,其两个hash table的size都为0

    2.3 resize函数

     1 /**
     2  * Resize the table to the minimal size that contains all the elements,
     3  * but with the invariant of a USED/BUCKETS ratio near to <= 1
     4  * 重新设置哈希表的大小,重新设置后的大小能保存所有的元素
     5  * 保持used/buckets的比例<=1不变
     6 */
     7 int dictResize(dict *d)
     8 {
     9     int minimal;
    10 
    11     /**
    12      * #define dictIsRehashing(d) ((d)->rehashidx != -1)
    13      * 当dict_can_resize为0或(d)->rehashidx不为-1时,直接返回1,也就是失败
    14      */
    15     if (!dict_can_resize || dictIsRehashing(d))
    16         return DICT_ERR;
    17     //得到当前元素的个数
    18     minimal = d->ht[0].used;
    19     if (minimal < DICT_HT_INITIAL_SIZE) //DICT_HT_INITIAL_SIZE:4
    20         minimal = DICT_HT_INITIAL_SIZE;
    21     return dictExpand(d, minimal);
    22 }
    23 
    24 /**
    25  * Expand or create the hash table
    26  * 扩容或者创建哈希表
    27  * d:原来的封装哈希表
    28  * size:期望的哈希表桶数
    29  *
    30  * 这个函数主要是产生一个新的HASH表(dictht);若不是首次初始化,还会将dict.rehashidx置为0,表示开始进行rehash动作
    31  */
    32 int dictExpand(dict *d, unsigned long size)
    33 {
    34     /* the size is invalid if it is smaller than the number of
    35      * elements already inside the hash table */
    36     if (dictIsRehashing(d) || d->ht[0].used > size)
    37         return DICT_ERR;
    38 
    39     dictht n; /* the new hash table */
    40     //得到合适大小的哈希表的桶数
    41     unsigned long realsize = _dictNextPower(size);
    42 
    43     /* Rehashing to the same table size is not useful. 如果容量大小没有发生变化,返回DICT_ERR,也就是1*/
    44     if (realsize == d->ht[0].size) return DICT_ERR;
    45 
    46     /**
    47      * Allocate the new hash table and initialize all pointers to NULL
    48      * 初始化新的哈希表的size和sizemask,为table分配内存空间
    49      */
    50     n.size = realsize;
    51     n.sizemask = realsize-1;
    52     n.table = zcalloc(realsize*sizeof(dictEntry*));
    53     n.used = 0;
    54 
    55     /**
    56      * Is this the first initialization? If so it's not really a rehashing
    57      * we just set the first hash table so that it can accept keys.
    58      * 判断是否是第一次初始化,如果是,那就不是rehashing操作,我们只需要设置ht的第一个哈希表(ht[0])
    59      * 然后返回DICT_OK,也就是0,成功。也就是说如果ht[0].table == NULL,说明是第一次初始化,
    60      * 那不是真正的重新哈希,相当于创建哈希表的操作,只需要设置第一个哈希表即可
    61      */
    62     if (d->ht[0].table == NULL) {
    63         d->ht[0] = n;
    64         return DICT_OK;
    65     }
    66 
    67     /**
    68      * Prepare a second hash table for incremental rehashing
    69      * 假设 ht[0] 不为空。那么这就是一次扩展字典的行为
    70      * 将新哈希表设置为 ht[1] ,并打开 rehash 标识
    71      */
    72     d->ht[1] = n;
    73     d->rehashidx = 0;
    74     return DICT_OK;
    75 }
    76 
    77 /* Our hash table capability is a power of two 哈希表的容量是2的幂*/
    78 static unsigned long _dictNextPower(unsigned long size)
    79 {
    80     //#define DICT_HT_INITIAL_SIZE     4
    81     unsigned long i = DICT_HT_INITIAL_SIZE;
    82 
    83     //如果size大于等于LONG_MAX,设置为LONG_MAX + 1LU
    84     if (size >= LONG_MAX)
    85         return LONG_MAX + 1LU;
    86     while(1) {
    87         if (i >= size)
    88             return i;
    89         //以两倍的速度扩大
    90         i *= 2;
    91     }
    92 }

      _dictNextPower用于获取当前要分配给hash table的size,得到的值一定是2的幂,最小值为4。

      dictExpand,从源码注释上看,它是为了扩容hash table,或者创建一个。它不允许与rehashing操作同时进行,也不能强制缩容。在使用_dictNextPower得到需要的size之后,它先是使用一个临时变量n去分配空间,然后进行判断,若ht[0].table的值为NULL,则认为是刚create出来的dict,直接把n赋值给ht[0],否则给ht[1],并开始rehashing操作。

    三、rehashing操作

    3.1 示例字典

    若有这样一个dict,假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3

     1 /*
     2                                                       +----+
     3                                                    +->|K1|V|->NULL
     4 +------------+    /+-----------+  +->+----------+ /   +----+
     5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/    +----+
     6 +------------+  /  +-----------+     +----------+ +-->|K2|V|->NULL
     7 |privdata    | /   |size=4     |     |dictEntry*|/    +----+
     8 +------------+/    +-----------+     +----------+
     9 |ht[2]       |     |sizemask=3 |     |dictEntry*|    +----+
    10 +------------+    +-----------+     +----------+ +-->|K3|V|->NULL
    11 |rehashidx=-1|    |used=4     |     |dictEntry*|    +----+
    12 +------------+    +-----------+     +----------+    +----+
    13 |iterators=0 |                                    +->|K4|V|->NULL
    14 +------------+    +-----------+                      +----+
    15                    |dictEntry**|-->NULL
    16                    +-----------+
    17                    |size=0     |
    18                    +-----------+
    19                    |sizemask=0 |
    20                    +-----------+
    21                    |used=0     |
    22                    +-----------+
    23 */

    3.2 是否rehashing判断

    判断是否需要对哈希表大小进行扩容

     1 /* Expand the hash table if needed 如果必要的话就扩大这个哈希表*/
     2 static int _dictExpandIfNeeded(dict *d)
     3 {
     4     /**
     5      * Incremental rehashing already in progress. Return.
     6      * 如果已经处于rehashing过程中
     7      * #define dictIsRehashing(d) ((d)->rehashidx != -1)
     8      */
     9     if (dictIsRehashing(d))
    10         return DICT_OK;
    11 
    12     /**
    13      * If the hash table is empty expand it to the initial size.
    14      * 如果哈希表是空的,就是直接将ht[0]扩容为哈希表的初始值4
    15      */
    16     if (d->ht[0].size == 0)
    17         return dictExpand(d, DICT_HT_INITIAL_SIZE);
    18 
    19     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
    20      * table (global setting) or we should avoid it but the ratio between
    21      * elements/buckets is over the "safe" threshold, we resize doubling
    22      * the number of buckets.
    23      *
    24      * static unsigned int dict_force_resize_ratio = 5;
    25      * static int dict_can_resize = 1;
    26      *
    27      * 当used >= size并且(dict_can_resize == TRUE或ht[0]哈希表中存在的元素个数超过哈希表桶数的五倍)的时候
    28      * 需要调用dictExpand进入rehashing状态。dict_can_resize默认为1
    29      *
    30      * 假设哈希表的已用节点数 >= 哈希表的大小。
    31      * 而且下面条件任一个为真:
    32      *      1) dict_can_resize 为真
    33      *      2) 已用节点数除以哈希表大小之比大于
    34      *          dict_force_resize_ratio
    35      * 那么调用 dictExpand 对哈希表进行扩展
    36      * 扩展的体积至少为已使用节点数的两倍
    37      *
    38      * DICT便会进行收缩。让total / bk_num 接近 1:1。
    39      */
    40     if (d->ht[0].used >= d->ht[0].size &&
    41         (dict_can_resize ||
    42          d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
    43     {
    44         //需要的size为当前used * 2,即为8。调用dictExpand之后的结构:
    45         return dictExpand(d, d->ht[0].used*2);
    46     }
    47     return DICT_OK;
    48 }

    通过函数_dictExpandIfNeeded,可知当used >= size,且dict_can_resize == TRUE或used/size超过dict_force_resize_ratio的时候,需要调用dictExpand进入rehashing状态。dict_can_resize默认为1

    1 static int dict_can_resize = 1;
    2 static unsigned int dict_force_resize_ratio = 5;

    3.3 开始rehashing

    需要的size为当前used * 2,即为8。调用dictExpand之后的结构:

     1 /*
     2                                                        +----+
     3                                                     +->|K1|V|->NULL
     4                                    +->+----------+ /   +----+
     5                                    |  |dictEntry*|/    +----+
     6                                    |  +----------+ +-->|K2|V|->NULL
     7                                    |  |dictEntry*|/    +----+
     8  +------------+    /+-----------+  |  +----------+
     9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|    +----+
    10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
    11  |privdata    | /   |size=4     |     |dictEntry*|    +----+
    12  +------------+/    +-----------+     +----------+    +----+
    13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
    14  +------------+    +-----------+                      +----+
    15  |rehashidx=0 |    |used=4     |
    16  +------------+    +-----------+
    17  |iterators=0 |   
    18  +------------+    +-----------+  +->+----------+
    19                     |dictEntry**|--+  |dictEntry*|->NULL
    20                     +-----------+     +----------+
    21                     |size=8     |     |dictEntry*|->NULL
    22                     +-----------+     +----------+
    23                     |sizemask=7 |     |dictEntry*|->NULL
    24                     +-----------+     +----------+
    25                     |used=0     |     |dictEntry*|->NULL
    26                     +-----------+     +----------+
    27                                       |dictEntry*|->NULL
    28                                       +----------+
    29                                       |dictEntry*|->NULL
    30                                       +----------+
    31                                       |dictEntry*|->NULL
    32                                       +----------+
    33                                       |dictEntry*|->NULL
    34                                       +----------+
    35 */

      经过_dictExpandIfNeeded可以知道是否需要进行rehash操作,如果需要的话,再通过dictExpand函数,就可以得到合适大小的哈希表,并且该函数还会将rehashidx设置为0,这样dictRehash函数就可以根据rehashidx进行rehashing操作

     1 /**
     2  * Performs N steps of incremental rehashing. Returns 1 if there are still
     3  * keys to move from the old to the new hash table, otherwise 0 is returned.
     4  *
     5  * Note that a rehashing step consists in moving a bucket (that may have more
     6  * than one key as we use chaining) from the old to the new hash table, however
     7  * since part of the hash table may be composed of empty spaces, it is not
     8  * guaranteed that this function will rehash even a single bucket, since it
     9  * will visit at max N*10 empty buckets in total, otherwise the amount of
    10  * work it does would be unbound and the function may block for a long time.
    11  *
    12  * 实现持续的重新哈希,如果还有需要重新哈希的key,返回1,否则返回0
    13  *
    14  * 需要注意的是,一步rehash是指将一个bucket(由于使用链表法,其中可能有多个key)从老的哈希表移到新的哈希表,但是,因为哈希表中有的bucket是空的,
    15  * 所以函数不能保证一定会rehash哪怕一个bucket,因为函数最多一共只会访问N*10个空bucket,不然的话,
    16  * 函数的工作量将没有上限,可能会阻塞很长一段时间
    17  */
    18 int dictRehash(dict *d, int n) {
    19     int empty_visits = n*10; /* Max number of empty buckets to visit. */
    20     if (!dictIsRehashing(d)) return 0;
    21 
    22     while(n-- && d->ht[0].used != 0) {
    23         dictEntry *de, *nextde;
    24 
    25         /* Note that rehashidx can't overflow as we are sure there are more
    26          * elements because ht[0].used != 0 */
    27         assert(d->ht[0].size > (unsigned long)d->rehashidx);
    28 
    29         /* 找到非空的哈希表下标 */
    30         while(d->ht[0].table[d->rehashidx] == NULL) {
    31             d->rehashidx++;
    32             /**
    33              * rehashing时最多允许访问10n个空bucket,达到上限就要退出流程
    34              */
    35             if (--empty_visits == 0)
    36                 return 1;
    37         }
    38         de = d->ht[0].table[d->rehashidx];
    39         /**
    40          * Move all the keys in this bucket from the old to the new hash HT
    41          * 实现将bucket从老的哈希表移到新的哈希表
    42          */
    43         while(de) {
    44             uint64_t h;
    45 
    46             nextde = de->next;
    47             /* Get the index in the new hash table 获取哈希值*/
    48             h = dictHashKey(d, de->key) & d->ht[1].sizemask;
    49             de->next = d->ht[1].table[h];
    50             d->ht[1].table[h] = de;
    51             d->ht[0].used--;
    52             d->ht[1].used++;
    53             de = nextde;
    54         }
    55         d->ht[0].table[d->rehashidx] = NULL;
    56         d->rehashidx++;
    57     }
    58 
    59     /**
    60      * Check if we already rehashed the whole table...
    61      * 当ht[0]->used为0时,认为ht[0]的所有dictEntry已经移至ht[1],此时return 0,
    62      * 否则 return 1,告诉调用者,还需要继续进行rehashing操作.
    63      */
    64     if (d->ht[0].used == 0) {
    65         /**
    66          * 此时ht[0]->used为0,释放原ht[0]的hash table,把ht[1]赋值给ht[0],并设置ht[1] = NULL,
    67          * 最后重置rehashidx=-1,rehashing操作结束
    68          */
    69         zfree(d->ht[0].table);
    70         d->ht[0] = d->ht[1];
    71         _dictReset(&d->ht[1]);
    72         d->rehashidx = -1;
    73         return 0;
    74     }
    75 
    76     /* More to rehash... */
    77     return 1;
    78 }

      rehashing操作将会把ht[0]里,rehashidx的值对应的bucket下的所有dictEntry,移至ht[1],之后对rehashidx进行自增处理。当ht[0]->used为0时,认为ht[0]的所有dictEntry已经移至ht[1],此时return 0,否则 return 1,告诉调用者,还需要继续进行rehashing操作。同时,rehashing时允许最多跳过10n个空bucket,否则,就要退出流程,返回1。假设传入的n=1,即只进行一次rehashing操作,这一步完成之后的结构:

     1 /*
     2 
     3                                                     +->NULL
     4                                    +->+----------+ /
     5                                    |  |dictEntry*|/    +----+
     6                                    |  +----------+ +-->|K2|V|->NULL
     7                                    |  |dictEntry*|/    +----+
     8  +------------+    /+-----------+  |  +----------+
     9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|    +----+
    10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
    11  |privdata    | /   |size=4     |     |dictEntry*|    +----+
    12  +------------+/    +-----------+     +----------+    +----+
    13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
    14  +------------+    +-----------+                      +----+
    15  |rehashidx=1 |    |used=3     |
    16  +------------+    +-----------+
    17  |iterators=0 |   
    18  +------------+    +-----------+  +->+----------+   +----+
    19                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->NULL
    20                     +-----------+     +----------+   +----+
    21                     |size=8     |     |dictEntry*|->NULL
    22                     +-----------+     +----------+
    23                     |sizemask=7 |     |dictEntry*|->NULL
    24                     +-----------+     +----------+
    25                     |used=1     |     |dictEntry*|->NULL
    26                     +-----------+     +----------+
    27                                       |dictEntry*|->NULL
    28                                       +----------+
    29                                       |dictEntry*|->NULL
    30                                       +----------+
    31                                       |dictEntry*|->NULL
    32                                       +----------+
    33                                       |dictEntry*|->NULL
    34                                       +----------+
    35 */

    所有节点移完时

     1 /*
     2 
     3 
     4                                    +->+----------+
     5                                    |  |dictEntry*|->NULL
     6                                    |  +----------+
     7                                    |  |dictEntry*|->NULL
     8  +------------+    /+-----------+  |  +----------+
     9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|->NULL
    10  +------------+  /  +-----------+     +----------+
    11  |privdata    | /   |size=4     |     |dictEntry*|->NULL
    12  +------------+/    +-----------+     +----------+
    13  |ht[2]       |     |sizemask=3 |
    14  +------------+    +-----------+
    15  |rehashidx=4 |    |used=0     |
    16  +------------+    +-----------+
    17  |iterators=0 |   
    18  +------------+    +-----------+  +->+----------+   +----+
    19                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->NULL
    20                     +-----------+     +----------+   +----+
    21                     |size=8     |     |dictEntry*|->NULL
    22                     +-----------+     +----------+   +----+
    23                     |sizemask=7 |     |dictEntry*|-->|K3|V|->NULL
    24                     +-----------+     +----------+   +----+
    25                     |used=4     |     |dictEntry*|->NULL
    26                     +-----------+     +----------+
    27                                       |dictEntry*|->NULL
    28                                       +----------+   +----+
    29                                       |dictEntry*|-->|K2|V|->NULL
    30                                       +----------+   +----+
    31                                       |dictEntry*|->NULL
    32                                       +----------+   +----+
    33                                       |dictEntry*|-->|K4|V|->NULL
    34                                       +----------+   +----+
    35 */

    此时ht[0]->used为0,释放原ht[0]的hash table,把ht[1]赋值给ht[0],并设置ht[1] = NULL,最后重置rehashidx=-1,rehashing操作结束

     1 /*
     2  +------------+    /+-----------+   +-->+----------+   +----+
     3  |dictType*   |   / |dictEntry**|---+   |dictEntry*|-->|K1|V|->NULL
     4  +------------+  /  +-----------+       +----------+   +----+
     5  |privdata    | /   |size=8     |       |dictEntry*|->NULL
     6  +------------+/    +-----------+       +----------+   +----+
     7  |ht[2]       |     |sizemask=7 |       |dictEntry*|-->|K3|V|->NULL
     8  +------------+    +-----------+       +----------+   +----+
     9  |rehashidx=-1|    |used=4     |       |dictEntry*|->NULL
    10  +------------+    +-----------+       +----------+
    11  |iterators=0 |                        |dictEntry*|->NULL
    12  +------------+    +-----------+       +----------+   +----+
    13                     |dictEntry**|->NULL |dictEntry*|-->|K2|V|->NULL
    14                     +-----------+       +----------+   +----+
    15                     |size=0     |       |dictEntry*|->NULL
    16                     +-----------+       +----------+   +----+
    17                     |sizemask=0 |       |dictEntry*|-->|K4|V|->NULL
    18                     +-----------+       +----------+   +----+
    19                     |used=0     |
    20                     +-----------+
    21 */

    3.4 rehashing操作的触发共有两种方式

    3.4.1 定时操作

     1 /**
     2  * 返回当前时间,单位:毫秒
     3  */
     4 long long timeInMilliseconds(void) {
     5     struct timeval tv;
     6 
     7     gettimeofday(&tv,NULL);
     8     return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
     9 }
    10 
    11 /**
    12  * Rehash for an amount of time between ms milliseconds and ms+1 milliseconds 
    13  * 在限定时间内执行rehash,耗时介于ms毫秒与ms+1毫秒之间
    14  */
    15 int dictRehashMilliseconds(dict *d, int ms) {
    16     //获取当前的时间,单位是毫秒
    17     long long start = timeInMilliseconds();
    18     int rehashes = 0;
    19 
    20     while(dictRehash(d,100)) {
    21         rehashes += 100;
    22         if (timeInMilliseconds()-start > ms) break;
    23     }
    24     return rehashes;
    25 }

    外部传入一个毫秒时间(实际上就是1ms),在这时间内循环执行rehashing,每次执行100次。

    3.4.2 操作时触发

     1 /* This function performs just a step of rehashing, and only if there are
     2  * no safe iterators bound to our hash table. When we have iterators in the
     3  * middle of a rehashing we can't mess with the two hash tables otherwise
     4  * some element can be missed or duplicated.
     5  *
     6  * This function is called by common lookup or update operations in the
     7  * dictionary so that the hash table automatically migrates from H1 to H2
     8  * while it is actively used.
     9  * 在插入、删除、查找等操作时,顺带执行一次rehashing操作。
    10  * 值得注意的是,如果存在安全的迭代器,即d->iterators != 0,则不会进行rehashing操作
    11  * */
    12 static void _dictRehashStep(dict *d) {
    13     if (d->iterators == 0) dictRehash(d,1);
    14 }

    四、插入

    4.1 获取插入位置

    获取可插入新节点的bucket idx的方法

     1 /* Returns the index of a free slot that can be populated with
     2  * a hash entry for the given 'key'.
     3  * If the key already exists, -1 is returned
     4  * and the optional output parameter may be filled.
     5  *
     6  * Note that if we are in the process of rehashing the hash table, the
     7  * index is always returned in the context of the second (new) hash table.
     8  * 获取可插入新节点的bucket idx
     9  *
    10  * 此方法在进行查找idx之前,先进行一次判断,是否需要rehashing操作。而后进行查找。
    11  * idx的值就是通过hash函数计算出来的hash_value与sizemask做位运算的结果,然后遍历此idx对应的bucket,
    12  * 若已存在相同的key,则认为不可插入,并把对应的dictEntry用传入的二级指针的方式传出,供调用者使用。
    13  * 若不存在,则需要判断是否正在进行rehashing操作。若在,则会对ht[1]做一次相同的操作。最终可以得到一个idx值,
    14  * 或传出一个dictEntry。
    15  *
    16  * 由于rehashing期间,将会把ht[0]的所有dictEntry依次转移至ht[1],
    17  * 为了防止新插入的dictEntry落到ht[0]已完成rehashing操作的bucket上,在rehashing期间,
    18  * 返回的可插入的idx一定是属于ht[1]的。
    19  */
    20 static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
    21 {
    22     unsigned long idx, table;
    23     dictEntry *he;
    24     if (existing) *existing = NULL;
    25 
    26     /* Expand the hash table if needed 如果有需要,对字典进行扩展*/
    27     if (_dictExpandIfNeeded(d) == DICT_ERR)
    28         return -1;
    29     // 在两个哈希表(ht[0]、ht[1])中进行查找给定 key
    30     for (table = 0; table <= 1; table++) {
    31         /**
    32          * 依据哈希值和哈希表的 sizemask
    33          * 计算出 key 可能出现在 table 数组中的哪个索引
    34          */
    35         idx = hash & d->ht[table].sizemask;
    36         /* Search if this slot does not already contain the given key */
    37         he = d->ht[table].table[idx];
    38 
    39         /**
    40          * 在节点链表里查找给定 key
    41          * 由于链表的元素数量通常为 1 或者是一个非常小的比率
    42          * 所以能够将这个操作看作 O(1) 来处理
    43          */
    44         while(he) {
    45             // key 已经存在
    46             if (key==he->key || dictCompareKeys(d, key, he->key)) {
    47                 if (existing) *existing = he;
    48                 return -1;
    49             }
    50             he = he->next;
    51         }
    52         /**
    53          * 第一次执行到这里时,说明已经查找完 d->ht[0] 了
    54          * 这时如果哈希表不在 rehash 过程中,就没有必要查找 d->ht[1]
    55          */
    56         if (!dictIsRehashing(d)) break;
    57     }
    58     return idx;
    59 }

    4.2 插入函数

     1 /**
     2  * Low level add or find:底层的添加和查找函数
     3  * This function adds the entry but instead of setting a value returns the
     4  * dictEntry structure to the user, that will make sure to fill the value
     5  * field as he wishes.
     6  *
     7  * This function is also directly exposed to the user API to be called
     8  * mainly in order to store non-pointers inside the hash value, example:
     9  *
    10  * entry = dictAddRaw(dict,mykey,NULL);
    11  * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
    12  *
    13  * Return values:
    14  *
    15  * If key already exists NULL is returned, and "*existing" is populated
    16  * with the existing entry if existing is not NULL.
    17  *
    18  * If key was added, the hash entry is returned to be manipulated by the caller.
    19  *
    20  * 函数会添加一个entry,但并不设置值,而是把dictEntry结构返回给调用者,由调用者按自己的需要填充值字段
    21  * 该函数也直接作为API暴露给用户调用,主要用于在哈希值中保存非指针类型的数据,比如:
    22  * entry = dictAddRaw(dict,mykey,NULL);
    23  * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
    24  *
    25  * 若不存在相同key,则插入,否则,传出dictEntry的指针。插入时,由于没有记录每个dictEntry链表的尾指针,
    26  * 所以使用头插法,可以节约插入时的时间消耗。
    27  */
    28 dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
    29 {
    30     long index;
    31     dictEntry *entry;
    32     dictht *ht;
    33 
    34     // 如果正在rehash,顺带执行rehash操作
    35     if (dictIsRehashing(d))
    36         _dictRehashStep(d);
    37 
    38     /**
    39      * Get the index of the new element, or -1 if
    40      * the element already exists.
    41      * 获取新元素的下标,如果已经存在,返回-1
    42      */
    43     if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
    44         return NULL;
    45 
    46     /* Allocate the memory and store the new entry.
    47      * Insert the element in top, with the assumption that in a database
    48      * system it is more likely that recently added entries are accessed
    49      * more frequently.
    50      * 如果正在进行rehash操作,返回ht[1],否则返回ht[0]
    51      * */
    52     ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    53     entry = zmalloc(sizeof(*entry));
    54     //插入头部
    55     entry->next = ht->table[index];
    56     //更改头部节点
    57     ht->table[index] = entry;
    58     //节点增加
    59     ht->used++;
    60 
    61     /* Set the hash entry fields. 设置entry中的key*/
    62     dictSetKey(d, entry, key);
    63     //返回插入元素对应的哈希节点
    64     return entry;
    65 }

    若不存在相同key,则插入,否则,传出dictEntry的指针。插入时,由于没有记录每个dictEntry链表的尾指针,所以使用头插法,可以节约插入时的时间消耗

    4.3 上层插入函数

    dictAddRaw作为最终插入的方法,被多个方法所调用:

     1 /**
     2  * Add an element to the target hash table
     3  * 向目标哈希表中给添加一个元素
     4  * 若不存在,则插入,否则,报错
     5  */
     6 int dictAdd(dict *d, void *key, void *val)
     7 {
     8     dictEntry *entry = dictAddRaw(d,key,NULL);
     9 
    10     if (!entry) return DICT_ERR;
    11     dictSetVal(d, entry, val);
    12     return DICT_OK;
    13 }
    14 
    15 /**
    16  * Add or Overwrite:
    17  * Add an element, discarding the old value if the key already exists.
    18  * Return 1 if the key was added from scratch, 0 if there was already an
    19  * element with such key and dictReplace() just performed a value update
    20  * operation.
    21  * 若存在,则替换value,否则插入
    22  */
    23 int dictReplace(dict *d, void *key, void *val)
    24 {
    25     dictEntry *entry, *existing, auxentry;
    26 
    27     /**
    28      * Try to add the element. If the key
    29      * does not exists dictAdd will succeed.
    30      * 如果添加成功,dictAddRaw返回非空值
    31      */
    32     entry = dictAddRaw(d,key,&existing);
    33     if (entry) {
    34         dictSetVal(d, entry, val);
    35         return 1;
    36     }
    37 
    38     /**
    39      * Set the new value and free the old one. Note that it is important
    40      * to do that in this order, as the value may just be exactly the same
    41      * as the previous one. In this context, think to reference counting,
    42      * you want to increment (set), and then decrement (free), and not the
    43      * reverse.
    44      * 设置新值,释放旧值,这个顺序很重要,因为值可能是与原来一样的
    45      * 在这个上下文里,考虑引用计数,我们希望的是先加再减,而不是反过来
    46      */
    47     auxentry = *existing;
    48     dictSetVal(d, existing, val);
    49     dictFreeVal(d, &auxentry);
    50     return 0;
    51 }
    52 
    53 /* Add or Find:
    54  * dictAddOrFind() is simply a version of dictAddRaw() that always
    55  * returns the hash entry of the specified key, even if the key already
    56  * exists and can't be added (in that case the entry of the already
    57  * existing key is returned.)
    58  *
    59  * See dictAddRaw() for more information.
    60  * 若存在,则返回对应dictEntry,否则插入后返回新的dictEntry
    61  */
    62 dictEntry *dictAddOrFind(dict *d, void *key) {
    63     dictEntry *entry, *existing;
    64     entry = dictAddRaw(d,key,&existing);
    65     return entry ? entry : existing;
    66 }

    4.4 插入过程

    对于一个刚刚create的dict:

     1 /*
     2 
     3 +------------+    /+-----------+
     4 |dictType*   |   / |dictEntry**|-->NULL
     5 +------------+  /  +-----------+
     6 |privdata    | /   |size=0     |
     7 +------------+/    +-----------+
     8 |ht[2]       |     |sizemask=0 |
     9 +------------+    +-----------+
    10 |rehashidx=-1|    |used=0     |
    11 +------------+    +-----------+
    12 |iterators=0 |   
    13 +------------+    +-----------+
    14                    |dictEntry**|-->NULL
    15                    +-----------+
    16                    |size=0     |
    17                    +-----------+
    18                    |sizemask=0 |
    19                    +-----------+
    20                    |used=0     |
    21                    +-----------+
    22 */

    假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3

    4.4.1 插入K1

    现调用dictAdd方法进行插入

    执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:

     1 /*
     2 
     3                                                    +-->NULL
     4 +------------+    /+-----------+  +->+----------+ /
     5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/
     6 +------------+  /  +-----------+     +----------+ +--->NULL
     7 |privdata    | /   |size=4     |     |dictEntry*|/
     8 +------------+/    +-----------+     +----------+
     9 |ht[2]       |     |sizemask=3 |     |dictEntry*|
    10 +------------+    +-----------+     +----------+ +--->NULL
    11 |rehashidx=-1|    |used=0     |     |dictEntry*|
    12 +------------+    +-----------+     +----------+ 
    13 |iterators=0 |                                    +-->NULL
    14 +------------+    +-----------+
    15                    |dictEntry**|-->NULL
    16                    +-----------+
    17                    |size=0     |
    18                    +-----------+
    19                    |sizemask=0 |
    20                    +-----------+
    21                    |used=0     |
    22                    +-----------+
    23 */

    同时得到其在ht[0]的idx = 0,且不在rehashing操作中,于是直接插入

     1 /*
     2                                                       +----+
     3                                                    +->|K1|V|->NULL
     4 +------------+    /+-----------+  +->+----------+ /   +----+
     5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/
     6 +------------+  /  +-----------+     +----------+ +--->NULL
     7 |privdata    | /   |size=4     |     |dictEntry*|/
     8 +------------+/    +-----------+     +----------+
     9 |ht[2]       |     |sizemask=3 |     |dictEntry*|
    10 +------------+    +-----------+     +----------+ +--->NULL
    11 |rehashidx=-1|    |used=1     |     |dictEntry*|
    12 +------------+    +-----------+     +----------+ 
    13 |iterators=0 |                                    +-->NULL
    14 +------------+    +-----------+
    15                    |dictEntry**|-->NULL
    16                    +-----------+
    17                    |size=0     |
    18                    +-----------+
    19                    |sizemask=0 |
    20                    +-----------+
    21                    |used=0     |
    22                    +-----------+
    23 */

    4.4.2 依次插入K2、K3、K4后

     1 /*
     2                                                       +----+
     3                                                    +->|K1|V|->NULL
     4 +------------+    /+-----------+  +->+----------+ /   +----+
     5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/    +----+
     6 +------------+  /  +-----------+     +----------+ +-->|K2|V|->NULL
     7 |privdata    | /   |size=4     |     |dictEntry*|/    +----+
     8 +------------+/    +-----------+     +----------+
     9 |ht[2]       |     |sizemask=3 |     |dictEntry*|    +----+
    10 +------------+    +-----------+     +----------+ +-->|K3|V|->NULL
    11 |rehashidx=-1|    |used=4     |     |dictEntry*|    +----+
    12 +------------+    +-----------+     +----------+    +----+
    13 |iterators=0 |                                    +->|K4|V|->NULL
    14 +------------+    +-----------+                      +----+
    15                    |dictEntry**|-->NULL
    16                    +-----------+
    17                    |size=0     |
    18                    +-----------+
    19                    |sizemask=0 |
    20                    +-----------+
    21                    |used=0     |
    22                    +-----------+
    23 */

    4.4.3 此时若有一个K5

    计算出来的hash值为8,则:

    i.因此刻不在rehashing操作,所以不用做处理

    ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:

     1 /*
     2                                                        +----+
     3                                                     +->|K1|V|->NULL
     4                                    +->+----------+ /   +----+
     5                                    |  |dictEntry*|/    +----+
     6                                    |  +----------+ +-->|K2|V|->NULL
     7                                    |  |dictEntry*|/    +----+
     8  +------------+    /+-----------+  |  +----------+
     9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|    +----+
    10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
    11  |privdata    | /   |size=4     |     |dictEntry*|    +----+
    12  +------------+/    +-----------+     +----------+    +----+
    13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
    14  +------------+    +-----------+                      +----+
    15  |rehashidx=0 |    |used=4     |
    16  +------------+    +-----------+
    17  |iterators=0 |   
    18  +------------+    +-----------+  +->+----------+
    19                     |dictEntry**|--+  |dictEntry*|->NULL
    20                     +-----------+     +----------+
    21                     |size=8     |     |dictEntry*|->NULL
    22                     +-----------+     +----------+
    23                     |sizemask=7 |     |dictEntry*|->NULL
    24                     +-----------+     +----------+
    25                     |used=0     |     |dictEntry*|->NULL
    26                     +-----------+     +----------+
    27                                       |dictEntry*|->NULL
    28                                       +----------+
    29                                       |dictEntry*|->NULL
    30                                       +----------+
    31                                       |dictEntry*|->NULL
    32                                       +----------+
    33                                       |dictEntry*|->NULL
    34                                       +----------+
    35 */

    同时得到其在ht[1]的idx=0

    iii.插入,因为此时已经满足了扩容的条件,字典正处于rehashing过程中,所以将元素插入ht[1]对应的哈希表。通常情况下,如果不是处于rehashing过程中,就会将元素插入ht[0]对应的哈希表

     1 /*
     2                                                        +----+
     3                                                     +->|K1|V|->NULL
     4                                    +->+----------+ /   +----+
     5                                    |  |dictEntry*|/    +----+
     6                                    |  +----------+ +-->|K2|V|->NULL
     7                                    |  |dictEntry*|/    +----+
     8  +------------+    /+-----------+  |  +----------+
     9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|    +----+
    10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
    11  |privdata    | /   |size=4     |     |dictEntry*|    +----+
    12  +------------+/    +-----------+     +----------+    +----+
    13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
    14  +------------+    +-----------+                      +----+
    15  |rehashidx=0 |    |used=4     |
    16  +------------+    +-----------+
    17  |iterators=0 |   
    18  +------------+    +-----------+  +->+----------+   +----+
    19                     |dictEntry**|--+  |dictEntry*|-->|K5|V|->NULL
    20                     +-----------+     +----------+   +----+
    21                     |size=8     |     |dictEntry*|->NULL
    22                     +-----------+     +----------+
    23                     |sizemask=7 |     |dictEntry*|->NULL
    24                     +-----------+     +----------+
    25                     |used=1     |     |dictEntry*|->NULL
    26                     +-----------+     +----------+
    27                                       |dictEntry*|->NULL
    28                                       +----------+
    29                                       |dictEntry*|->NULL
    30                                       +----------+
    31                                       |dictEntry*|->NULL
    32                                       +----------+
    33                                       |dictEntry*|->NULL
    34                                       +----------+
    35 */

    4.4.4 此时若有一个K6

    计算出来的hash值为16,则:

    i.此时已处于rehashing操作中,先执行一步rehash,将ht[0]中的K1|V移入ht[1]中:

     1 /*
     2 
     3                                                     +-->NULL
     4                                    +->+----------+ /
     5                                    |  |dictEntry*|/    +----+
     6                                    |  +----------+ +-->|K2|V|->NULL
     7                                    |  |dictEntry*|/    +----+
     8  +------------+    /+-----------+  |  +----------+
     9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|    +----+
    10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
    11  |privdata    | /   |size=4     |     |dictEntry*|    +----+
    12  +------------+/    +-----------+     +----------+    +----+
    13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
    14  +------------+    +-----------+                      +----+
    15  |rehashidx=1 |    |used=3     |
    16  +------------+    +-----------+
    17  |iterators=0 |   
    18  +------------+    +-----------+  +->+----------+   +----+  +----+
    19                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->|K5|V|->NULL
    20                     +-----------+     +----------+   +----+  +----+
    21                     |size=8     |     |dictEntry*|->NULL
    22                     +-----------+     +----------+
    23                     |sizemask=7 |     |dictEntry*|->NULL
    24                     +-----------+     +----------+
    25                     |used=2     |     |dictEntry*|->NULL
    26                     +-----------+     +----------+
    27                                       |dictEntry*|->NULL
    28                                       +----------+
    29                                       |dictEntry*|->NULL
    30                                       +----------+
    31                                       |dictEntry*|->NULL
    32                                       +----------+
    33                                       |dictEntry*|->NULL
    34                                       +----------+
    35 */

    ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded,因已在进行rehashing,所以不做任何处理,只返回其在ht[1]的idx 0

    iii.头插法将K6插入

     1 /*
     2 
     3                                                     +-->NULL
     4                                    +->+----------+ /
     5                                    |  |dictEntry*|/    +----+
     6                                    |  +----------+ +-->|K2|V|->NULL
     7                                    |  |dictEntry*|/    +----+
     8  +------------+    /+-----------+  |  +----------+
     9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|    +----+
    10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
    11  |privdata    | /   |size=4     |     |dictEntry*|    +----+
    12  +------------+/    +-----------+     +----------+    +----+
    13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
    14  +------------+    +-----------+                      +----+
    15  |rehashidx=1 |    |used=3     |
    16  +------------+    +-----------+
    17  |iterators=0 |   
    18  +------------+    +-----------+  +->+----------+   +----+  +----+  +----+
    19                     |dictEntry**|--+  |dictEntry*|-->|K6|V|->|K1|V|->|K5|V|->NULL
    20                     +-----------+     +----------+   +----+  +----+  +----+
    21                     |size=8     |     |dictEntry*|->NULL
    22                     +-----------+     +----------+
    23                     |sizemask=7 |     |dictEntry*|->NULL
    24                     +-----------+     +----------+
    25                     |used=3     |     |dictEntry*|->NULL
    26                     +-----------+     +----------+
    27                                       |dictEntry*|->NULL
    28                                       +----------+
    29                                       |dictEntry*|->NULL
    30                                       +----------+
    31                                       |dictEntry*|->NULL
    32                                       +----------+
    33                                       |dictEntry*|->NULL
    34                                       +----------+
    35 */

    以上为正常插入时的情况,key已存在,或是调用另外两个方法的情况与之大同小异,有时间的时候再补充。

    五、查找 

    5.1 dictFind函数

    返回找到的dictEntry

     1 /**
     2  * 若在rehashing期间,则先执行一步rehash。首先在ht[0]里查找,计算出hash值对应ht[0]的idx,取得其bucket,
     3  * 然后遍历之,找到与指定key相同的dictEntry。若ht[0]中找不到指定的key,且正在进行rehashing操作,
     4  * 则去ht[1]以相同方式也查找一次。
     5  */
     6 dictEntry *dictFind(dict *d, const void *key)
     7 {
     8     dictEntry *he;
     9     uint64_t h, idx, table;
    10 
    11     if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty 如果哈希表为空直接返回NULL*/
    12     if (dictIsRehashing(d)) _dictRehashStep(d); //是否处于rehashing操作中
    13     h = dictHashKey(d, key);
    14     for (table = 0; table <= 1; table++) {
    15         idx = h & d->ht[table].sizemask;
    16         he = d->ht[table].table[idx];
    17         while(he) {
    18             if (key==he->key || dictCompareKeys(d, key, he->key))
    19                 return he;
    20             he = he->next;
    21         }
    22         /**
    23          * 如果不是处于rehashing操作中,就不需要遍历ht[1],因为rehashing结束后,会将整个ht[1]赋值给ht[0],
    24          * 然后将ht[1]置为NULL,也就是ht[1]不再存在任何元素
    25          */
    26         if (!dictIsRehashing(d)) return NULL;
    27     }
    28     return NULL;
    29 }

    5.2 dictFetchValue函数

    redis额外提供一个根据key只获取其value的方法,返回找到的value值。

    1 /**
    2  * redis额外提供一个,根据key只获取其value的方法:key不存在时返回NULL,实际上调用的还是dictFind
    3  */
    4 void *dictFetchValue(dict *d, const void *key) {
    5     dictEntry *he;
    6 
    7     he = dictFind(d,key);
    8     return he ? dictGetVal(he) : NULL;
    9 }

    六、删除

    6.1 底层删除函数

     1 /**
     2  * Search and remove an element. This is an helper function for
     3  * dictDelete() and dictUnlink(), please check the top comment
     4  * of those functions.
     5  * 查找方式与dictFind相同。找到之后,由调用者指定是否要销毁此dictEntry,
     6  * 若不销毁,则要把对应指针传出来,给外部使用。
     7  */
     8 static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
     9     uint64_t h, idx;
    10     dictEntry *he, *prevHe;
    11     int table;
    12 
    13     if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
    14 
    15     if (dictIsRehashing(d)) _dictRehashStep(d);
    16     h = dictHashKey(d, key);
    17 
    18     for (table = 0; table <= 1; table++) {
    19         idx = h & d->ht[table].sizemask;
    20         he = d->ht[table].table[idx];
    21         prevHe = NULL;
    22         while(he) {
    23             if (key==he->key || dictCompareKeys(d, key, he->key)) {
    24                 /* Unlink the element from the list */
    25                 if (prevHe)
    26                     prevHe->next = he->next;
    27                 else
    28                     d->ht[table].table[idx] = he->next;
    29                 if (!nofree) {
    30                     //需要销毁
    31                     dictFreeKey(d, he);
    32                     dictFreeVal(d, he);
    33                     zfree(he);
    34                 }
    35                 /**
    36                  * 为什么无论是否释放该元素都需要将元素个数减少1,因为查找到的元素是一定会被删除的元素,
    37                  * 即使在这里不进行释放,后续也会调用dictFreeUnlinkedEntry函数将其释放掉,之所以有这样的一个操作,
    38                  * 是因为某些情况下,在释放该元素之前,可能还会利用该元素进行一些必要的操作,
    39                  * 所以通过这样的一种特殊删除方式来满足这种需求。
    40                  */
    41                 d->ht[table].used--;
    42                 return he;
    43             }
    44             prevHe = he;
    45             he = he->next;
    46         }
    47         if (!dictIsRehashing(d)) break;
    48     }
    49     return NULL; /* not found */
    50 }

    查找方式与dictFind相同。找到之后,由调用者指定是否要销毁此dictEntry,若不销毁,则要把对应指针传出来,给外部使用

    6.2 上层删除函数

    6.1中的方法被两个接口所调用:

     1 /**
     2  * Remove an element, returning DICT_OK on success or DICT_ERR if the
     3  * element was not found.
     4  * 删除一个元素,如果成功删除返回DICT_OK,如果不存在,返回DICT_ERR
     5  */
     6 int dictDelete(dict *ht, const void *key) {
     7     return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
     8 }
     9 
    10 /* Remove an element from the table, but without actually releasing
    11  * the key, value and dictionary entry. The dictionary entry is returned
    12  * if the element was found (and unlinked from the table), and the user
    13  * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
    14  * Otherwise if the key is not found, NULL is returned.
    15  *
    16  * This function is useful when we want to remove something from the hash
    17  * table but want to use its value before actually deleting the entry.
    18  * Without this function the pattern would require two lookups:
    19  *
    20  *  entry = dictFind(...);
    21  *  // Do something with entry
    22  *  dictDelete(dictionary,entry);
    23  *
    24  * Thanks to this function it is possible to avoid this, and use
    25  * instead:
    26  *
    27  * entry = dictUnlink(dictionary,entry);
    28  * // Do something with entry
    29  * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
    30  *
    31  * dictDelete就不用多说了,直接删除对应dictEntry。关于为什么需要dictUnlink,源码的注释上写道,
    32  * 如果有某种操作,需要先查找指定key对应的dictEntry,然后对其做点操作,接着就直接删除,在没有dictUnlink的时候,
    33  * 需要这样:
    34  *      1 entry = dictFind(...);
    35  *      2 // Do something with entry
    36  *      3 dictDelete(dictionary,entry);
    37  * 实际需要查找两次。而在有dictUnlink的情况下:
    38  *      1 entry = dictUnlink(dictionary,entry);
    39  *      2 // Do something with entry
    40  *      3 dictFreeUnlinkedEntry(entry);
    41  * 只需要一次查找,配合专门的删除操作,即可。
    42  */
    43 dictEntry *dictUnlink(dict *ht, const void *key) {
    44     return dictGenericDelete(ht,key,1);
    45 }
    46 
    47 /**
    48  * You need to call this function to really free the entry after a call
    49  * to dictUnlink(). It's safe to call this function with 'he' = NULL.
    50  * 配合dictUnlink函数,对dictUnlink的返回结果进行释放
    51  */
    52 void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
    53     if (he == NULL) return;
    54     dictFreeKey(d, he);
    55     dictFreeVal(d, he);
    56     zfree(he);
    57 }

    七、销毁

    清空一个hash table的方法

     1 /**
     2  * Destroy an entire dictionary
     3  * 销毁整个哈希表
     4  * 两层循环,分别遍历所有bucket与单bucket里所有dictEntry进行释放。
     5  * 关于这里的 (i&65535) == 0的判断,_dictClear方法仅在相同文件的方法dictEmpty与dictRelease调用
     6  */
     7 int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
     8     unsigned long i;
     9 
    10     /* Free all the elements */
    11     for (i = 0; i < ht->size && ht->used > 0; i++) {
    12         dictEntry *he, *nextHe;
    13 
    14         if (callback && (i & 65535) == 0) callback(d->privdata);
    15 
    16         if ((he = ht->table[i]) == NULL) continue;
    17         while(he) {
    18             nextHe = he->next;
    19             dictFreeKey(d, he);
    20             dictFreeVal(d, he);
    21             zfree(he);
    22             ht->used--;
    23             he = nextHe;
    24         }
    25     }
    26     /* Free the table and the allocated cache structure */
    27     zfree(ht->table);
    28     /* Re-initialize the table */
    29     _dictReset(ht);
    30     return DICT_OK; /* never fails */
    31 }

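    用户使用的API函数dictEmpty与dictRelease(注意:下面代码清单中的第一个函数实际是dictReleaseIterator,与8.5节重复;dictRelease本身是对ht[0]、ht[1]各调用一次_dictClear并传入NULL作为callback,最后zfree(d))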

     1 /**
     2  * 迭代器销毁
     3  * 与首次执行next操作相对应,若为safe的迭代器,要给dict的迭代器计数减1,否则要校验期间dict的指纹是否发生了变化。
     4  */
     5 void dictReleaseIterator(dictIterator *iter)
     6 {
     7     if (!(iter->index == -1 && iter->table == 0)) {
     8         if (iter->safe)
     9             iter->d->iterators--;
    10         else
    11             assert(iter->fingerprint == dictFingerprint(iter->d));
    12     }
    13     zfree(iter);
    14 }
    15 
    16 /**
    17  * 释放所有元素,将表恢复为初始化状态,也就是刚刚创建的状态
    18  * 第二个参数传入的NULL
    19  */
    20 void dictEmpty(dict *d, void(callback)(void*)) {
    21     _dictClear(d,&d->ht[0],callback);
    22     _dictClear(d,&d->ht[1],callback);
    23     d->rehashidx = -1;
    24     d->iterators = 0;
    25 }

    dictRelease不用多说,传入的callback为NULL。而dictEmpty,搜索redis源码所有文件的调用,

     1 匹配到二进制文件 src/redis-check-aof
     2 src/replication.c:    dictEmpty(server.repl_scriptcache_dict,NULL);
     3 src/dict.h:void dictEmpty(dict *d, void(callback)(void*));
     4 匹配到二进制文件 src/redis-cli
     5 匹配到二进制文件 src/dict.o
     6 src/dict.c:void dictEmpty(dict *d, void(callback)(void*)) {
     7 匹配到二进制文件 src/sentinel.o
     8 src/db.c:            dictEmpty(server.db[j].dict,callback);
     9 src/db.c:            dictEmpty(server.db[j].expires,callback);
    10 匹配到二进制文件 src/redis-sentinel
    11 匹配到二进制文件 src/redis-check-rdb
    12 匹配到二进制文件 src/replication.o
    13 src/sentinel.c:    dictEmpty(server.commands,NULL);
    14 匹配到二进制文件 src/db.o
    15 匹配到二进制文件 src/blocked.o
    16 src/blocked.c:    dictEmpty(c->bpop.keys,NULL);
    17 匹配到二进制文件 src/redis-server

    仅db.c里传了callback进来,对应的方法为

    1 long long emptyDb(int dbnum, int flags, void(callback)(void*));

    继续搜索emptyDb

     1 cluster.c:        emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
     2 匹配到二进制文件 cluster.o
     3 db.c:long long emptyDb(int dbnum, int flags, void(callback)(void*)) {
     4 db.c:            emptyDbAsync(&server.db[j]);
     5 db.c:/* Return the set of flags to use for the emptyDb() call for FLUSHALL
     6 db.c:    server.dirty += emptyDb(c->db->id,flags,NULL);
     7 db.c:    server.dirty += emptyDb(-1,flags,NULL);
     8 匹配到二进制文件 db.o
     9 debug.c:        emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
    10 debug.c:        emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
    11 匹配到二进制文件 debug.o
    12 lazyfree.c:void emptyDbAsync(redisDb *db) {
    13 匹配到二进制文件 lazyfree.o
    14 匹配到二进制文件 redis-check-aof
    15 匹配到二进制文件 redis-check-rdb
    16 匹配到二进制文件 redis-sentinel
    17 匹配到二进制文件 redis-server
    18 replication.c: * data with emptyDb(), and while we load the new data received as an
    19 replication.c:/* Callback used by emptyDb() while flushing away old data to load
    20 replication.c:        emptyDb(
    21 匹配到二进制文件 replication.o
    22 server.h:long long emptyDb(int dbnum, int flags, void(callback)(void*));
    23 server.h:void emptyDbAsync(redisDb *db);

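    除replication.c外,其余调用处传入的都是NULL。replication.c中的emptyDb(调用跨了多行,grep只匹配到第一行,但从其上方的注释"Callback used by emptyDb() while flushing away old data to load"可以看出,这里传入了一个真正的回调(replicationEmptyDbCallback),用于从节点在清空旧数据期间定期通知主节点自己仍然存活。这也解释了_dictClear中(i & 65535) == 0的判断:每处理65536个bucket才调用一次callback,避免清空大字典时长时间没有响应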

    八、迭代器

    8.1  数据结构

     1 /**
     2  * If safe is set to 1 this is a safe iterator, that means, you can call
     3  * dictAdd, dictFind, and other functions against the dictionary even while
     4  * iterating. Otherwise it is a non safe iterator, and only dictNext()
     5  * should be called while iterating.
     6  * 如果是个安全的迭代器,即safe == 1,则在迭代中可以调用dictAdd、dictFind等方法,否则只能调用dictNext。
     7  * index表示,ht[table]对应的bucket的idx。
     8  */
     9 typedef struct dictIterator {
    10     dict *d;
    11     long index;
    12     int table, safe;
    13     dictEntry *entry, *nextEntry;
    14     /* unsafe iterator fingerprint for misuse detection. */
    15     long long fingerprint;
    16 } dictIterator;

    8.2 获取迭代器

     1 //获取普通类型迭代器,刚获取的迭代器并不指向具体哪个dictEntry
     2 dictIterator *dictGetIterator(dict *d)
     3 {
     4     dictIterator *iter = zmalloc(sizeof(*iter));
     5 
     6     iter->d = d;
     7     iter->table = 0;
     8     iter->index = -1;
     9     iter->safe = 0;
    10     iter->entry = NULL;
    11     iter->nextEntry = NULL;
    12     return iter;
    13 }
    14 
    15 //获取安全类型的迭代器
    16 dictIterator *dictGetSafeIterator(dict *d) {
    17     dictIterator *i = dictGetIterator(d);
    18 
    19     i->safe = 1;
    20     return i;
    21 }

    8.3 迭代器的next操作

     1 /**
     2  * 对于一个新的迭代器,首次调用时,会根据是否安全,做不同操作。安全的迭代器会给dict里的计数器+1,
     3  * 不安全的将会记录本字典的指纹。之后会遍历ht[0],取到第一个非NULL的dictEntry。
     4  * 当ht[0]遍历完且取不到非NULL的dictEntry时,如果正在进行rehashing操作,则会去ht[1]里找。
     5  */
     6 dictEntry *dictNext(dictIterator *iter)
     7 {
     8     while (1) {
     9         if (iter->entry == NULL) {
    10             dictht *ht = &iter->d->ht[iter->table];
    11             if (iter->index == -1 && iter->table == 0) {
    12                 if (iter->safe)
    13                     iter->d->iterators++;
    14                 else
    15                     iter->fingerprint = dictFingerprint(iter->d);
    16             }
    17             iter->index++;
    18             if (iter->index >= (long) ht->size) {
    19                 if (dictIsRehashing(iter->d) && iter->table == 0) {
    20                     iter->table++;
    21                     iter->index = 0;
    22                     ht = &iter->d->ht[1];
    23                 } else {
    24                     break;
    25                 }
    26             }
    27             iter->entry = ht->table[iter->index];
    28         } else {
    29             iter->entry = iter->nextEntry;
    30         }
    31         if (iter->entry) {
    32             /* We need to save the 'next' here, the iterator user
    33              * may delete the entry we are returning. */
    34             iter->nextEntry = iter->entry->next;
    35             return iter->entry;
    36         }
    37     }
    38     return NULL;
    39 }

    8.4 遍历过程

     1 /*
     2 
     3      +-------------------------+
     4 +----|dict *                   |
     5 |    +-------------------------+
     6 |    |long index               |
     7 |    +-------------------------+
     8 |    |int table                |
     9 |    +-------------------------+
    10 |    |int safe                 |
    11 |    +-------------------------+
    12 |    |dictEntry *entry         |->NULL
    13 |    +-------------------------+
    14 |    |dictEntry *nextEntry     |->NULL
    15 |    +-------------------------+
    16 |    |long long fingerprint    |
    17 |    +-------------------------+
    18 |
    19 |
    20 |
    21 |                                                       +-->NULL
    22 |                                      +->+----------+ /
    23 |                                      |  |dictEntry*|/    +----+
    24 |                                      |  +----------+ +-->|K2|V|->NULL
    25 |                                      |  |dictEntry*|/    +----+
    26 +--->+------------+    /+-----------+  |  +----------+
    27      |dictType*   |   / |dictEntry**|--+  |dictEntry*|    +----+
    28      +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
    29      |privdata    | /   |size=4     |     |dictEntry*|    +----+
    30      +------------+/    +-----------+     +----------+    +----+
    31      |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
    32      +------------+    +-----------+                      +----+
    33      |rehashidx=1 |    |used=3     |
    34      +------------+    +-----------+
    35      |iterators=0 |   
    36      +------------+    +-----------+  +->+----------+   +----+  +----+
    37                         |dictEntry**|--+  |dictEntry*|-->|K1|V|->|K5|V|->NULL
    38                         +-----------+     +----------+   +----+  +----+
    39                         |size=8     |     |dictEntry*|->NULL
    40                         +-----------+     +----------+
    41                         |sizemask=7 |     |dictEntry*|->NULL
    42                         +-----------+     +----------+
    43                         |used=3     |     |dictEntry*|->NULL
    44                         +-----------+     +----------+
    45                                           |dictEntry*|->NULL
    46                                           +----------+
    47                                           |dictEntry*|->NULL
    48                                           +----------+
    49                                           |dictEntry*|->NULL
    50                                           +----------+
    51                                           |dictEntry*|->NULL
    52                                           +----------+
    53 */

    遍历顺序为,K2,K3,K4,K1,K5。

    8.5 迭代器销毁

     1 /**
     2  * 迭代器销毁
     3  * 与首次执行next操作相对应,若为safe的迭代器,要给dict的迭代器计数(iterators)减1,否则要校验期间dict的指纹是否发生了变化。
     4  */
     5 void dictReleaseIterator(dictIterator *iter)
     6 {
     7     if (!(iter->index == -1 && iter->table == 0)) {
     8         if (iter->safe)
     9             iter->d->iterators--;
    10         else
    11             assert(iter->fingerprint == dictFingerprint(iter->d));
    12     }
    13     zfree(iter);
    14 }

    8.6 普通迭代器的指纹

     1 /**
     2  * A fingerprint is a 64 bit number that represents the state of the dictionary
     3  * at a given time, it's just a few dict properties xored together.
     4  *
     5  * 指纹是一个 64 位的数字,代表字典在给定时间的状态,它只是几个字典属性异或在一起。
     6  *
     7  * When an unsafe iterator is initialized, we get the dict fingerprint, and check
     8  * the fingerprint again when the iterator is released.
     9  *
    10  * 当一个不安全的迭代器被初始化时,我们得到dict指纹,当迭代器被释放时再次检查指纹。
    11  *
    12  * If the two fingerprints are different it means that the user of the iterator
    13  * performed forbidden operations against the dictionary while iterating.
    14  *
    15  * 如果两个指纹不同,则表示迭代器的用户在迭代时对字典执行了禁止操作。
    16  *
    17  * 指纹的计算
    18  *
    19  * 对于不安全的迭代器,在迭代过程中,不允许执行任何修改dict的操作,是只读的,不会发生迭代器失效的问题。
    20  * 对于安全的迭代器,在进行操作本节点的时候,redis中记录了当前迭代的bucket idx,以及当前dictEntry的next节点。
    21  * 如果只是add操作,即使是用了头插法把新dictEntry插在本节点之前,对迭代器本身并没有影响。
    22  * 如果是delete了本节点,迭代器中还记录了next节点的位置,调用next时直接取就好。
    23  * 如果next为空,则可以认为当前bucket遍历完了,取下一个bucket就行了。
    24  * 当然,如果在add/delete等操作的时候,进行了rehashing操作,那么当前迭代器里记录的next,在rehashing之后,
    25  * 可能就不是当前节点新位置的next了。所以在使用安全迭代器的时候,禁止了rehashing操作。
    26  */
    27 long long dictFingerprint(dict *d) {
    28     long long integers[6], hash = 0;
    29     int j;
    30 
    31     integers[0] = (long) d->ht[0].table;
    32     integers[1] = d->ht[0].size;
    33     integers[2] = d->ht[0].used;
    34     integers[3] = (long) d->ht[1].table;
    35     integers[4] = d->ht[1].size;
    36     integers[5] = d->ht[1].used;
    37 
    38     /* We hash N integers by summing every successive integer with the integer
    39      * hashing of the previous sum. Basically:
    40      *
    41      * Result = hash(hash(hash(int1)+int2)+int3) ...
    42      *
    43      * This way the same set of integers in a different order will (likely) hash
    44      * to a different number. */
    45     for (j = 0; j < 6; j++) {
    46         hash += integers[j];
    47         /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
    48         hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
    49         hash = hash ^ (hash >> 24);
    50         hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
    51         hash = hash ^ (hash >> 14);
    52         hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
    53         hash = hash ^ (hash >> 28);
    54         hash = hash + (hash << 31);
    55     }
    56     return hash;
    57 }

    九、其它操作

    dict还支持其它的一些操作。

    9.1、随机获取一个key

      dictGetRandomKey

    9.2 随机获取n个key

      dictGetSomeKeys

    9.3 scan操作

      关于scan操作,redis采用了一个很巧妙的方法,保证了在开始scan时未删除的元素一定能遍历到,又能保证尽量少地重复遍历。采用了reverse binary iteration方法,也就是每次向cursor的最高位加1,并向低位方向进位。

     这就是该算法的精妙所在,使用该算法,可以做到下面两点:

             a:开始遍历那一刻的所有元素,只要不被删除,肯定能被遍历到,不管字典扩展还是缩小

             b:该算法可能会返回重复元素,但是已经把返回重复元素的可能性降到了最低;

    参考网址:

      https://blog.csdn.net/gqtcgq/article/details/50533336

      https://github.com/redis/redis/pull/579

    9.3.1 函数原型

      1 /* dictScan() is used to iterate over the elements of a dictionary.
      2  *
      3  * Iterating works the following way:
      4  * 迭代器的工作方式如下
      5  *
      6  * 1) Initially you call the function using a cursor (v) value of 0.
      7  * 初始访问位置为0
      8  * 2) The function performs one step of the iteration, and returns the
      9  *    new cursor value you must use in the next call.
     10  * 访问当前位置的元素,并返回下一次访问的cursor的位置(实际上就是二进制高位+1,然后如果必要的话向低位进位)
     11  * 3) When the returned cursor is 0, the iteration is complete.
     12  * 当返回的下一个访问位置是0,表示整个哈希表遍历结束
     13  *
     14  * The function guarantees all elements present in the
     15  * dictionary get returned between the start and end of the iteration.
     16  * However it is possible some elements get returned multiple times.
     17  * 这个函数保证了从开始遍历那一刻开始哈希表中的所有元素,只要不被删除,肯定能被遍历到,不管字典扩展还是缩小
     18  * 但是有的元素可能会被遍历多次,但是已经把返回重复元素的可能性降到了最低;
     19  *
     20  * For every element returned, the callback argument 'fn' is
     21  * called with 'privdata' as first argument and the dictionary entry
     22  * 'de' as second argument.
     23  * 对于每一个遍历到的元素,都是使用fn函数进行处理,fn的第一个参数是privdata,第二个参数是遍历到的元素de
     24  *
     25  * HOW IT WORKS.
     26  *
     27  * The iteration algorithm was designed by Pieter Noordhuis.
     28  * The main idea is to increment a cursor starting from the higher order
     29  * bits. That is, instead of incrementing the cursor normally, the bits
     30  * of the cursor are reversed, then the cursor is incremented, and finally
     31  * the bits are reversed again.
     32  *
     33  * This strategy is needed because the hash table may be resized between
     34  * iteration calls.
     35  *
     36  * dict.c hash tables are always power of two in size, and they
     37  * use chaining, so the position of an element in a given table is given
     38  * by computing the bitwise AND between Hash(key) and SIZE-1
     39  * (where SIZE-1 is always the mask that is equivalent to taking the rest
     40  *  of the division between the Hash of the key and SIZE).
     41  *
     42  * For example if the current hash table size is 16, the mask is
     43  * (in binary) 1111. The position of a key in the hash table will always be
     44  * the last four bits of the hash output, and so forth.
     45  *
     46  * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
     47  *
     48  * If the hash table grows, elements can go anywhere in one multiple of
     49  * the old bucket: for example let's say we already iterated with
     50  * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16).
     51  *
     52  * If the hash table will be resized to 64 elements, then the new mask will
     53  * be 111111. The new buckets you obtain by substituting in ??1100
     54  * with either 0 or 1 can be targeted only by keys we already visited
     55  * when scanning the bucket 1100 in the smaller hash table.
     56  *
     57  * By iterating the higher bits first, because of the inverted counter, the
     58  * cursor does not need to restart if the table size gets bigger. It will
     59  * continue iterating using cursors without '1100' at the end, and also
     60  * without any other combination of the final 4 bits already explored.
     61  *
     62  * Similarly when the table size shrinks over time, for example going from
     63  * 16 to 8, if a combination of the lower three bits (the mask for size 8
     64  * is 111) were already completely explored, it would not be visited again
     65  * because we are sure we tried, for example, both 0111 and 1111 (all the
     66  * variations of the higher bit) so we don't need to test it again.
     67  *
     68  * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
     69  *
     70  * Yes, this is true, but we always iterate the smaller table first, then
     71  * we test all the expansions of the current cursor into the larger
     72  * table. For example if the current cursor is 101 and we also have a
     73  * larger table of size 16, we also test (0)101 and (1)101 inside the larger
     74  * table. This reduces the problem back to having only one table, where
     75  * the larger one, if it exists, is just an expansion of the smaller one.
     76  *
     77  * LIMITATIONS
     78  *
     79  * This iterator is completely stateless, and this is a huge advantage,
     80  * including no additional memory used.
     81  *
     82  * The disadvantages resulting from this design are:
     83  *
     84  * 1) It is possible we return elements more than once. However this is usually
     85  *    easy to deal with in the application level.
     86  * 2) The iterator must return multiple elements per call, as it needs to always
     87  *    return all the keys chained in a given bucket, and all the expansions, so
     88  *    we are sure we don't miss keys moving during rehashing.
     89  * 3) The reverse cursor is somewhat hard to understand at first, but this
     90  *    comment is supposed to help.
     91  */
     92 unsigned long dictScan(dict *d,
     93                        unsigned long v,
     94                        dictScanFunction *fn,
     95                        dictScanBucketFunction* bucketfn,
     96                        void *privdata)
     97 {
     98     dictht *t0, *t1;
     99     const dictEntry *de, *next;
    100     unsigned long m0, m1;
    101 
    102     if (dictSize(d) == 0) return 0;
    103 
    104     if (!dictIsRehashing(d)) {
    105         /**
    106          * 如果不是处于rehash过程中,只需要遍历哈希表ht[0],然后使用用户定义的函数fn对其进行处理
    107          */
    108         t0 = &(d->ht[0]);
    109         m0 = t0->sizemask;
    110 
    111         /* Emit entries at cursor */
    112         if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
    113         de = t0->table[v & m0];
    114         while (de) {
    115             next = de->next;
    116             fn(privdata, de);
    117             de = next;
    118         }
    119 
    120         /* Set unmasked bits so incrementing the reversed cursor
    121          * operates on the masked bits */
    122         v |= ~m0;
    123 
    124         /* Increment the reverse cursor */
    125         v = rev(v);
    126         v++;
    127         v = rev(v);
    128 
    129     } else {
    130         /**
    131          * 如果处于rehash过程中,先遍历较小的哈希表,再遍历较大的哈希表,同时使用用户定义的函数fn对其进行处理
    132          */
    133         t0 = &d->ht[0];
    134         t1 = &d->ht[1];
    135 
    136         /* Make sure t0 is the smaller and t1 is the bigger table 根据哈希表的大小调整遍历顺序*/
    137         if (t0->size > t1->size) {
    138             t0 = &d->ht[1];
    139             t1 = &d->ht[0];
    140         }
    141 
    142         //获取两张哈希表的掩码
    143         m0 = t0->sizemask;
    144         m1 = t1->sizemask;
    145 
    146         /* Emit entries at cursor */
    147         if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
    148         //根据v&m0,找到t0中需要迭代的bucket,然后迭代其中的每个节点即可。
    149         de = t0->table[v & m0];
    150         while (de) {
    151             next = de->next;
    152             fn(privdata, de);
    153             de = next;
    154         }
    155 
    156         /**
    157          * Iterate over indices in larger table that are the expansion
    158          * of the index pointed to by the cursor in the smaller table
    159          * 接下来的代码稍显复杂,但是,本质上,就是t0中,索引为v&m0的bucket中的所有节点,
    160          * 在其扩展到t1中后,遍历其所有可能的bucket中的节点。语言不好描述,
    161          * 举个例子就明白了:若t0长度为8,则m0为111,v&m0就是保留v的低三位,假设为abc。
    162          * 若t1长度为32,则m1为11111,该过程就是:遍历完t0中索引为abc的bucket之后,接着遍历t1中,
    163          * 索引为00abc、01abc、10abc、11abc的bucket中的节点。
    164          */
    165         do {
    166             /* Emit entries at cursor */
    167             if (bucketfn) bucketfn(privdata, &t1->table[v & m1]);
    168             de = t1->table[v & m1];
    169             while (de) {
    170                 next = de->next;
    171                 fn(privdata, de);
    172                 de = next;
    173             }
    174 
    175             /* Increment the reverse cursor not covered by the smaller mask.*/
    176             v |= ~m1;
    177             v = rev(v);
    178             v++;
    179             v = rev(v);
    180 
    181             /* Continue while bits covered by mask difference is non-zero */
    182         } while (v & (m0 ^ m1));
    183     }
    184 
    185     //返回下一个需要遍历的位置
    186     return v;
    187 }

    9.3.2 核心算法测试

    下面是抽取核心代码的逻辑而写的测试代码

     1 #include <iostream>
     2 #include <vector>
     3 
     4 using namespace std;
     5 
     6 static unsigned long rev(unsigned long v) {
     7     unsigned long s = 8 * sizeof(v); // bit size; must be power of 2
     8     unsigned long mask = ~0;
     9     while ((s >>= 1) > 0) {
    10         mask ^= (mask << s);
    11         v = ((v >> s) & mask) | ((v << s) & ~mask);
    12     }
    13     return v;
    14 }
    15 
    16 void printbits(int n, int x)
    17 {
    18     vector<int> reg;
    19     int bit = 1;
    20     while (x)
    21     {
    22         reg.push_back((n & bit) ? 1 : 0);
    23         bit *= 2;
    24         x--;
    25     }
    26     for (int i = reg.size() - 1; i >= 0; i--)
    27         cout << reg[i];
    28 }
    29 
    30 void test_dictScan_iter(int smalltablesize, int largetablesize)
    31 {
    32     unsigned long v;
    33     unsigned long m0, m1;
    34 
    35     v = 0;
    36     m0 = smalltablesize - 1;
    37     m1 = largetablesize - 1;
    38 
    39     do
    40     {
    41         printf("\nsmall v is: ");
    42         printbits(v & m0, (int)log2(smalltablesize));
    43         printf("\n");
    44         int vt = v;
    45 
    46         do
    47         {
    48             printf("large v is: ");
    49             printbits(vt & m1, (int)log2(largetablesize));
    50             printf("\n");
    51 
    52             vt |= ~m1;
    53             vt = rev(vt);
    54             vt++;
    55             vt = rev(vt);
    56 
    57             //v = (((v | m0) + 1) & ~m0) | (v & m0);
    58         } while (vt & (m0 ^ m1));
    59 
    60         v |= ~m0;
    61         v = rev(v);
    62         v++;
    63         v = rev(v);
    64     } while (v != 0);
    65 }
    66 
    67 int main()
    68 {
    69     test_dictScan_iter(8, 32);
    70     return 0;
    71 }

    测试结果

     1 small v is: 000
     2 large v is: 00000
     3 large v is: 01000
     4 large v is: 10000
     5 large v is: 11000
     6 
     7 small v is: 100
     8 large v is: 00100
     9 large v is: 01100
    10 large v is: 10100
    11 large v is: 11100
    12 
    13 small v is: 010
    14 large v is: 00010
    15 large v is: 01010
    16 large v is: 10010
    17 large v is: 11010
    18 
    19 small v is: 110
    20 large v is: 00110
    21 large v is: 01110
    22 large v is: 10110
    23 large v is: 11110
    24 
    25 small v is: 001
    26 large v is: 00001
    27 large v is: 01001
    28 large v is: 10001
    29 large v is: 11001
    30 
    31 small v is: 101
    32 large v is: 00101
    33 large v is: 01101
    34 large v is: 10101
    35 large v is: 11101
    36 
    37 small v is: 011
    38 large v is: 00011
    39 large v is: 01011
    40 large v is: 10011
    41 large v is: 11011
    42 
    43 small v is: 111
    44 large v is: 00111
    45 large v is: 01111
    46 large v is: 10111
    47 large v is: 11111

    可见,无论v取何值,只要字典开始扩展了,都会遍历大表中与小表当前bucket相对应的所有bucket。具体的核心逻辑代码如下:

     1 do {
     2     de = t1->table[v & m1];
     3 
     4     ...
     5 
     6     /* Increment the reverse cursor not covered by the smaller mask.*/
     7     v |= ~m1;
     8     v = rev(v);
     9     v++;
    10     v = rev(v);
    11 
    12     /* Continue while bits covered by mask difference is non-zero */
    13 } while (v & (m0 ^ m1));

    首先迭代t1中索引为v&m1的bucket,然后对v中被m1覆盖但未被m0覆盖的那几位(即m0 ^ m1对应的位)做一次反向二进制加1(从高位加1,向低位进位),在此过程中v中被m0覆盖的低位保持不变。循环条件v & (m0 ^ m1)表示:一直循环到这几位重新全部变为0为止;此时进位已经传入m0覆盖的低位,游标也就前进到了小表的下一个bucket。

    参考文章

    https://www.cnblogs.com/chinxi/p/12235526.html

    本文来自博客园,作者:Mr-xxx,转载请注明原文链接:https://www.cnblogs.com/MrLiuZF/p/14970967.html

  • 相关阅读:
    前端学习笔记系列一:5 在项目中引入阿里图标icon
    前端学习笔记系列一:3 Vue中的nextTick
    前端学习笔记系列一:4 vue中@click.native
    学习习惯
    美团作价27亿美元正式收购摩拜
    北京 一卡通 退卡
    愚人自以慧,智者自以愚。
    袁隆平分享8字成功经验
    性能计数器 叹号
    升级 windows 2016
  • 原文地址:https://www.cnblogs.com/MrLiuZF/p/14970967.html
Copyright © 2020-2023  润新知