• crush_do_rule


    文件读写接口:

    例如:rados_write(ioctx, "foo", buf, sizeof(buf), 0)

     1 /**
     2  * Write *len* bytes from *buf* into the *oid* object, starting at
     3  * offset *off*. The value of *len* must be <= UINT_MAX/2.
     4  *
     5  * @note This will never return a positive value not equal to len.
     6  * @param io the io context in which the write will occur
     7  * @param oid name of the object       (the object name)
     8  * @param buf data to write            (the object contents)
     9  * @param len length of the data, in bytes  (number of bytes to write)
    10  * @param off byte offset in the object to begin writing at  (start position of the write)
    11  * @returns 0 on success, negative error code on failure
    12  */
    13 CEPH_RADOS_API int rados_write(rados_ioctx_t io, const char *oid,
    14                                const char *buf, size_t len, uint64_t off);

    do_rule函数:

    参数 const WeightVector& weight 保存每个 item 的权重(weight),会作为权重向量传给 crush_do_rule:
     1   template<typename WeightVector>
     2   void do_rule(int rule, int x, std::vector<int>& out, int maxout,
     3            const WeightVector& weight,
     4            uint64_t choose_args_index) const {
     5     int rawout[maxout];
     6     char work[crush_work_size(crush, maxout)];
     7     crush_init_workspace(crush, work);   
     8     crush_choose_arg_map arg_map = choose_args_get_with_fallback(
     9       choose_args_index);
    10     int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
    11                    weight.size(), work, arg_map.args);
    12     if (numrep < 0)
    13       numrep = 0;
    14     out.resize(numrep);
    15     for (int i=0; i<numrep; i++)
    16       out[i] = rawout[i];
    17   }

    函数原型:

      1 /**
      2  * crush_do_rule - calculate a mapping with the given input and rule
      3  * @map: the crush_map
      4  * @ruleno: the rule id       
      5  * @x: hash input                    对象的id和pool id进行hash后输出的值
      6  * @result: pointer to result vector
      7  * @result_max: maximum result size
      8  * @weight: weight vector (for map leaves)
      9  * @weight_max: size of weight vector
     10  * @cwin: Pointer to at least map->working_size bytes of memory or NULL.
     11  */
     12 int crush_do_rule(const struct crush_map *map,
     13           int ruleno, int x, int *result, int result_max,
     14           const __u32 *weight, int weight_max,
     15           void *cwin, const struct crush_choose_arg *choose_args)
     16 {
     17     int result_len;
     18     struct crush_work *cw = cwin;
     19     int *a = (int *)((char *)cw + map->working_size);
     20     int *b = a + result_max;
     21     int *c = b + result_max;
     22     int *w = a;
     23     int *o = b;
    /* a, b, c 分别指向 scratch向量的0, 1/3, 2/3的位置. 
    w = a; o = b; 
    w被用作一个先入先出队列来在CRUSH map中进行横向优先搜索(BFS traversal). 
    o存储crush_choose_firstn选择的结果. 
    c存储最终的OSD选择结果. 
    crush_choose_firstn计算后如果结果不是OSD类型, o 交给w.以便于 w成为下次crush_choose_firstn的输入参数.
    如上所述, crush_do_rule 反复进行 crushrules 迭代. 你可以在内存中发现规则: */
    24 int recurse_to_leaf; 25 int wsize = 0; 26 int osize; 27 int *tmp; 28 const struct crush_rule *rule; 29 __u32 step; 30 int i, j; 31 int numrep; 32 int out_size; 33 /* 34 * the original choose_total_tries value was off by one (it 35 * counted "retries" and not "tries"). add one. 36 */ 37 int choose_tries = map->choose_total_tries + 1; 38 int choose_leaf_tries = 0; 39 /* 40 * the local tries values were counted as "retries", though, 41 * and need no adjustment 42 */ 43 int choose_local_retries = map->choose_local_tries; 44 int choose_local_fallback_retries = map->choose_local_fallback_tries; 45 46 int vary_r = map->chooseleaf_vary_r; 47 int stable = map->chooseleaf_stable; 48 49 if ((__u32)ruleno >= map->max_rules) { 50 dprintk(" bad ruleno %d ", ruleno); 51 return 0; 52 } 53 54 rule = map->rules[ruleno]; 55 result_len = 0; 56 57 for (step = 0; step < rule->len; step++) { 58 int firstn = 0; 59 const struct crush_rule_step *curstep = &rule->steps[step]; 60 61 switch (curstep->op) { 62 case CRUSH_RULE_TAKE: 63 if ((curstep->arg1 >= 0 && 64 curstep->arg1 < map->max_devices) || 65 (-1-curstep->arg1 >= 0 && 66 -1-curstep->arg1 < map->max_buckets && 67 map->buckets[-1-curstep->arg1])) { 68 w[0] = curstep->arg1; 69 wsize = 1; 70 } else { 71 dprintk(" bad take value %d ", curstep->arg1); 72 } 73 break; 74 75 case CRUSH_RULE_SET_CHOOSE_TRIES: 76 if (curstep->arg1 > 0) 77 choose_tries = curstep->arg1; 78 break; 79 80 case CRUSH_RULE_SET_CHOOSELEAF_TRIES: 81 if (curstep->arg1 > 0) 82 choose_leaf_tries = curstep->arg1; 83 break; 84 85 case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES: 86 if (curstep->arg1 >= 0) 87 choose_local_retries = curstep->arg1; 88 break; 89 90 case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES: 91 if (curstep->arg1 >= 0) 92 choose_local_fallback_retries = curstep->arg1; 93 break; 94 95 case CRUSH_RULE_SET_CHOOSELEAF_VARY_R: 96 if (curstep->arg1 >= 0) 97 vary_r = curstep->arg1; 98 break; 99 100 case CRUSH_RULE_SET_CHOOSELEAF_STABLE: 101 if (curstep->arg1 >= 0) 102 
stable = curstep->arg1; 103 break; 104 105 case CRUSH_RULE_CHOOSELEAF_FIRSTN: 106 case CRUSH_RULE_CHOOSE_FIRSTN: 107 firstn = 1; 108 /* fall through */ 109 case CRUSH_RULE_CHOOSELEAF_INDEP: 110 case CRUSH_RULE_CHOOSE_INDEP: 111 if (wsize == 0) 112 break; 113 114 recurse_to_leaf = 115 curstep->op == 116 CRUSH_RULE_CHOOSELEAF_FIRSTN || 117 curstep->op == 118 CRUSH_RULE_CHOOSELEAF_INDEP; 119 120 /* reset output */ 121 osize = 0; 122 123 for (i = 0; i < wsize; i++) { 124 int bno; 125 numrep = curstep->arg1; 126 if (numrep <= 0) { 127 numrep += result_max; 128 if (numrep <= 0) 129 continue; 130 } 131 j = 0; 132 /* make sure bucket id is valid */ 133 bno = -1 - w[i]; 134 if (bno < 0 || bno >= map->max_buckets) { 135 // w[i] is probably CRUSH_ITEM_NONE 136 dprintk(" bad w[i] %d ", w[i]); 137 continue; 138 } 139 if (firstn) { 140 int recurse_tries; 141 if (choose_leaf_tries) 142 recurse_tries = 143 choose_leaf_tries; 144 else if (map->chooseleaf_descend_once) 145 recurse_tries = 1; 146 else 147 recurse_tries = choose_tries; 148 osize += crush_choose_firstn( 149 map, 150 cw, 151 map->buckets[bno], 152 weight, weight_max, 153 x, numrep, 154 curstep->arg2, 155 o+osize, j, 156 result_max-osize, 157 choose_tries, 158 recurse_tries, 159 choose_local_retries, 160 choose_local_fallback_retries, 161 recurse_to_leaf, 162 vary_r, 163 stable, 164 c+osize, 165 0, 166 choose_args); 167 } else { 168 out_size = ((numrep < (result_max-osize)) ? 169 numrep : (result_max-osize)); 170 crush_choose_indep( 171 map, 172 cw, 173 map->buckets[bno], 174 weight, weight_max, 175 x, out_size, numrep, 176 curstep->arg2, 177 o+osize, j, 178 choose_tries, 179 choose_leaf_tries ? 
180 choose_leaf_tries : 1, 181 recurse_to_leaf, 182 c+osize, 183 0, 184 choose_args); 185 osize += out_size; 186 } 187 } 188 189 if (recurse_to_leaf) 190 /* copy final _leaf_ values to output set */ 191 memcpy(o, c, osize*sizeof(*o)); 192 193 /* swap o and w arrays */ 194 tmp = o; 195 o = w; 196 w = tmp; 197 wsize = osize; 198 break; 199 200 201 case CRUSH_RULE_EMIT: 202 for (i = 0; i < wsize && result_len < result_max; i++) { 203 result[result_len] = w[i]; 204 result_len++; 205 } 206 wsize = 0; 207 break; 208 209 default: 210 dprintk(" unknown op %d at step %d ", 211 curstep->op, step); 212 break; 213 } 214 } 215 216 return result_len; 217 }

    crush_choose_firstn 函数

    这个函数递归地选择特定的bucket或者设备,并且可以处理冲突和失败的情况。
    如果当前是choose过程,则通过调用crush_bucket_choose直接进行选择。
    如果当前是chooseleaf(选择叶子节点)的过程,该函数将一直递归,直到得到叶子节点。

    crush_bucket_choose 函数

    crush_bucket_choose是CRUSH最重要的函数。因为默认的bucket类型是straw2,常见的情况下我们会使用straw2类型的bucket,然后就会进入bucket_straw2_choose。

     1 static int crush_bucket_choose(const struct crush_bucket *in,
     2                    struct crush_work_bucket *work,
     3                    int x, int r,
     4                                const struct crush_choose_arg *arg,
     5                                int position)
     6 {
     7     dprintk(" crush_bucket_choose %d x=%d r=%d
    ", in->id, x, r);
     8     BUG_ON(in->size == 0);
     9     switch (in->alg) {
    10     case CRUSH_BUCKET_UNIFORM:
    11         return bucket_uniform_choose(
    12             (const struct crush_bucket_uniform *)in,
    13             work, x, r);
    14     case CRUSH_BUCKET_LIST:
    15         return bucket_list_choose((const struct crush_bucket_list *)in,
    16                       x, r);
    17     case CRUSH_BUCKET_TREE:
    18         return bucket_tree_choose((const struct crush_bucket_tree *)in,
    19                       x, r);
    20     case CRUSH_BUCKET_STRAW:
    21         return bucket_straw_choose(
    22             (const struct crush_bucket_straw *)in,
    23             x, r);
    24     case CRUSH_BUCKET_STRAW2:  // 默认算法
    25         return bucket_straw2_choose(
    26             (const struct crush_bucket_straw2 *)in,
    27             x, r, arg, position);
    28     default:
    29         dprintk("unknown bucket %d alg %d
    ", in->id, in->alg);
    30         return in->items[0];
    31     }
    32 }

    关键选择函数:

     1 static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
     2                 int x, int r, const struct crush_choose_arg *arg,
     3                                 int position)
     4 {
     5     unsigned int i, high = 0;
     6     __s64 draw, high_draw = 0;
     7     __u32 *weights = get_choose_arg_weights(bucket, arg, position);
     8     __s32 *ids = get_choose_arg_ids(bucket, arg);
     9     for (i = 0; i < bucket->h.size; i++) {
    10                 dprintk("weight 0x%x item %d
    ", weights[i], ids[i]);
    11         if (weights[i]) {
    12             draw = generate_exponential_distribution(bucket->h.hash, x, ids[i], r, weights[i]);
    13         } else {
    14             draw = S64_MIN;
    15         }
    16 
    17         if (i == 0 || draw > high_draw) {
    18             high = i;
    19             high_draw = draw;
    20         }
    21     }
    22 
    23     return bucket->h.items[high];
    24 }
  • 相关阅读:
    android状态栏
    python基础(二)——文件操作
    python基础(一)——字符串
    每日读书
    gradle相关
    EditText整体hint
    java生成pdf
    Androidstudio点9图报错
    【ASP.NET Core学习】使用JWT认证授权
    数据访问仓储模式的事务管理(Unit of Work)
  • 原文地址:https://www.cnblogs.com/yunlion/p/10690843.html
Copyright © 2020-2023  润新知