crush_do_rule

文件读写接口：

例如：rados_write(ioctx, "foo", buf, sizeof(buf), 0)

 1 /**
 2  * Write *len* bytes from *buf* into the *oid* object, starting at
 3  * offset *off*. The value of *len* must be <= UINT_MAX/2.
 4  *
 5  * @note This will never return a positive value not equal to len.   
 6  * @param io the io context in which the write will occur
 7  * @param oid name of the object       // 对象名称
 8  * @param buf data to write            // 对象内容
 9  * @param len length of the data, in bytes  // 写的长度
10  * @param off byte offset in the object to begin writing at  // 写起始位置
11  * @returns 0 on success, negative error code on failure
12  */
13 CEPH_RADOS_API int rados_write(rados_ioctx_t io, const char *oid,
14                                const char *buf, size_t len, uint64_t off);

do_rule函数：

const WeightVector& weight为每个item的weight

 1   template<typename WeightVector>
 2   void do_rule(int rule, int x, std::vector<int>& out, int maxout,
 3            const WeightVector& weight,
 4            uint64_t choose_args_index) const {
 5     int rawout[maxout];
 6     char work[crush_work_size(crush, maxout)];
 7     crush_init_workspace(crush, work);   
 8     crush_choose_arg_map arg_map = choose_args_get_with_fallback(
 9       choose_args_index);
10     int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
11                    weight.size(), work, arg_map.args);
12     if (numrep < 0)
13       numrep = 0;
14     out.resize(numrep);
15     for (int i=0; i<numrep; i++)
16       out[i] = rawout[i];
17   }

函数原型：

  1 /**
  2  * crush_do_rule - calculate a mapping with the given input and rule
  3  * @map: the crush_map
  4  * @ruleno: the rule id       
  5  * @x: hash input                    对象的id和pool id进行hash后输出的值
  6  * @result: pointer to result vector
  7  * @result_max: maximum result size
  8  * @weight: weight vector (for map leaves)
  9  * @weight_max: size of weight vector
 10  * @cwin: Pointer to at least map->working_size bytes of memory or NULL.
 11  */
 12 int crush_do_rule(const struct crush_map *map,
 13           int ruleno, int x, int *result, int result_max,
 14           const __u32 *weight, int weight_max,
 15           void *cwin, const struct crush_choose_arg *choose_args)
 16 {
 17     int result_len;
 18     struct crush_work *cw = cwin;
 19     int *a = (int *)((char *)cw + map->working_size);
 20     int *b = a + result_max;
 21     int *c = b + result_max;
 22     int *w = a;
 23     int *o = b;
         /* a, b, c 分别指向 scratch向量的0, 1/3, 2/3的位置. 
           w = a; o = b; 
         - w被用作一个先入先出队列来在CRUSH map中进行横向优先搜索(BFS traversal). 
         - o存储crush_choose_firstn选择的结果. 
         - c存储最终的OSD选择结果. 
           crush_choose_firstn计算后如果结果不是OSD类型, o 交给w.以便于 w成为下次crush_choose_firstn的输入参数. 
         如上所述, crush_do_rule 反复进行 crushrules 迭代. 你可以在内存中发现规则: */
 24     int recurse_to_leaf;
 25     int wsize = 0;
 26     int osize;
 27     int *tmp;
 28     const struct crush_rule *rule;
 29     __u32 step;
 30     int i, j;
 31     int numrep;
 32     int out_size;
 33     /*
 34      * the original choose_total_tries value was off by one (it
 35      * counted "retries" and not "tries").  add one.
 36      */
 37     int choose_tries = map->choose_total_tries + 1;
 38     int choose_leaf_tries = 0;
 39     /*
 40      * the local tries values were counted as "retries", though,
 41      * and need no adjustment
 42      */
 43     int choose_local_retries = map->choose_local_tries;
 44     int choose_local_fallback_retries = map->choose_local_fallback_tries;
 45 
 46     int vary_r = map->chooseleaf_vary_r;
 47     int stable = map->chooseleaf_stable;
 48 
 49     if ((__u32)ruleno >= map->max_rules) {
 50         dprintk(" bad ruleno %d
", ruleno);
 51         return 0;
 52     }
 53 
 54     rule = map->rules[ruleno];
 55     result_len = 0;
 56 
 57     for (step = 0; step < rule->len; step++) {
 58         int firstn = 0;
 59         const struct crush_rule_step *curstep = &rule->steps[step];
 60 
 61         switch (curstep->op) {
 62         case CRUSH_RULE_TAKE:
 63             if ((curstep->arg1 >= 0 &&
 64                  curstep->arg1 < map->max_devices) ||
 65                 (-1-curstep->arg1 >= 0 &&
 66                  -1-curstep->arg1 < map->max_buckets &&
 67                  map->buckets[-1-curstep->arg1])) {
 68                 w[0] = curstep->arg1;
 69                 wsize = 1;
 70             } else {
 71                 dprintk(" bad take value %d
", curstep->arg1);
 72             }
 73             break;
 74 
 75         case CRUSH_RULE_SET_CHOOSE_TRIES:
 76             if (curstep->arg1 > 0)
 77                 choose_tries = curstep->arg1;
 78             break;
 79 
 80         case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
 81             if (curstep->arg1 > 0)
 82                 choose_leaf_tries = curstep->arg1;
 83             break;
 84 
 85         case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
 86             if (curstep->arg1 >= 0)
 87                 choose_local_retries = curstep->arg1;
 88             break;
 89 
 90         case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
 91             if (curstep->arg1 >= 0)
 92                 choose_local_fallback_retries = curstep->arg1;
 93             break;
 94 
 95         case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
 96             if (curstep->arg1 >= 0)
 97                 vary_r = curstep->arg1;
 98             break;
 99 
100         case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
101             if (curstep->arg1 >= 0)
102                 stable = curstep->arg1;
103             break;
104 
105         case CRUSH_RULE_CHOOSELEAF_FIRSTN:
106         case CRUSH_RULE_CHOOSE_FIRSTN:
107             firstn = 1;
108             /* fall through */
109         case CRUSH_RULE_CHOOSELEAF_INDEP:
110         case CRUSH_RULE_CHOOSE_INDEP:
111             if (wsize == 0)
112                 break;
113 
114             recurse_to_leaf =
115                 curstep->op ==
116                  CRUSH_RULE_CHOOSELEAF_FIRSTN ||
117                 curstep->op ==
118                 CRUSH_RULE_CHOOSELEAF_INDEP;
119 
120             /* reset output */
121             osize = 0;
122 
123             for (i = 0; i < wsize; i++) {
124                 int bno;
125                 numrep = curstep->arg1;
126                 if (numrep <= 0) {
127                     numrep += result_max;
128                     if (numrep <= 0)
129                         continue;
130                 }
131                 j = 0;
132                 /* make sure bucket id is valid */
133                 bno = -1 - w[i];
134                 if (bno < 0 || bno >= map->max_buckets) {
135                     // w[i] is probably CRUSH_ITEM_NONE
136                     dprintk("  bad w[i] %d
", w[i]);
137                     continue;
138                 }
139                 if (firstn) {
140                     int recurse_tries;
141                     if (choose_leaf_tries)
142                         recurse_tries =
143                             choose_leaf_tries;
144                     else if (map->chooseleaf_descend_once)
145                         recurse_tries = 1;
146                     else
147                         recurse_tries = choose_tries;
148                     osize += crush_choose_firstn(
149                         map,
150                         cw,
151                         map->buckets[bno],
152                         weight, weight_max,
153                         x, numrep,
154                         curstep->arg2,
155                         o+osize, j,
156                         result_max-osize,
157                         choose_tries,
158                         recurse_tries,
159                         choose_local_retries,
160                         choose_local_fallback_retries,
161                         recurse_to_leaf,
162                         vary_r,
163                         stable,
164                         c+osize,
165                         0,
166                         choose_args);
167                 } else {
168                     out_size = ((numrep < (result_max-osize)) ?
169                             numrep : (result_max-osize));
170                     crush_choose_indep(
171                         map,
172                         cw,
173                         map->buckets[bno],
174                         weight, weight_max,
175                         x, out_size, numrep,
176                         curstep->arg2,
177                         o+osize, j,
178                         choose_tries,
179                         choose_leaf_tries ?
180                            choose_leaf_tries : 1,
181                         recurse_to_leaf,
182                         c+osize,
183                         0,
184                         choose_args);
185                     osize += out_size;
186                 }
187             }
188 
189             if (recurse_to_leaf)
190                 /* copy final _leaf_ values to output set */
191                 memcpy(o, c, osize*sizeof(*o));
192 
193             /* swap o and w arrays */
194             tmp = o;
195             o = w;
196             w = tmp;
197             wsize = osize;
198             break;
199 
200 
201         case CRUSH_RULE_EMIT:
202             for (i = 0; i < wsize && result_len < result_max; i++) {
203                 result[result_len] = w[i];
204                 result_len++;
205             }
206             wsize = 0;
207             break;
208 
209         default:
210             dprintk(" unknown op %d at step %d
",
211                 curstep->op, step);
212             break;
213         }
214     }
215 
216     return result_len;
217 }

crush_choose_firstn 函数

这个函数递归地选择特定的bucket或者设备，并且可以处理冲突、失败的情况。
如果当前是choose过程，则通过调用crush_bucket_choose来直接选择；
如果当前是chooseleaf（选择叶子节点）的过程，该函数将递归直到得到叶子节点。

crush_bucket_choose 函数

crush_bucket_choose是CRUSH最重要的函数。因为默认的bucket类型是straw2，常见的情况下我们使用的都是straw2类型的bucket，于是就会进入bucket_straw2_choose。

 1 static int crush_bucket_choose(const struct crush_bucket *in,
 2                    struct crush_work_bucket *work,
 3                    int x, int r,
 4                                const struct crush_choose_arg *arg,
 5                                int position)
 6 {
 7     dprintk(" crush_bucket_choose %d x=%d r=%d
", in->id, x, r);
 8     BUG_ON(in->size == 0);
 9     switch (in->alg) {
10     case CRUSH_BUCKET_UNIFORM:
11         return bucket_uniform_choose(
12             (const struct crush_bucket_uniform *)in,
13             work, x, r);
14     case CRUSH_BUCKET_LIST:
15         return bucket_list_choose((const struct crush_bucket_list *)in,
16                       x, r);
17     case CRUSH_BUCKET_TREE:
18         return bucket_tree_choose((const struct crush_bucket_tree *)in,
19                       x, r);
20     case CRUSH_BUCKET_STRAW:
21         return bucket_straw_choose(
22             (const struct crush_bucket_straw *)in,
23             x, r);
24     case CRUSH_BUCKET_STRAW2:  // 默认算法
25         return bucket_straw2_choose(
26             (const struct crush_bucket_straw2 *)in,
27             x, r, arg, position);
28     default:
29         dprintk("unknown bucket %d alg %d
", in->id, in->alg);
30         return in->items[0];
31     }
32 }

关键选择函数：

 1 static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
 2                 int x, int r, const struct crush_choose_arg *arg,
 3                                 int position)
 4 {
 5     unsigned int i, high = 0;
 6     __s64 draw, high_draw = 0;
 7     __u32 *weights = get_choose_arg_weights(bucket, arg, position);
 8     __s32 *ids = get_choose_arg_ids(bucket, arg);
 9     for (i = 0; i < bucket->h.size; i++) {
10                 dprintk("weight 0x%x item %d
", weights[i], ids[i]);
11         if (weights[i]) {
12             draw = generate_exponential_distribution(bucket->h.hash, x, ids[i], r, weights[i]);
13         } else {
14             draw = S64_MIN;
15         }
16 
17         if (i == 0 || draw > high_draw) {
18             high = i;
19             high_draw = draw;
20         }
21     }
22 
23     return bucket->h.items[high];
24 }

相关阅读:
android状态栏
 python基础（二）——文件操作
 python基础（一）——字符串
 每日读书
 gradle相关
 EditText整体hint
java生成pdf
Androidstudio点9图报错
 【ASP.NET Core学习】使用JWT认证授权
 数据访问仓储模式的事务管理(Unit of Work)
原文地址：https://www.cnblogs.com/yunlion/p/10690843.html