处理upcall总体框架:
1. 函数handle_upcalls()批量处理(in batches)由内核传上来的dpif_upcall,并解析出每个upcall的类型。这里主要看在内核中匹配流表失败时产生的MISS_UPCALL。
处理完成后会得到多个flow_miss。
结构体dpif_upcall代表由内核传到用户空间的一个包,包含上传原因、packet data,以及以netlink attr形式存在的键值(flow key)。
/* A packet passed up from the kernel datapath to userspace.
 *
 * Carries the reason for the upcall ('type'), the packet data, and the flow
 * key encoded as Netlink attributes. */
struct dpif_upcall {
    /* All types. */
    enum dpif_upcall_type type; /* Why the packet was sent to userspace. */
    struct ofpbuf *packet;      /* Packet data. */
    struct nlattr *key;         /* Flow key. */
    size_t key_len;             /* Length of 'key' in bytes. */

    /* DPIF_UC_ACTION only. */
    uint64_t userdata;          /* Argument to OVS_ACTION_ATTR_USERSPACE. */
};
结构体flow_miss将具有相同流特征的packets归并到一起(batching),这样性能可能会更优,所以这个结构体会把datapath interface相关的数据排入队列。每一个flow_miss对应一个或多个待发送的数据包,另外还可能在dpif中安装流表项。
/* A batch of missed packets that all share the same flow.
 *
 * Grouping packets by flow lets them be handled together.  Each flow_miss
 * corresponds to one or more packets and may also lead to a flow being
 * installed in the dpif. */
struct flow_miss {
    struct hmap_node hmap_node;
    struct flow flow;                   /* Flow shared by the batched packets. */
    enum odp_key_fitness key_fitness;   /* How well kernel and userspace agree
                                         * on the flow key. */
    const struct nlattr *key;
    size_t key_len;
    ovs_be16 initial_tci;
    struct list packets;                /* All packets that have this flow. */
    enum dpif_upcall_type upcall_type;
};
2. 接下来,函数handle_miss_upcalls()会依次遍历这个flow_misses数组,完成的工作有:1)得到odp_key_fitness(也就是内核层/用户层在流匹配上的一致程度);2)从packet data中提取出流信息miss->flow;3)然后对miss->flow进行哈希查找,如果不存在则插入到TO-DO-List中;4)将这个upcall->packet插入到对应的节点上。
3. 然后对于TO-DO-List中的每一个元素,调用handle_flow_miss()函数。它会从这个flow_miss中构造得到flow_miss_op,具体的过程是:1)查询ofproto的facet表ofproto->facets,看针对这个flow的facet是否已存在;2)从ofproto的分类表中查找与这个flow对应的分类规则,对于第一个进入系统的包,还没有建立起cls_rule,此时返回ofproto->miss_rule(是怎样初始化的呢?);3)构造一个facet,和当前的flow和rule_dpif关联起来;4)这时候与flow_miss匹配的facet也有了,接着调用函数handle_flow_miss_with_facet(),它可能会把需要的操作添加到flow_miss_op中。具体过程是:先通过内核传上来的key查找subfacet是否存在,如果不存在就构建一个;然后对连接到这个flow_miss中的每一个packet分别进行处理;handle_flow_miss_common()会判断,如果rule->up.cr.priority == FAIL_OPEN_PRIORITY,就会发送一个packet-in到SDN Controller;对于刚创建的subfacet,其actions为空,所以函数subfacet_make_actions()会依据subfacet中的rule来创建datapath action,存储在odp_actions中。如果upcall的类型是DPIF_UC_MISS,就创建一个DPIF_OP_FLOW_PUT类型的flow_miss_op(即dpif_flow_put),然后compose_slow_path()会构建一个用户空间的user_action_cookie,它的类型是USER_ACTION_COOKIE_SLOW_PATH,表示这个流得到了用户空间的处理。然后odp_put_userspace_action()会添加一个OVS_ACTION_ATTR_USERSPACE action到odp_actions中,属性值包含netlink pid和刚才的cookie。
/* One datapath operation constructed from a flow_miss by
 * handle_flow_miss(). */
struct flow_miss_op {
    struct dpif_op dpif_op;     /* The datapath operation; its 'type' tells
                                 * which kind of operation this is. */
    struct subfacet *subfacet;  /* Subfacet; gives access to the associated
                                 * flow, rule, and related data. */
    void *garbage;              /* Pointer to pass to free(), NULL if none. */
    uint64_t stub[1024 / 8];    /* Temporary buffer. */
};
/* A single operation to perform on a datapath.
 *
 * 'type' selects which member of 'u' is in use.  'error' receives the result
 * of carrying out the operation. */
struct dpif_op {
    enum dpif_op_type type;
    int error;
    union {
        struct dpif_flow_put flow_put;  /* For DPIF_OP_FLOW_PUT. */
        struct dpif_flow_del flow_del;  /* For DPIF_OP_FLOW_DEL. */
        struct dpif_execute execute;    /* For DPIF_OP_EXECUTE. */
    } u;
};
/* Kinds of operation that a "struct dpif_op" can describe.  Numbering starts
 * at 1, so 0 is not a valid operation type. */
enum dpif_op_type {
    DPIF_OP_FLOW_PUT = 1,   /* Uses the 'flow_put' member of dpif_op's union. */
    DPIF_OP_FLOW_DEL,       /* Uses the 'flow_del' member of dpif_op's union. */
    DPIF_OP_EXECUTE,        /* Uses the 'execute' member of dpif_op's union. */
};
结构体facet是openflow flow的完全匹配(exact-match)实例的抽象。它与"struct flow"关联,代表OVS用户空间对于exact-match flow的视图。一个facet有一个或多个subfacet,每一个subfacet追踪着内核层datapath对于这个exact-match flow的视图。当内核层和用户空间对一个flow key的理解一致的时候,就只有一个subfacet(通常如此)。更多理解参考[]。
/* An exact-match instance of an OpenFlow flow, as seen by userspace.
 *
 * A facet is associated with a "struct flow" and has one or more subfacets,
 * each of which tracks the datapath's view of this exact-match flow. */
struct facet {
    /* Owners. */
    struct hmap_node hmap_node; /* In owning ofproto's 'facets' hmap. */
    struct list list_node;      /* In owning rule's 'facets' list. */
    struct rule_dpif *rule;     /* Owning rule. */

    /* Owned data. */
    struct list subfacets;
    long long int used;         /* Time last used; time created if not used. */

    /* Key. */
    struct flow flow;

    /* Statistics fields follow here (omitted from this excerpt). */

    /* Storage for a single subfacet, to reduce malloc() time and space
     * overhead.  (A facet always has at least one subfacet and in the common
     * case has exactly one subfacet.) */
    struct subfacet one_subfacet;
};
/* A rule as seen by the dpif-based ofproto implementation: the generic
 * "struct rule" plus traffic counters, a cached tag, and the facets that
 * belong to the rule. */
struct rule_dpif {
    struct rule up;             /* Embedded generic rule. */
    uint64_t packet_count;      /* Number of packets received. */
    uint64_t byte_count;        /* Number of bytes received. */
    tag_type tag;               /* Caches rule_calculate_tag() result. */
    struct list facets;         /* List of "struct facet"s. */
};
/* An OpenFlow flow within a "struct ofproto".
 *
 * With few exceptions, ofproto implementations may look at these fields but
 * should not modify them. */
struct rule {
    struct list ofproto_node;    /* Owned by ofproto base code. */
    struct ofproto *ofproto;     /* The ofproto that contains this rule. */
    struct cls_rule cr;          /* In owning ofproto's classifier. */

    struct ofoperation *pending; /* Operation now in progress, if nonnull. */

    ovs_be64 flow_cookie;        /* Controller-issued identifier. */

    long long int created;       /* Creation time. */
    long long int modified;      /* Time of last modification. */
    long long int used;          /* Last use; time created if never used. */
    uint16_t hard_timeout;       /* In seconds from ->modified. */
    uint16_t idle_timeout;       /* In seconds from ->used. */
    uint8_t table_id;            /* Index in ofproto's 'tables' array. */
    bool send_flow_removed;      /* Send a flow removed message? */

    /* Eviction groups. */
    bool evictable;              /* If false, prevents eviction. */
    struct heap_node evg_node;   /* In eviction_group's "rules" heap. */
    struct eviction_group *eviction_group; /* NULL if not in any group. */

    struct ofpact *ofpacts;      /* Sequence of "struct ofpacts". */
    unsigned int ofpacts_len;    /* Size of 'ofpacts', in bytes. */

    /* Flow monitors. */
    enum nx_flow_monitor_flags monitor_flags;
    uint64_t add_seqno;          /* Sequence number when added. */
    uint64_t modify_seqno;       /* Sequence number when changed. */
};
/* Tracks the datapath's view of the exact-match flow represented by the
 * owning facet.  In the common case a facet has exactly one subfacet. */
struct subfacet {
    /* Owners. */
    struct hmap_node hmap_node; /* In struct ofproto_dpif 'subfacets' list. */
    struct list list_node;      /* In struct facet's 'subfacets' list. */
    struct facet *facet;        /* Owning facet. */

    /* Key.
     *
     * To save memory in the common case, 'key' is NULL if 'key_fitness' is
     * ODP_FIT_PERFECT, that is, odp_flow_key_from_flow() can accurately
     * regenerate the ODP flow key from ->facet->flow. */
    enum odp_key_fitness key_fitness;
    struct nlattr *key;
    int key_len;

    long long int used;         /* Time last used; time created if not used. */

    uint64_t dp_packet_count;   /* Last known packet count in the datapath. */
    uint64_t dp_byte_count;     /* Last known byte count in the datapath. */

    /* Datapath actions.
     *
     * These should be essentially identical for every subfacet in a facet, but
     * may differ in trivial ways due to VLAN splinters. */
    size_t actions_len;         /* Number of bytes in actions[]. */
    struct nlattr *actions;     /* Datapath actions. */

    enum slow_path_reason slow; /* 0 if fast path may be used. */
    enum subfacet_path path;    /* Installed in datapath? */
};
枚举体slow_path_reason列举的是packet没有在内核层被转发的原因(也就是说这个packet走的是slow path)。
/* Reasons why a packet is not forwarded by the kernel datapath and instead
 * takes the slow path.  Each reason is a distinct bit. */
enum slow_path_reason {
    /* These reasons are mutually exclusive. */
    SLOW_CFM = 1 << 0,          /* CFM packets need per-packet processing. */
    SLOW_LACP = 1 << 1,         /* LACP packets need per-packet processing. */
    SLOW_STP = 1 << 2,          /* STP packets need per-packet processing. */
    SLOW_IN_BAND = 1 << 3,      /* In-band control needs every packet. */

    /* Mutually exclusive with SLOW_CFM, SLOW_LACP, SLOW_STP.
     * May be combined with SLOW_IN_BAND. */
    SLOW_CONTROLLER = 1 << 4,   /* Packets must go to OpenFlow controller. */
};
枚举体subfacet_path列举的是subfacet可能的当前状态:1)SF_NOT_INSTALLED表示没有安装在datapath中,这种情况出现在这个subfacet构建之后、销毁之前,或者当我们在安装一个subfacet到datapath时出错。因为subfacet中有对应的action,所以这里的facet install指的是datapath执行了由用户空间下发的具体action。2)SF_FAST_PATH说明对应的action已经得到了执行,packets可以在内核层直接转发;3)SF_SLOW_PATH是流规则指定了要发往用户空间。
/* Current installation state of a subfacet with respect to the datapath. */
enum subfacet_path {
    /* The datapath holds no flow for this subfacet. */
    SF_NOT_INSTALLED = 0,

    /* The subfacet's full actions are installed in the datapath. */
    SF_FAST_PATH = 1,

    /* A send-to-userspace action is installed in the datapath. */
    SF_SLOW_PATH = 2
};
4. 通过上面的操作,flow_miss_op数组就得到了。接下来调用函数dpif_operate(),依次对dpif执行这些operation。
/* Carry out each queued operation on the datapath in order, storing each
 * result in that operation's own 'error' field. */
for (i = 0; i < n_ops; i++) {
    struct dpif_op *op = ops[i];

    /* 'type' selects which member of the union 'u' holds the arguments. */
    switch (op->type) {
    case DPIF_OP_FLOW_PUT:
        op->error = dpif_flow_put__(dpif, &op->u.flow_put);
        break;

    case DPIF_OP_FLOW_DEL:
        op->error = dpif_flow_del__(dpif, &op->u.flow_del);
        break;

    case DPIF_OP_EXECUTE:
        op->error = dpif_execute__(dpif, &op->u.execute);
        break;

    default:
        NOT_REACHED();
    }
}
这里只看flow put的情况:用户空间会通过genl(generic netlink)把对应的动作下发给内核datapath,并且接收响应。