1.5.1
当邻居项不处于NUD_CONNECTED状态时,不允许使用快速路径发送报文。函数neigh_resolve_output用于慢而安全的输出,通常在初始化neigh_ops结构时
作为output函数的实例;当邻居项从NUD_CONNECTED转到非NUD_CONNECTED状态时,使用neigh_suspect将output设置为ops->output,即neigh_resolve_output()
/* Neighbour state is suspicious;
disable fast path.
Called with write_locked neigh.
*/
static void neigh_suspect(struct neighbour *neigh)
{
NEIGH_PRINTK2("neigh %p is suspected. ", neigh);
neigh->output = neigh->ops->output;
}
/* Neighbour state is OK;
enable fast path.
Called with write_locked neigh.
*/
static void neigh_connect(struct neighbour *neigh)
{
NEIGH_PRINTK2("neigh %p is connected. ", neigh);
neigh->output = neigh->ops->connected_output;
}
/*
 * Generic ARP neighbour operations for IPv4.
 *
 * The two output hooks are what neigh_suspect() and neigh_connect()
 * install into neigh->output.
 */
static const struct neigh_ops arp_generic_ops = {
	.family			= AF_INET,

	/* Output hooks: installed by neigh_suspect() / neigh_connect(). */
	.output			= neigh_resolve_output,
	.connected_output	= neigh_connected_output,

	/* Resolution hooks. */
	.solicit		= arp_solicit,
	.error_report		= arp_error_report,
};
neigh_resolve_output:大概含义为:先通过neigh_event_send检测邻居项状态;如果可以立即发送,且邻居项的输出设备支持hard_header_cache、同时二层首部缓存尚未建立,
则先为该邻居项建立硬件首部缓存;随后无论是否建立了缓存,都通过dev_hard_header在报文首部添加二层硬件首部,然后输出报文
/* Slow and careful. */ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); int rc = 0; if (!dst) goto discard; /* 检测邻居项状态有效性 */ if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; unsigned int seq; ///* 有二层头缓存函数,则缓存之 */ if (dev->header_ops->cache && !neigh->hh.hh_len) neigh_hh_init(neigh, dst); do {/* 填充二层头 */ __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0)//如果添加首部成功调用xmit 输出到网络设备 rc = dev_queue_xmit(skb);/* 数据包发送 */ else goto out_kfree_skb; } out: return rc; discard: NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p ", dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); goto out; }
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; if (neigh->used != now) neigh->used = now; if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; } int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; bool immediate_probe = false; write_lock_bh(&neigh->lock); rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; /* 去掉NUD_CONNECT NUD_DELAY NUD_PROBE 状态 那么就只剩下 NUD_STALE NUD_INCOMPLETE NUD_NONE NUD_FAILD */ if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { //NUD_NONE状态 if (neigh->parms->mcast_probes + neigh->parms->app_probes) { //如果允许发送广播请求或者应用程序发送请求解析neigh地址 unsigned long next, now = jiffies; atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; next = now + max(neigh->parms->retrans_time, HZ/2); neigh_add_timer(neigh, next); //启动定时器 immediate_probe = true; //发送arp 请求(ipv4) 请求邻居表项 } else { neigh->nud_state = NUD_FAILED;//邻居无效 不能输出 neigh->updated = jiffies; write_unlock_bh(&neigh->lock); kfree_skb(skb); return 1; } } else if (neigh->nud_state & NUD_STALE) { NEIGH_PRINTK2("neigh %p is delayed. 
", neigh); neigh->nud_state = NUD_DELAY;//转变为delay 状态 neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + neigh->parms->delay_probe_time); } if (neigh->nud_state == NUD_INCOMPLETE) {//说明之前有报文发送 if (skb) { while (neigh->arp_queue_len_bytes + skb->truesize > neigh->parms->queue_len_bytes) {//如果请求报文已经满了,但还没有收到应答。 struct sk_buff *buff;//如果缓存队列还没有达到上限,则将报文加入到输出缓存队列中 //否者 丢弃队列中最早加入的报文然后加入队列 //但是返回值都是1 即 不能立即发送 buff = __skb_dequeue(&neigh->arp_queue); if (!buff) break; neigh->arp_queue_len_bytes -= buff->truesize; kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } skb_dst_force(skb); __skb_queue_tail(&neigh->arp_queue, skb); neigh->arp_queue_len_bytes += skb->truesize; } rc = 1; } out_unlock_bh: if (immediate_probe) neigh_probe(neigh); 发出邻居项请求 solict报文 (arp请求等) else write_unlock(&neigh->lock); local_bh_enable(); return rc; }
neigh_hh_init:缓存二层头。以以太网(eth)为例:就是把源MAC、目的MAC和协议类型组成的以太网首部缓存到hh_cache中
/** * ether_setup - setup Ethernet network device * @dev: network device * * Fill in the fields of the device structure with Ethernet-generic values. */ void ether_setup(struct net_device *dev) { dev->header_ops = ð_header_ops; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); } const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, .parse = eth_header_parse, .rebuild = eth_rebuild_header, .cache = eth_header_cache, .cache_update = eth_header_cache_update, }; /** * eth_header_cache - fill cache entry from neighbour * @neigh: source neighbour * @hh: destination cache entry * @type: Ethernet type field * * Create an Ethernet header template from the neighbour. */ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type) { struct ethhdr *eth; const struct net_device *dev = neigh->dev; eth = (struct ethhdr *) (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); if (type == htons(ETH_P_802_3)) return -1; eth->h_proto = type; memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, neigh->ha, ETH_ALEN); hh->hh_len = ETH_HLEN; return 0; } /* called with read_lock_bh(&n->lock); */ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) { struct net_device *dev = dst->dev; __be16 prot = dst->ops->protocol; struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); /* Only one thread can come in here and initialize the * hh_cache entry. */ if (!hh->hh_len) dev->header_ops->cache(n, hh, prot); write_unlock_bh(&n->lock); } //根据代码可以看出 直接拷贝二层头
快速发送:
//ip_finish_output2 中会调用dst_neigh_output 输出报文 static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, struct sk_buff *skb) { struct hh_cache *hh; if (unlikely(dst->pending_confirm)) { n->confirmed = jiffies; dst->pending_confirm = 0; } hh = &n->hh; if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) return neigh_hh_output(hh, skb);//快速发出 else return n->output(n, skb);// 慢速发出neigh_resolve_output }
/* 拷贝缓存的二层头部,输出 */ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq; unsigned int hh_len; /* 拷贝二层头到skb */ do { seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; /* 二层头部<DATA_MOD,直接使用该长度拷贝 */ if (likely(hh_len <= HH_DATA_MOD)) { /* this is inlined by gcc */ memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); } /* >=DATA_MOD,对齐头部,拷贝 */ else { unsigned int hh_alen = HH_DATA_ALIGN(hh_len); memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); } } while (read_seqretry(&hh->hh_lock, seq)); skb_push(skb, hh_len); /* 发送 */ return dev_queue_xmit(skb); }
neigh_hh_output-缓存输出,直接拷贝二层头部,然后输出;
neigh_connected_output-快速输出,用于连接状态的输出;需要重新构建二层头部,然后输出;
neigh_resolve_output-慢速输出,用于非连接状态的输出;需要对邻居项状态进行检查,然后重新构造二层头部,最后输出;
neigh_direct_output-直接输出,用于没有二层头部时的输出;
/* CONNECTED状态的发送函数,没有neigh_hh_output快,这个需要重建二层头 */ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = neigh->dev; unsigned int seq; int err; /* 拷贝二层头 */ do { __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); } while (read_seqretry(&neigh->ha_lock, seq)); /* 发送数据包 */ if (err >= 0) err = dev_queue_xmit(skb); else { err = -EINVAL; kfree_skb(skb); } return err; }