概述
ip_output-设置输出设备和协议,然后经过POST_ROUTING钩子点,最后调用ip_finish_output;
ip_finish_output-对skb进行分片判断,需要分片,则分片后输出,不需要分片则直接输出;
ip_finish_output2-对skb的头部空间进行检查,看是否能够容纳下二层头部,若空间不足,则需要重新申请skb;然后,查找(必要时创建)下一跳对应的邻居项,并通过该邻居项输出;
源码分析
1 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2 { 3 struct net_device *dev = skb_dst(skb)->dev; 4 5 IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); 6 7 /* 设置输出设备和协议 */ 8 skb->dev = dev; 9 skb->protocol = htons(ETH_P_IP); 10 11 /* 经过NF的POST_ROUTING钩子点 */ 12 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, 13 net, sk, skb, NULL, dev, 14 ip_finish_output, 15 !(IPCB(skb)->flags & IPSKB_REROUTED)); 16 }
1 static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2 { 3 unsigned int mtu; 4 int ret; 5 6 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 7 if (ret) { 8 kfree_skb(skb); 9 return ret; 10 } 11 12 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 13 /* Policy lookup after SNAT yielded a new policy */ 14 if (skb_dst(skb)->xfrm) { 15 IPCB(skb)->flags |= IPSKB_REROUTED; 16 return dst_output(net, sk, skb); 17 } 18 #endif 19 /* 获取mtu */ 20 mtu = ip_skb_dst_mtu(sk, skb); 21 22 /* 是gso,则调用gso输出 */ 23 if (skb_is_gso(skb)) 24 return ip_finish_output_gso(net, sk, skb, mtu); 25 26 /* 长度>mtu或者设置了IPSKB_FRAG_PMTU标记,则分片 */ 27 if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) 28 return ip_fragment(net, sk, skb, mtu, ip_finish_output2); 29 30 /* 输出数据包 */ 31 return ip_finish_output2(net, sk, skb); 32 }
1 static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) 2 { 3 struct dst_entry *dst = skb_dst(skb); 4 struct rtable *rt = (struct rtable *)dst; 5 struct net_device *dev = dst->dev; 6 unsigned int hh_len = LL_RESERVED_SPACE(dev); 7 struct neighbour *neigh; 8 u32 nexthop; 9 10 if (rt->rt_type == RTN_MULTICAST) { 11 IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); 12 } else if (rt->rt_type == RTN_BROADCAST) 13 IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); 14 15 /* Be paranoid, rather than too clever. */ 16 /* skb头部空间不能存储链路头 */ 17 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { 18 struct sk_buff *skb2; 19 20 /* 重新分配skb */ 21 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); 22 if (!skb2) { 23 kfree_skb(skb); 24 return -ENOMEM; 25 } 26 /* 关联控制块 */ 27 if (skb->sk) 28 skb_set_owner_w(skb2, skb->sk); 29 30 /* 释放skb */ 31 consume_skb(skb); 32 33 /* 指向新的skb */ 34 skb = skb2; 35 } 36 37 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 38 int res = lwtunnel_xmit(skb); 39 40 if (res < 0 || res == LWTUNNEL_XMIT_DONE) 41 return res; 42 } 43 44 rcu_read_lock_bh(); 45 /* 获取下一跳 */ 46 nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); 47 /* 获取邻居子系统 */ 48 neigh = __ipv4_neigh_lookup_noref(dev, nexthop); 49 50 /* 创建邻居子系统 */ 51 if (unlikely(!neigh)) 52 neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); 53 54 /* 成功 */ 55 if (!IS_ERR(neigh)) { 56 int res; 57 58 /* 更新路由缓存确认 */ 59 sock_confirm_neigh(skb, neigh); 60 61 /* 通过邻居子系统输出 */ 62 res = neigh_output(neigh, skb); 63 64 rcu_read_unlock_bh(); 65 return res; 66 } 67 rcu_read_unlock_bh(); 68 69 net_dbg_ratelimited("%s: No header cache and no neighbour! ", 70 __func__); 71 /* 释放skb */ 72 kfree_skb(skb); 73 return -EINVAL; 74 }