alloc_skb:分配一个数据长度为size的network buffer {skb+data_buffer}
1 /** 2 * __alloc_skb - allocate a network buffer 3 * @size: size to allocate 4 * @gfp_mask: allocation mask 5 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache 6 * instead of head cache and allocate a cloned (child) skb. 7 * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for 8 * allocations in case the data is required for writeback 9 * @node: numa node to allocate memory on 10 * 11 * Allocate a new &sk_buff. The returned buffer has no headroom and a 12 * tail room of at least size bytes. The object has a reference count 13 * of one. The return is the buffer. On a failure the return is %NULL. 14 * 15 * Buffers may only be allocated from interrupts using a @gfp_mask of 16 * %GFP_ATOMIC. 17 */ 18 /*1.SKB 的分配时机主要有两种,最常见的一种是在网卡的中断中,有数据包到达的时,系统分配 SKB 包进行包处理; 19 第二种情况是主动分配 SKB 包用于各种调试或者其他处理环境. 20 21 2.SKB 的 reserve 操作:SKB 在分配的过程中使用了一个小技巧 : 22 即在数据区中预留了 128 个字节大小的空间作为协议头使用, 23 通过移动 SKB 的 data 与 tail 指针的位置来实现这个功能. 24 3.当数据到达网卡后,会触发网卡的中断,从而进入 ISR 中,系统会在 ISR 中计算出此次接收到的数据的字节数 : pkt_len, 25 然后调用 SKB 分配函数来分配 SKB : 26 skb = dev_alloc_skb(pkt_len+); 27 实际上传入的数据区的长度还要比实际接收到的字节数多,这实际上是一种保护机制. 28 实际上,在 dev_alloc_skb 函数调用 __dev_alloc_skb 函数,而 __dev_alloc_skb 函数又调用 alloc_skb 函数 时, 29 其数据区的大小又增加了 128 字节, 这 128 字节就事前面我们所说的 reserve 机制预留的 header 空间 30 */ 31 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, 32 int flags, int node) 33 { 34 struct kmem_cache *cache; 35 struct skb_shared_info *shinfo; 36 struct sk_buff *skb; 37 u8 *data; 38 bool pfmemalloc; 39 //获取指定的高速缓存 fclone_skb or skb 40 cache = (flags & SKB_ALLOC_FCLONE) 41 ? skbuff_fclone_cache : skbuff_head_cache; 42 43 if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) 44 gfp_mask |= __GFP_MEMALLOC; 45 46 /* Get the HEAD 从cache上分配, 如果cache上无法分配,则从内存中申请 */ 47 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); 48 if (!skb) 49 goto out; 50 prefetchw(skb); //用于写预取 手工执行预抓取 ----提升性能 51 52 /* We do our best to align skb_shared_info on a separate cache 53 * line. 
It usually works because kmalloc(X > SMP_CACHE_BYTES) gives 54 * aligned memory blocks, unless SLUB/SLAB debug is enabled. 55 * Both skb->head and skb_shared_info are cache line aligned. 56 */ 57 size = SKB_DATA_ALIGN(size);/* 数据对齐 */ 58 /* 对齐后的数据加上skb_shared_info对齐后的大小 */ 59 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 60 //分配数据区 使用kmalloc ?????? 61 data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);--- 62 if (!data) 63 goto nodata; 64 /* kmalloc(size) might give us more room than requested. 65 * Put skb_shared_info exactly at the end of allocated zone, 66 * to allow max possible filling before reallocation. 67 */ 68 /* 除了skb_shared_info以外的数据大小 */ 69 size = SKB_WITH_OVERHEAD(ksize(data)); 70 prefetchw(data + size);// 手工执行预抓取 71 72 /* 73 * Only clear those fields we need to clear, not those that we will 74 * actually initialise below. Hence, don't put any more fields after 75 * the tail pointer in struct sk_buff! 76 */ 77 memset(skb, 0, offsetof(struct sk_buff, tail)); 78 /* Account for allocated memory : skb + skb->head */ 79 /* 总长度= skb大小+ 数据大小+ skb_shared_info大小 */ 80 skb->truesize = SKB_TRUESIZE(size); 81 skb->pfmemalloc = pfmemalloc; 82 atomic_set(&skb->users, 1);/* 设置引用计数为1 */ 83 skb->head = data;/*head data tail均指向数据区头部*/ 84 skb->data = data; 85 skb_reset_tail_pointer(skb); 86 //end tail+size 指向尾部 87 skb->end = skb->tail + size; 88 // l2 l3 l4 head 初始化 为啥不是0 89 skb->mac_header = (typeof(skb->mac_header))~0U; 90 skb->transport_header = (typeof(skb->transport_header))~0U; 91 92 /* make sure we initialize shinfo sequentially */ 93 //之前 手工执行预抓取 现在使用 -------从end开始的区域为skb_shared_info 94 shinfo = skb_shinfo(skb);// skb->end 也就是 linear data的end ----> 数据的开始 95 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); 96 atomic_set(&shinfo->dataref, 1); 97 kmemcheck_annotate_variable(shinfo->destructor_arg); 98 99 /*skbuff_fclone_cache和skbuff_head_cache。它们两个的区别是前者是每两个skb为一组。 100 当从skbuff_fclone_cache分配skb时,会两个连续的skb一起分配,但是释放的时候可以分别释放。 101 
也就是说当调用者知道需要两个skb时,如后面的操作很可能使用skb_clone时, 102 那么从skbuff_fclone_cache上分配skb会更高效一些。*/ 103 104 if (flags & SKB_ALLOC_FCLONE) {//如果有克隆标记 105 struct sk_buff_fclones *fclones;/* 如果是fclone cache的话,那么skb的下一个buf,也被分配le 106 之前使用的是flcone_cache 分配*/ 107 108 fclones = container_of(skb, struct sk_buff_fclones, skb1); 109 110 kmemcheck_annotate_bitfield(&fclones->skb2, flags1); 111 skb->fclone = SKB_FCLONE_ORIG; //orig 112 atomic_set(&fclones->fclone_ref, 1);// 113 114 fclones->skb2.fclone = SKB_FCLONE_CLONE; 115 fclones->skb2.pfmemalloc = pfmemalloc; 116 } 117 out: 118 return skb; 119 nodata: 120 kmem_cache_free(cache, skb); 121 skb = NULL; 122 goto out; 123 }
dev_alloc_skb:分配skb,通常被设备驱动用在中断上下文中,它是alloc_skb的封装函数,因为在中断处理函数中被调用,因此要求原子操作(GFP_ATOMIC)----不允许休眠;
GFP_ATOMIC:防止alloc memory 时 出现休眠导致 在中断里面出现 调度
/**
 * dev_alloc_skb - allocate an skb without naming a device
 * @length: length to allocate, passed through unchanged
 *
 * Thin wrapper that forwards @length to netdev_alloc_skb() with a NULL
 * device. Returns whatever netdev_alloc_skb() returns.
 */
static inline struct sk_buff *dev_alloc_skb(unsigned int length)
{
	return netdev_alloc_skb(NULL, length);
}
1 /** 2 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device 3 * @dev: network device to receive on 4 * @len: length to allocate 5 * @gfp_mask: get_free_pages mask, passed to alloc_skb 6 * 7 * Allocate a new &sk_buff and assign it a usage count of one. The 8 * buffer has NET_SKB_PAD headroom built in. Users should allocate 9 * the headroom they think they need without accounting for the 10 * built in space. The built in space is used for optimisations. 11 * 12 * %NULL is returned if there is no free memory. 13 */ 14 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, 15 gfp_t gfp_mask) 16 { 17 struct page_frag_cache *nc; 18 unsigned long flags; 19 struct sk_buff *skb; 20 bool pfmemalloc; 21 void *data; 22 23 /* 分配长度+ skb_shared_info长度 然后对整个长度进行对齐*/ 24 len += NET_SKB_PAD; 25 len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 26 len = SKB_DATA_ALIGN(len); 27 28 if (sk_memalloc_socks()) 29 gfp_mask |= __GFP_MEMALLOC; 30 31 local_irq_save(flags);//为啥要 关闭中断?? 32 33 nc = this_cpu_ptr(&netdev_alloc_cache); 34 data = __alloc_page_frag(nc, len, gfp_mask); /* 分配空间 */ 35 pfmemalloc = nc->pfmemalloc; 36 37 local_irq_restore(flags);/* 开启中断 并restore flag*/ 38 39 if (unlikely(!data)) 40 return NULL; 41 42 skb = __build_skb(data, len);/* 构建skb */ 43 if (unlikely(!skb)) { 44 skb_free_frag(data); 45 return NULL; 46 } 47 48 /* use OR instead of assignment to avoid clearing of bits in mask */ 49 if (pfmemalloc) 50 skb->pfmemalloc = 1; 51 skb->head_frag = 1; 52 53 skb_success: 54 skb_reserve(skb, NET_SKB_PAD); /* 保留空间 */ 55 skb->dev = dev;/* 设置输入设备 */ 56 57 skb_fail: 58 return skb; 59 }
1 /** 2 * __build_skb - build a network buffer 3 * @data: data buffer provided by caller 4 * @frag_size: size of data, or 0 if head was kmalloced 5 * 6 * Allocate a new &sk_buff. Caller provides space holding head and 7 * skb_shared_info. @data must have been allocated by kmalloc() only if 8 * @frag_size is 0, otherwise data should come from the page allocator 9 * or vmalloc() 10 * The return is the new skb buffer. 11 * On a failure the return is %NULL, and @data is not freed. 12 * Notes : 13 * Before IO, driver allocates only data buffer where NIC put incoming frame 14 * Driver should add room at head (NET_SKB_PAD) and 15 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) 16 * After IO, driver calls build_skb(), to allocate sk_buff and populate it 17 * before giving packet to stack. 18 * RX rings only contains data buffers, not full skbs. 19 */ 20 struct sk_buff *__build_skb(void *data, unsigned int frag_size) 21 { 22 struct skb_shared_info *shinfo; 23 struct sk_buff *skb; 24 unsigned int size = frag_size ? : ksize(data); 25 26 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); 27 if (!skb) 28 return NULL; 29 30 size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 31 32 memset(skb, 0, offsetof(struct sk_buff, tail)); 33 skb->truesize = SKB_TRUESIZE(size); 34 atomic_set(&skb->users, 1); 35 skb->head = data; 36 skb->data = data; 37 skb_reset_tail_pointer(skb);skb->tail = skb->data; 38 skb->end = skb->tail + size; 39 skb->mac_header = (typeof(skb->mac_header))~0U; 40 skb->transport_header = (typeof(skb->transport_header))~0U; 41 42 /* make sure we initialize shinfo sequentially */ 43 shinfo = skb_shinfo(skb); 44 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); 45 atomic_set(&shinfo->dataref, 1); 46 kmemcheck_annotate_variable(shinfo->destructor_arg); 47 48 return skb; 49 }
napi_alloc_skb:分配skb,和dev_alloc_skb 差不多:
- __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
- __netdev_alloc_skb - allocate an skbuff for rx on a specific device
和__netdev_alloc_skb 相比;__napi_alloc_skb 实现差不多 就多了一部分代码:
分配长度+ skb_shared_info长度> 一页,或者带有 __GFP_DIRECT_RECLAIM | GFP_DMA 标记------>则调用 __alloc_skb 分配
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; void *data; len += NET_SKB_PAD + NET_IP_ALIGN; if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } ------------------- }
当然 分配内存 最后的底层实现就不看了:有机会再看吧;应该是kmalloc kmem_cache slab get_page order_page啥的吧
kfree_skb:减少skb引用,为0则释放;Drop a reference to the buffer and free it if the usage count has hit zero.
/**
 *	kfree_skb - free an sk_buff
 *	@skb: buffer to free
 *
 *	Drop a reference to the buffer and free it if the usage count has
 *	hit zero. A %NULL @skb is ignored.
 */
void kfree_skb(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return;
	/* Sole reference observed: skip the atomic decrement and free
	 * directly.
	 * NOTE(review): smp_rmb() presumably orders the refcount read against
	 * the reads done while freeing - confirm.
	 */
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	/* Otherwise drop one reference; a non-zero result means someone
	 * else still holds the skb, so just return.
	 */
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;
	trace_kfree_skb(skb, __builtin_return_address(0));
	__kfree_skb(skb);	/* the actual release */
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb
 */
void __kfree_skb(struct sk_buff *skb)
{
	skb_release_all(skb);	/* release everything attached to the skb */
	kfree_skbmem(skb);	/* release the skb itself */
}
consume_skb:释放skb,与kfree_skb区别是,kfree_skb用于失败时丢包释放;
也就是:consume_skb 表示 skb是正常释放。kfree_skb 表示因为某种错误报文被丢弃
/* dev_kfree_skb() is an alias for consume_skb(). */
#define dev_kfree_skb(a) consume_skb(a)
1 /** 2 * consume_skb - free an skbuff 3 * @skb: buffer to free 4 * 5 * Drop a ref to the buffer and free it if the usage count has hit zero 6 * Functions identically to kfree_skb, but kfree_skb assumes that the frame 7 * is being dropped after a failure and notes that 8 */ 9 void consume_skb(struct sk_buff *skb) 10 { 11 if (unlikely(!skb)) 12 return; 13 if (likely(atomic_read(&skb->users) == 1)) 14 smp_rmb(); 15 else if (likely(!atomic_dec_and_test(&skb->users))) 16 return; 17 trace_consume_skb(skb); 18 __kfree_skb(skb); 19 }
所以 consume_skb 和kfree_skb 基本相同;除了统计分析的函数不一样