sk_buff

  1 /*
  2  * Routines having to do with the 'struct sk_buff' memory handlers.
  3  *
  4  * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
  5  * Florian La Roche <rzsfl@rz.uni-sb.de>
  6  *
  7  * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
  8  *
  9  * Fixes:
 10  * Alan Cox : Fixed the worst of the load
 11  * balancer bugs.
 12  * Dave Platt : Interrupt stacking fix.
 13  * Richard Kooijman : Timestamp fixes.
 14  * Alan Cox : Changed buffer format.
 15  * Alan Cox : destructor hook for AF_UNIX etc.
 16  * Linus Torvalds : Better skb_clone.
 17  * Alan Cox : Added skb_copy.
 18  * Alan Cox : Added all the changed routines Linus
 19  * only put in the headers
 20  * Ray VanTassle : Fixed --skb->lock in free
 21  * Alan Cox : skb_copy copy arp field
 22  * Andi Kleen : slabified it.
 23  * Robert Olsson : Removed skb_head_pool
 24  *
 25  * NOTE:
 26  * The __skb_ routines should be called with interrupts
 27  * disabled, or you better be *real* sure that the operation is atomic
 28  * with respect to whatever list is being frobbed (e.g. via lock_sock()
 29  * or via disabling bottom half handlers, etc).
 30  *
 31  * This program is free software; you can redistribute it and/or
 32  * modify it under the terms of the GNU General Public License
 33  * as published by the Free Software Foundation; either version
 34  * 2 of the License, or (at your option) any later version.
 35  */
 36 
 37 /*
 38  * The functions in this file will not compile correctly with gcc 2.4.x
 39  */
 40 
 41 #include <linux/config.h>
 42 #include <linux/module.h>
 43 #include <linux/types.h>
 44 #include <linux/kernel.h>
 45 #include <linux/sched.h>
 46 #include <linux/mm.h>
 47 #include <linux/interrupt.h>
 48 #include <linux/in.h>
 49 #include <linux/inet.h>
 50 #include <linux/slab.h>
 51 #include <linux/netdevice.h>
 52 #ifdef CONFIG_NET_CLS_ACT
 53 #include <net/pkt_sched.h>
 54 #endif
 55 #include <linux/string.h>
 56 #include <linux/skbuff.h>
 57 #include <linux/cache.h>
 58 #include <linux/rtnetlink.h>
 59 #include <linux/init.h>
 60 #include <linux/highmem.h>
 61 
 62 #include <net/protocol.h>
 63 #include <net/dst.h>
 64 #include <net/sock.h>
 65 #include <net/checksum.h>
 66 #include <net/xfrm.h>
 67 
 68 #include <asm/uaccess.h>
 69 #include <asm/system.h>
 70 
 71 static kmem_cache_t *skbuff_head_cache;
 72 
 73 /*
 74  * Keep out-of-line to prevent kernel bloat.
 75  * __builtin_return_address is not used because it is not always
 76  * reliable.
 77  */
 78 
 79 /**
 80  * skb_over_panic - private function
 81  * @skb: buffer
 82  * @sz: size
 83  * @here: address
 84  *
 85  * Out of line support code for skb_put(). Not user callable.
 86  */
 87 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 88 {
 89     printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
 90      "data:%p tail:%p end:%p dev:%s/n",
 91      here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
 92      skb->dev ? skb->dev->name : "<NULL>");
 93     BUG();
 94 }
 95 
 96 /**
 97  * skb_under_panic - private function
 98  * @skb: buffer
 99  * @sz: size
100  * @here: address
101  *
102  * Out of line support code for skb_push(). Not user callable.
103  */
104 
105 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
106 {
107     printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
108      "data:%p tail:%p end:%p dev:%s/n",
109      here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
110      skb->dev ? skb->dev->name : "<NULL>");
111     BUG();
112 }
113 
114 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
115  * 'private' fields and also do memory statistics to find all the
116  * [BEEP] leaks.
117  *
118  */
119 
120 /**
121  * alloc_skb - allocate a network buffer
122  * @size: size to allocate
123  * @gfp_mask: allocation mask
124  *
125  * Allocate a new &sk_buff. The returned buffer has no headroom and a
126  * tail room of size bytes. The object has a reference count of one.
127  * The return is the buffer. On a failure the return is %NULL.
128  *
129  * Buffers may only be allocated from interrupts using a @gfp_mask of
130  * %GFP_ATOMIC.
131  */
132 struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
133 {
134     struct sk_buff *skb;
135     u8 *data;
136 
137     /* Get the HEAD */
138     /* 从cache缓冲池中获取内存 */
139     skb = kmem_cache_alloc(skbuff_head_cache,
140              gfp_mask & ~__GFP_DMA);
141     if (!skb)
142         goto out;
143 
144     /* Get the DATA. Size must match skb_add_mtu(). */
145 
146     /* 对其size */
147     size = SKB_DATA_ALIGN(size);
148 
149     /* 分配的缓冲长度包含skb_shared_info的长度 */
150     data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
151     if (!data)
152         goto nodata;
153     
154     /* 
155      * offsetof是一个编译器宏或者是自定义的宏，用于计算member在struct中的偏移量。
156      * 把在truesize前面的field全部清零。
157      */
158     memset(skb, 0, offsetof(struct sk_buff, truesize));
159     
160     /* truesize是广义SKB的大小，包含了4个部分的长度：skb自身，header，page frags，frag list */
161     skb->truesize = size + sizeof(struct sk_buff);
162     
163     /* users初始化成1 */
164     atomic_set(&skb->users, 1);
165 
166     /* 初始化所有数据指针 */
167     skb->head = data;
168     skb->data = data;
169     skb->tail = data;
170     skb->end = data + size;
171     
172     /* 
173      * skb_shinfo是个宏，#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end))
174      * 所以用这个宏的时候必须等skb->end已经初始化。
175      * skb_shinfo 接在skb->end指向的内存空间后面。
176       */
177 
178     /* 初始化skb_shared_info结构体 */
179     atomic_set(&(skb_shinfo(skb)->dataref), 1);
180     skb_shinfo(skb)->nr_frags = 0;
181     skb_shinfo(skb)->tso_size = 0;
182     skb_shinfo(skb)->tso_segs = 0;
183     skb_shinfo(skb)->frag_list = NULL;
184 out:
185     return skb;
186 nodata:
187     kmem_cache_free(skbuff_head_cache, skb);
188     skb = NULL;
189     goto out;
190 }
191 
192 /**
193  * alloc_skb_from_cache - allocate a network buffer
194  * @cp: kmem_cache from which to allocate the data area
195  * (object size must be big enough for @size bytes + skb overheads)
196  * @size: size to allocate
197  * @gfp_mask: allocation mask
198  *
199  * Allocate a new &sk_buff. The returned buffer has no headroom and
200  * tail room of size bytes. The object has a reference count of one.
201  * The return is the buffer. On a failure the return is %NULL.
202  *
203  * Buffers may only be allocated from interrupts using a @gfp_mask of
204  * %GFP_ATOMIC.
205  */
206 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
207                  unsigned int size, int gfp_mask)
208 {
209     struct sk_buff *skb;
210     u8 *data;
211 
212     /* Get the HEAD */
213     skb = kmem_cache_alloc(skbuff_head_cache,
214              gfp_mask & ~__GFP_DMA);
215     if (!skb)
216         goto out;
217 
218     /* Get the DATA. */
219     size = SKB_DATA_ALIGN(size);
220     
221     /* 这个函数和上面函数不同的地方就在下面这句，不用kmalloc，而用kmem_cache_alloc。 */
222     data = kmem_cache_alloc(cp, gfp_mask);
223     if (!data)
224         goto nodata;
225 
226     memset(skb, 0, offsetof(struct sk_buff, truesize));
227     skb->truesize = size + sizeof(struct sk_buff);
228     atomic_set(&skb->users, 1);
229     skb->head = data;
230     skb->data = data;
231     skb->tail = data;
232     skb->end = data + size;
233 
234     atomic_set(&(skb_shinfo(skb)->dataref), 1);
235     skb_shinfo(skb)->nr_frags = 0;
236     skb_shinfo(skb)->tso_size = 0;
237     skb_shinfo(skb)->tso_segs = 0;
238     skb_shinfo(skb)->frag_list = NULL;
239 out:
240     return skb;
241 nodata:
242     kmem_cache_free(skbuff_head_cache, skb);
243     skb = NULL;
244     goto out;
245 }
246 
247 /* 这个函数是用来释放当前skb的frag_list区的 */
248 static void skb_drop_fraglist(struct sk_buff *skb)
249 {
250     struct sk_buff *list = skb_shinfo(skb)->frag_list;
251 
252     skb_shinfo(skb)->frag_list = NULL;
253     
254     /* 循环前进，直到没有为止。 */
255     do {
256         struct sk_buff *this = list;
257         list = list->next;
258         kfree_skb(this);
259     } while (list);
260 }
261 
262 static void skb_clone_fraglist(struct sk_buff *skb)
263 {
264     struct sk_buff *list;
265     /* 对当前skb的frag_list区链上的每个skb增加引用计数。 */
266     for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
267         skb_get(list);
268 }
269 
270 void skb_release_data(struct sk_buff *skb)
271 {
272     /* 查看skb是否被clone？skb_shinfo的dataref是否为0？
273      * 如果是，那么就释放skb非线性区域和线性区域。 */
274     if (!skb->cloned ||
275      !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
276              &skb_shinfo(skb)->dataref)) {
277         
278         /* 释放page frags区 */
279         if (skb_shinfo(skb)->nr_frags) {
280             int i;
281             for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
282                 put_page(skb_shinfo(skb)->frags[i].page);
283         }
284 
285         /* 释放frag_list区 */
286         if (skb_shinfo(skb)->frag_list)
287             skb_drop_fraglist(skb);
288 
289         /* 释放线性区域 */
290         kfree(skb->head);
291     }
292 }
293 
294 /*
295  * Free an skbuff by memory without cleaning the state.
296  */
297 
298 /* 把skb自身和线性，非线性区域全部释放 */
299 void kfree_skbmem(struct sk_buff *skb)
300 {
301     skb_release_data(skb);
302     kmem_cache_free(skbuff_head_cache, skb);
303 }
304 
305 /**
306  * __kfree_skb - private function
307  * @skb: buffer
308  *
309  * Free an sk_buff. Release anything attached to the buffer.
310  * Clean the state. This is an internal helper function. Users should
311  * always call kfree_skb
312  */
313 /* 这个函数应该也能算是一个wrapper函数 */
314 
315 void __kfree_skb(struct sk_buff *skb)
316 {
317     BUG_ON(skb->list != NULL);
318 
319     dst_release(skb->dst);
320 #ifdef CONFIG_XFRM
321     secpath_put(skb->sp);
322 #endif
323     if (skb->destructor) {
324         WARN_ON(in_irq());
325         skb->destructor(skb);
326     }
327 #ifdef CONFIG_NETFILTER
328     nf_conntrack_put(skb->nfct);
329 #ifdef CONFIG_BRIDGE_NETFILTER
330     nf_bridge_put(skb->nf_bridge);
331 #endif
332 #endif
333 /* XXX: IS this still necessary? - JHS */
334 #ifdef CONFIG_NET_SCHED
335     skb->tc_index = 0;
336 #ifdef CONFIG_NET_CLS_ACT
337     skb->tc_verd = 0;
338     skb->tc_classid = 0;
339 #endif
340 #endif
341 
342     kfree_skbmem(skb);
343 }
344 
345 /**
346  * skb_clone - duplicate an sk_buff
347  * @skb: buffer to clone
348  * @gfp_mask: allocation priority
349  *
350  * Duplicate an &sk_buff. The new one is not owned by a socket. Both
351  * copies share the same packet data but not structure. The new
352  * buffer has a reference count of 1. If the allocation fails the
353  * function returns %NULL otherwise the new buffer is returned.
354  *
355  * If this function is called from an interrupt gfp_mask() must be
356  * %GFP_ATOMIC.
357  */
358 
359 struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
360 {
361     /* 从cache池中分配一个skb */
362     struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
363 
364     if (!n) 
365         return NULL;
366     
367     /* 这个C(x) 就是clone的意思 */
368 #define C(x) n->x = skb->x
369 
370     n->next = n->prev = NULL;
371     n->list = NULL;
372     n->sk = NULL;
373     /* 把skb中各个成员都clone过去 */
374     C(stamp);
375     C(dev);
376     C(real_dev);
377     C(h);
378     C(nh);
379     C(mac);
380     C(dst);
381     dst_clone(skb->dst);
382     C(sp);
383 #ifdef CONFIG_INET
384     secpath_get(skb->sp);
385 #endif
386     memcpy(n->cb, skb->cb, sizeof(skb->cb));
387     C(len);
388     C(data_len);
389     C(csum);
390     C(local_df);
391     /* 新分配的skb是clone的 */
392     n->cloned = 1;
393     n->nohdr = 0;
394     C(pkt_type);
395     C(ip_summed);
396     C(priority);
397     C(protocol);
398     C(security);
399     n->destructor = NULL;
400 #ifdef CONFIG_NETFILTER
401     C(nfmark);
402     C(nfcache);
403     C(nfct);
404     nf_conntrack_get(skb->nfct);
405     C(nfctinfo);
406 #ifdef CONFIG_NETFILTER_DEBUG
407     C(nf_debug);
408 #endif
409 #ifdef CONFIG_BRIDGE_NETFILTER
410     C(nf_bridge);
411     nf_bridge_get(skb->nf_bridge);
412 #endif
413 #endif /*CONFIG_NETFILTER*/
414 #if defined(CONFIG_HIPPI)
415     C(private);
416 #endif
417 #ifdef CONFIG_NET_SCHED
418     C(tc_index);
419 #ifdef CONFIG_NET_CLS_ACT
420     n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
421     n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
422     n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
423     C(input_dev);
424     C(tc_classid);
425 #endif
426 
427 #endif
428     C(truesize);
429     /* 新skb的users初始化为1 */
430     atomic_set(&n->users, 1);
431     C(head);
432     C(data);
433     C(tail);
434     C(end);
435     
436     /* 增加被clone的skb的数据引用 */
437     atomic_inc(&(skb_shinfo(skb)->dataref));
438     /* 设置原skb也是被clone了 */
439     skb->cloned = 1;
440 
441     return n;
442 }
443 
444 
445 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
446 {
447     /*
448      * Shift between the two data areas in bytes
449      */
450     /* 为了等一下要给网络各层的指针赋值，现在要先算出两个data的偏移量 */
451     unsigned long offset = new->data - old->data;
452 
453     new->list = NULL;
454     new->sk = NULL;
455     new->dev = old->dev;
456     new->real_dev = old->real_dev;
457     new->priority = old->priority;
458     new->protocol = old->protocol;
459     new->dst = dst_clone(old->dst);
460 #ifdef CONFIG_INET
461     new->sp = secpath_get(old->sp);
462 #endif
463     /* 用上面算出来的offset来算 */
464     new->h.raw = old->h.raw + offset;
465     new->nh.raw = old->nh.raw + offset;
466     new->mac.raw = old->mac.raw + offset;
467 
468     /* 拷贝control block */
469     memcpy(new->cb, old->cb, sizeof(old->cb));
470 
471     new->local_df = old->local_df;
472     new->pkt_type = old->pkt_type;
473     new->stamp = old->stamp;
474     new->destructor = NULL;
475     new->security = old->security;
476 #ifdef CONFIG_NETFILTER
477     new->nfmark = old->nfmark;
478     new->nfcache = old->nfcache;
479     new->nfct = old->nfct;
480     nf_conntrack_get(old->nfct);
481     new->nfctinfo = old->nfctinfo;
482 #ifdef CONFIG_NETFILTER_DEBUG
483     new->nf_debug = old->nf_debug;
484 #endif
485 #ifdef CONFIG_BRIDGE_NETFILTER
486     new->nf_bridge = old->nf_bridge;
487     nf_bridge_get(old->nf_bridge);
488 #endif
489 #endif
490 #ifdef CONFIG_NET_SCHED
491 #ifdef CONFIG_NET_CLS_ACT
492     new->tc_verd = old->tc_verd;
493 #endif
494     new->tc_index = old->tc_index;
495 #endif
496     /* 设置新的skb的users为1 */
497     atomic_set(&new->users, 1);
498 
499     /* 把skb_shinfo的东西也一起copy过去 */
500     skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
501     skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
502 }
503 
504 /**
505  * skb_copy - create private copy of an sk_buff
506  * @skb: buffer to copy
507  * @gfp_mask: allocation priority
508  *
509  * Make a copy of both an &sk_buff and its data. This is used when the
510  * caller wishes to modify the data and needs a private copy of the
511  * data to alter. Returns %NULL on failure or the pointer to the buffer
512  * on success. The returned buffer has a reference count of 1.
513  *
514  * As by-product this function converts non-linear &sk_buff to linear
515  * one, so that &sk_buff becomes completely private and caller is allowed
516  * to modify all the data of returned buffer. This means that this
517  * function is not recommended for use in circumstances when only
518  * header is going to be modified. Use pskb_copy() instead.
519  */
520 
521 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
522 {
523     int headerlen = skb->data - skb->head;
524     /*
525      * Allocate the copy buffer
526      */
527 
528     /* 
529      * 分配内存包含线性数据区的长度和非线性数据区的长度 
530      * data_len是指非线性数据区的长度。
531      */
532     struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
533                  gfp_mask);
534     if (!n)
535         return NULL;
536 
537     /* Set the data pointer */
538     /* 预留头的长度 */
539     skb_reserve(n, headerlen);
540     /* Set the tail pointer and length */
541     /* len是指线性和非线性数据的总长，把tail往后推 */
542     skb_put(n, skb->len);
543     n->csum = skb->csum;
544     n->ip_summed = skb->ip_summed;
545     /* 因为 skb_copy_bits 函数中 offset是对有效负载的，即skb->data。
546      * 因此这里的offset为-headerlen。目的是从skb->data向前推headerlen。
547      * 从skb的head处拷贝到n的head处。这个函数把skb的线性和非线性部分全部拷贝到
548      * n的线性部分去了。
549      */
550     if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
551         BUG();
552     
553     /* 把skb的本身复制到n的本身 */
554     copy_skb_header(n, skb);
555     return n;
556 }
557 
558 
559 /**
560  * pskb_copy - create copy of an sk_buff with private head.
561  * @skb: buffer to copy
562  * @gfp_mask: allocation priority
563  *
564  * Make a copy of both an &sk_buff and part of its data, located
565  * in header. Fragmented data remain shared. This is used when
566  * the caller wishes to modify only header of &sk_buff and needs
567  * private copy of the header to alter. Returns %NULL on failure
568  * or the pointer to the buffer on success.
569  * The returned buffer has a reference count of 1.
570  */
571 
572 struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
573 {
574     /*
575      * Allocate the copy buffer
576      */
577     /* 分配一个新的skb_buff n,它的线性区长度是和原skb长度一样 */
578     struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
579 
580     if (!n)
581         goto out;
582 
583     /* Set the data pointer */
584     /* 预留head到data之间的空隙 */
585     skb_reserve(n, skb->data - skb->head);
586 
587     /* Set the tail pointer and length */
588     /* 准备向n放数据，试放数据长度是skb的header section的长度 */
589     skb_put(n, skb_headlen(skb));
590 
591     /* Copy the bytes */
592     /* 拷贝有效负载，长度是n->len。上面skb_put中已经把n->len赋值成skb_headlen(skb)
593      * 所以这里拷贝线性区域的长度。
594      */
595     memcpy(n->data, skb->data, n->len);
596 
597     /* 复制skb本身信息到n */
598     n->csum = skb->csum;
599     n->ip_summed = skb->ip_summed;
600 
601     n->data_len = skb->data_len;
602     n->len = skb->len;
603 
604     /* 把skb中page frags的指针复制到n的page frags。 */
605     if (skb_shinfo(skb)->nr_frags) {
606         int i;
607 
608         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
609             skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
610             get_page(skb_shinfo(n)->frags[i].page);
611         }
612         skb_shinfo(n)->nr_frags = i;
613     }
614 
615     /* 把skb中frag_list地址复制到n的frag_list */
616     if (skb_shinfo(skb)->frag_list) {
617         skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
618         skb_clone_fraglist(n);
619     }
620 
621     /* 把skb的本身复制到n的本身 */
622     copy_skb_header(n, skb);
623 out:
624     return n;
625 }
626 
627 /**
628  * pskb_expand_head - reallocate header of &sk_buff
629  * @skb: buffer to reallocate
630  * @nhead: room to add at head
631  * @ntail: room to add at tail
632  * @gfp_mask: allocation priority
633  *
634  * Expands (or creates identical copy, if &nhead and &ntail are zero)
635  * header of skb. &sk_buff itself is not changed. &sk_buff MUST have
636  * reference count of 1. Returns zero in the case of success or error,
637  * if expansion failed. In the last case, &sk_buff is not changed.
638  *
639  * All the pointers pointing into skb header may change and must be
640  * reloaded after call to this function.
641  */
642 /* 这个函数要注意的是原来的skb结构体并没有释放
643  * 释放的是header section数据区。
644  */
645 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
646 {
647     int i;
648     u8 *data;
649     /* 算出原来线性区的长度，再加上现在要求的增加的headroom和tailroom。 */
650     int size = nhead + (skb->end - skb->head) + ntail;
651     long off;
652 
653     if (skb_shared(skb))
654         BUG();
655     
656     /* 对齐size的大小 */
657     size = SKB_DATA_ALIGN(size);
658 
659     /* 按照要求分配新的header section */
660     data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
661     if (!data)
662         goto nodata;
663 
664     /* Copy only real data... and, alas, header. This should be
665      * optimized for the cases when header is void. */
666     /* 拷贝payload到正确的位置上 */
667     memcpy(data + nhead, skb->head, skb->tail - skb->head);
668     memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
669 
670     /* 下面复制page frags区域和fraglist区域的指针 */
671     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
672         get_page(skb_shinfo(skb)->frags[i].page);
673 
674     if (skb_shinfo(skb)->frag_list)
675         skb_clone_fraglist(skb);
676 
677     /* 释放原来的数据区 */
678     skb_release_data(skb);
679     
680     /* 计算偏移量 */
681     off = (data + nhead) - skb->head;
682 
683     skb->head = data;
684     skb->end = data + size;
685     skb->data += off;
686     skb->tail += off;
687     skb->mac.raw += off;
688     skb->h.raw += off;
689     skb->nh.raw += off;
690     skb->cloned = 0;
691     skb->nohdr = 0;
692     atomic_set(&skb_shinfo(skb)->dataref, 1);
693     return 0;
694 
695 nodata:
696     return -ENOMEM;
697 }
698 
699 /* Make private copy of skb with writable head and some headroom */
700 
701 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
702 {
703     struct sk_buff *skb2;
704     /* 计算现在要求的headroom 和原来headroom之间的差值 */
705     int delta = headroom - skb_headroom(skb);
706     
707     /* 如果现在要求的headroom没有原来的headroom大，那说明原来的header section可以用，
708      * 所以只要用pskb_copy复制一份skb结构体和它的线性区域就可以了。
709      */
710     if (delta <= 0)
711         skb2 = pskb_copy(skb, GFP_ATOMIC);
712     else {
713         /* 如果要求的headroom比原来的headroom大的话，clone一个skb */
714         skb2 = skb_clone(skb, GFP_ATOMIC);
715         /* 把新clone的skb用pskb_expand_head扩大headroom */
716         if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
717                      GFP_ATOMIC)) {
718             kfree_skb(skb2);
719             skb2 = NULL;
720         }
721     }
722     return skb2;
723 }
724 
725 
726 /**
727  * skb_copy_expand - copy and expand sk_buff
728  * @skb: buffer to copy
729  * @newheadroom: new free bytes at head
730  * @newtailroom: new free bytes at tail
731  * @gfp_mask: allocation priority
732  *
733  * Make a copy of both an &sk_buff and its data and while doing so
734  * allocate additional space.
735  *
736  * This is used when the caller wishes to modify the data and needs a
737  * private copy of the data to alter as well as more space for new fields.
738  * Returns %NULL on failure or the pointer to the buffer
739  * on success. The returned buffer has a reference count of 1.
740  *
741  * You must pass %GFP_ATOMIC as the allocation priority if this function
742  * is called from an interrupt.
743  *
744  * BUG ALERT: ip_summed is not copied. Why does this work? Is it used
745  * only by netfilter in the cases when checksum is recalculated? --ANK
746  */
747 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
748                 int newheadroom, int newtailroom, int gfp_mask)
749 {
750     /*
751      * Allocate the copy buffer
752      */
753     /* 分配一个新的skb结构体，header section长度是原来的skb所有数据长度加上新的skb要求的headroom
754      * 和要求的tailroom。目的是把原来的SKB线性化。
755      */
756     struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
757                  gfp_mask);
758     int head_copy_len, head_copy_off;
759 
760     if (!n)
761         return NULL;
762 
763     /* 新的sk_buff n的headroom长度为newheadroom */
764     skb_reserve(n, newheadroom);
765 
766     /* Set the tail pointer and length */
767     /* 设置tail指针和n->len */
768     skb_put(n, skb->len);
769 
770     /* 设置head_copy_len 为老的skb的headroom */
771     head_copy_len = skb_headroom(skb);
772     head_copy_off = 0;
773     /* 如果新的headroom比老的headroom小，
774      *　拷贝长度就为新的headroom的长度。
775      */
776     if (newheadroom <= head_copy_len)
777         head_copy_len = newheadroom;
778     else
779         head_copy_off = newheadroom - head_copy_len;
780 
781     /* Copy the linear header and data. */
782     /* offset为原来skb->data-head_copy_len */
783     if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
784              skb->len + head_copy_len))
785         BUG();
786 
787     /* 拷贝skb结构体到n结构体 */
788     copy_skb_header(n, skb);
789 
790     return n;
791 }
792 
793 /**
794  * skb_pad - zero pad the tail of an skb
795  * @skb: buffer to pad
796  * @pad: space to pad
797  *
798  * Ensure that a buffer is followed by a padding area that is zero
799  * filled. Used by network drivers which may DMA or transfer data
800  * beyond the buffer end onto the wire.
801  *
802  * May return NULL in out of memory cases.
803  */
804  
805 struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
806 {
807     struct sk_buff *nskb;
808     
809     /* If the skbuff is non linear tailroom is always zero.. */
810     /* 如果需要pad的长度比skb_tailroom小的话，
811      * 就直接从skb->data+skb->len,开始清零.
812      */
813     if (skb_tailroom(skb) >= pad) {
814         memset(skb->data+skb->len, 0, pad);
815         return skb;
816     }
817     
818     /* 如果需要pad的长度比tailroom长的话，就skb_copy_expand */
819     nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad,GFP_ATOMIC);
820     /* 释放原来的SKB */
821     kfree_skb(skb);
822     /* 清零 */
823     if (nskb)
824         memset(nskb->data+nskb->len, 0, pad);
825     return nskb;
826 }
相关阅读:
POJ1704 Georgia and Bob
BZOJ1299 巧克力棒
 IPSec
GRE协议
 L2TP协议
 AAA及Radius
网络安全概论
 路由策略与引入
 BGP协议
 路由协议
原文地址：https://www.cnblogs.com/enki-fang/p/9961844.html