• sk_buff


      1 /*
      2  * Routines having to do with the 'struct sk_buff' memory handlers.
      3  *
      4  * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
      5  * Florian La Roche <rzsfl@rz.uni-sb.de>
      6  *
      7  * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
      8  *
      9  * Fixes:
     10  * Alan Cox : Fixed the worst of the load
     11  * balancer bugs.
     12  * Dave Platt : Interrupt stacking fix.
     13  * Richard Kooijman : Timestamp fixes.
     14  * Alan Cox : Changed buffer format.
     15  * Alan Cox : destructor hook for AF_UNIX etc.
     16  * Linus Torvalds : Better skb_clone.
     17  * Alan Cox : Added skb_copy.
     18  * Alan Cox : Added all the changed routines Linus
     19  * only put in the headers
     20  * Ray VanTassle : Fixed --skb->lock in free
     21  * Alan Cox : skb_copy copy arp field
     22  * Andi Kleen : slabified it.
     23  * Robert Olsson : Removed skb_head_pool
     24  *
     25  * NOTE:
     26  * The __skb_ routines should be called with interrupts
     27  * disabled, or you better be *real* sure that the operation is atomic
     28  * with respect to whatever list is being frobbed (e.g. via lock_sock()
     29  * or via disabling bottom half handlers, etc).
     30  *
     31  * This program is free software; you can redistribute it and/or
     32  * modify it under the terms of the GNU General Public License
     33  * as published by the Free Software Foundation; either version
     34  * 2 of the License, or (at your option) any later version.
     35  */
     36 
     37 /*
     38  * The functions in this file will not compile correctly with gcc 2.4.x
     39  */
     40 
     41 #include <linux/config.h>
     42 #include <linux/module.h>
     43 #include <linux/types.h>
     44 #include <linux/kernel.h>
     45 #include <linux/sched.h>
     46 #include <linux/mm.h>
     47 #include <linux/interrupt.h>
     48 #include <linux/in.h>
     49 #include <linux/inet.h>
     50 #include <linux/slab.h>
     51 #include <linux/netdevice.h>
     52 #ifdef CONFIG_NET_CLS_ACT
     53 #include <net/pkt_sched.h>
     54 #endif
     55 #include <linux/string.h>
     56 #include <linux/skbuff.h>
     57 #include <linux/cache.h>
     58 #include <linux/rtnetlink.h>
     59 #include <linux/init.h>
     60 #include <linux/highmem.h>
     61 
     62 #include <net/protocol.h>
     63 #include <net/dst.h>
     64 #include <net/sock.h>
     65 #include <net/checksum.h>
     66 #include <net/xfrm.h>
     67 
     68 #include <asm/uaccess.h>
     69 #include <asm/system.h>
     70 
     71 static kmem_cache_t *skbuff_head_cache;
     72 
     73 /*
     74  * Keep out-of-line to prevent kernel bloat.
     75  * __builtin_return_address is not used because it is not always
     76  * reliable.
     77  */
     78 
     79 /**
     80  * skb_over_panic - private function
     81  * @skb: buffer
     82  * @sz: size
     83  * @here: address
     84  *
     85  * Out of line support code for skb_put(). Not user callable.
     86  */
     87 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
     88 {
     89     printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
     90      "data:%p tail:%p end:%p dev:%s/n",
     91      here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
     92      skb->dev ? skb->dev->name : "<NULL>");
     93     BUG();
     94 }
     95 
     96 /**
     97  * skb_under_panic - private function
     98  * @skb: buffer
     99  * @sz: size
    100  * @here: address
    101  *
    102  * Out of line support code for skb_push(). Not user callable.
    103  */
    104 
    105 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
    106 {
    107     printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
    108      "data:%p tail:%p end:%p dev:%s/n",
    109      here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
    110      skb->dev ? skb->dev->name : "<NULL>");
    111     BUG();
    112 }
    113 
    114 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
    115  * 'private' fields and also do memory statistics to find all the
    116  * [BEEP] leaks.
    117  *
    118  */
    119 
    120 /**
    121  * alloc_skb - allocate a network buffer
    122  * @size: size to allocate
    123  * @gfp_mask: allocation mask
    124  *
    125  * Allocate a new &sk_buff. The returned buffer has no headroom and a
    126  * tail room of size bytes. The object has a reference count of one.
    127  * The return is the buffer. On a failure the return is %NULL.
    128  *
    129  * Buffers may only be allocated from interrupts using a @gfp_mask of
    130  * %GFP_ATOMIC.
    131  */
    132 struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
    133 {
    134     struct sk_buff *skb;
    135     u8 *data;
    136 
    137     /* Get the HEAD */
    138     /* 从cache缓冲池中获取内存 */
    139     skb = kmem_cache_alloc(skbuff_head_cache,
    140              gfp_mask & ~__GFP_DMA);
    141     if (!skb)
    142         goto out;
    143 
    144     /* Get the DATA. Size must match skb_add_mtu(). */
    145 
    146     /* 对其size */
    147     size = SKB_DATA_ALIGN(size);
    148 
    149     /* 分配的缓冲长度包含skb_shared_info的长度 */
    150     data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
    151     if (!data)
    152         goto nodata;
    153     
    154     /* 
    155      * offsetof是一个编译器宏或者是自定义的宏,用于计算member在struct中的偏移量。
    156      * 把在truesize前面的field全部清零。
    157      */
    158     memset(skb, 0, offsetof(struct sk_buff, truesize));
    159     
    160     /* truesize是广义SKB的大小,包含了4个部分的长度:skb自身,header,page frags,frag list */
    161     skb->truesize = size + sizeof(struct sk_buff);
    162     
    163     /* users初始化成1 */
    164     atomic_set(&skb->users, 1);
    165 
    166     /* 初始化所有数据指针 */
    167     skb->head = data;
    168     skb->data = data;
    169     skb->tail = data;
    170     skb->end = data + size;
    171     
    172     /* 
    173      * skb_shinfo是个宏,#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end))
    174      * 所以用这个宏的时候必须等skb->end已经初始化。
    175      * skb_shinfo 接在skb->end指向的内存空间后面。
    176       */
    177 
    178     /* 初始化skb_shared_info结构体 */
    179     atomic_set(&(skb_shinfo(skb)->dataref), 1);
    180     skb_shinfo(skb)->nr_frags = 0;
    181     skb_shinfo(skb)->tso_size = 0;
    182     skb_shinfo(skb)->tso_segs = 0;
    183     skb_shinfo(skb)->frag_list = NULL;
    184 out:
    185     return skb;
    186 nodata:
    187     kmem_cache_free(skbuff_head_cache, skb);
    188     skb = NULL;
    189     goto out;
    190 }
    191 
    192 /**
    193  * alloc_skb_from_cache - allocate a network buffer
    194  * @cp: kmem_cache from which to allocate the data area
    195  * (object size must be big enough for @size bytes + skb overheads)
    196  * @size: size to allocate
    197  * @gfp_mask: allocation mask
    198  *
    199  * Allocate a new &sk_buff. The returned buffer has no headroom and
    200  * tail room of size bytes. The object has a reference count of one.
    201  * The return is the buffer. On a failure the return is %NULL.
    202  *
    203  * Buffers may only be allocated from interrupts using a @gfp_mask of
    204  * %GFP_ATOMIC.
    205  */
    206 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
    207                  unsigned int size, int gfp_mask)
    208 {
    209     struct sk_buff *skb;
    210     u8 *data;
    211 
    212     /* Get the HEAD */
    213     skb = kmem_cache_alloc(skbuff_head_cache,
    214              gfp_mask & ~__GFP_DMA);
    215     if (!skb)
    216         goto out;
    217 
    218     /* Get the DATA. */
    219     size = SKB_DATA_ALIGN(size);
    220     
    221     /* 这个函数和上面函数不同的地方就在下面这句,不用kmalloc,而用kmem_cache_alloc。 */
    222     data = kmem_cache_alloc(cp, gfp_mask);
    223     if (!data)
    224         goto nodata;
    225 
    226     memset(skb, 0, offsetof(struct sk_buff, truesize));
    227     skb->truesize = size + sizeof(struct sk_buff);
    228     atomic_set(&skb->users, 1);
    229     skb->head = data;
    230     skb->data = data;
    231     skb->tail = data;
    232     skb->end = data + size;
    233 
    234     atomic_set(&(skb_shinfo(skb)->dataref), 1);
    235     skb_shinfo(skb)->nr_frags = 0;
    236     skb_shinfo(skb)->tso_size = 0;
    237     skb_shinfo(skb)->tso_segs = 0;
    238     skb_shinfo(skb)->frag_list = NULL;
    239 out:
    240     return skb;
    241 nodata:
    242     kmem_cache_free(skbuff_head_cache, skb);
    243     skb = NULL;
    244     goto out;
    245 }
    246 
    247 /* 这个函数是用来释放当前skb的frag_list区的 */
    248 static void skb_drop_fraglist(struct sk_buff *skb)
    249 {
    250     struct sk_buff *list = skb_shinfo(skb)->frag_list;
    251 
    252     skb_shinfo(skb)->frag_list = NULL;
    253     
    254     /* 循环前进,直到没有为止。 */
    255     do {
    256         struct sk_buff *this = list;
    257         list = list->next;
    258         kfree_skb(this);
    259     } while (list);
    260 }
    261 
    262 static void skb_clone_fraglist(struct sk_buff *skb)
    263 {
    264     struct sk_buff *list;
    265     /* 对当前skb的frag_list区链上的每个skb增加引用计数。 */
    266     for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
    267         skb_get(list);
    268 }
    269 
    270 void skb_release_data(struct sk_buff *skb)
    271 {
    272     /* 查看skb是否被clone?skb_shinfo的dataref是否为0?
    273      * 如果是,那么就释放skb非线性区域和线性区域。 */
    274     if (!skb->cloned ||
    275      !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
    276              &skb_shinfo(skb)->dataref)) {
    277         
    278         /* 释放page frags区 */
    279         if (skb_shinfo(skb)->nr_frags) {
    280             int i;
    281             for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
    282                 put_page(skb_shinfo(skb)->frags[i].page);
    283         }
    284 
    285         /* 释放frag_list区 */
    286         if (skb_shinfo(skb)->frag_list)
    287             skb_drop_fraglist(skb);
    288 
    289         /* 释放线性区域 */
    290         kfree(skb->head);
    291     }
    292 }
    293 
    294 /*
    295  * Free an skbuff by memory without cleaning the state.
    296  */
    297 
    298 /* 把skb自身和线性,非线性区域全部释放 */
    299 void kfree_skbmem(struct sk_buff *skb)
    300 {
    301     skb_release_data(skb);
    302     kmem_cache_free(skbuff_head_cache, skb);
    303 }
    304 
/**
 *  __kfree_skb - private function
 *  @skb: buffer
 *
 *  Free an sk_buff. Release anything attached to the buffer.
 *  Clean the state. This is an internal helper function. Users should
 *  always call kfree_skb
 *
 *  The buffer must not be on a list (skb->list must be NULL), otherwise
 *  BUG_ON() triggers. After the attached objects have been released the
 *  memory itself is handed to kfree_skbmem().
 */
/* In effect this is a wrapper around kfree_skbmem(). */

void __kfree_skb(struct sk_buff *skb)
{
    /* Freeing a buffer that is still queued is a bug. */
    BUG_ON(skb->list != NULL);

    dst_release(skb->dst);
#ifdef CONFIG_XFRM
    secpath_put(skb->sp);
#endif
    /* Run the destructor hook if one is set; warn when in hard-irq context. */
    if (skb->destructor) {
        WARN_ON(in_irq());
        skb->destructor(skb);
    }
#ifdef CONFIG_NETFILTER
    nf_conntrack_put(skb->nfct);
#ifdef CONFIG_BRIDGE_NETFILTER
    nf_bridge_put(skb->nf_bridge);
#endif
#endif
/* XXX: IS this still necessary? - JHS */
#ifdef CONFIG_NET_SCHED
    skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
    skb->tc_verd = 0;
    skb->tc_classid = 0;
#endif
#endif

    /* Release the data and the sk_buff structure itself. */
    kfree_skbmem(skb);
}
    344 
    345 /**
    346  * skb_clone - duplicate an sk_buff
    347  * @skb: buffer to clone
    348  * @gfp_mask: allocation priority
    349  *
    350  * Duplicate an &sk_buff. The new one is not owned by a socket. Both
    351  * copies share the same packet data but not structure. The new
    352  * buffer has a reference count of 1. If the allocation fails the
    353  * function returns %NULL otherwise the new buffer is returned.
    354  *
    355  * If this function is called from an interrupt gfp_mask() must be
    356  * %GFP_ATOMIC.
    357  */
    358 
    359 struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
    360 {
    361     /* 从cache池中分配一个skb */
    362     struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
    363 
    364     if (!n) 
    365         return NULL;
    366     
    367     /* 这个C(x) 就是clone的意思 */
    368 #define C(x) n->x = skb->x
    369 
    370     n->next = n->prev = NULL;
    371     n->list = NULL;
    372     n->sk = NULL;
    373     /* 把skb中各个成员都clone过去 */
    374     C(stamp);
    375     C(dev);
    376     C(real_dev);
    377     C(h);
    378     C(nh);
    379     C(mac);
    380     C(dst);
    381     dst_clone(skb->dst);
    382     C(sp);
    383 #ifdef CONFIG_INET
    384     secpath_get(skb->sp);
    385 #endif
    386     memcpy(n->cb, skb->cb, sizeof(skb->cb));
    387     C(len);
    388     C(data_len);
    389     C(csum);
    390     C(local_df);
    391     /* 新分配的skb是clone的 */
    392     n->cloned = 1;
    393     n->nohdr = 0;
    394     C(pkt_type);
    395     C(ip_summed);
    396     C(priority);
    397     C(protocol);
    398     C(security);
    399     n->destructor = NULL;
    400 #ifdef CONFIG_NETFILTER
    401     C(nfmark);
    402     C(nfcache);
    403     C(nfct);
    404     nf_conntrack_get(skb->nfct);
    405     C(nfctinfo);
    406 #ifdef CONFIG_NETFILTER_DEBUG
    407     C(nf_debug);
    408 #endif
    409 #ifdef CONFIG_BRIDGE_NETFILTER
    410     C(nf_bridge);
    411     nf_bridge_get(skb->nf_bridge);
    412 #endif
    413 #endif /*CONFIG_NETFILTER*/
    414 #if defined(CONFIG_HIPPI)
    415     C(private);
    416 #endif
    417 #ifdef CONFIG_NET_SCHED
    418     C(tc_index);
    419 #ifdef CONFIG_NET_CLS_ACT
    420     n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
    421     n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
    422     n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
    423     C(input_dev);
    424     C(tc_classid);
    425 #endif
    426 
    427 #endif
    428     C(truesize);
    429     /* 新skb的users初始化为1 */
    430     atomic_set(&n->users, 1);
    431     C(head);
    432     C(data);
    433     C(tail);
    434     C(end);
    435     
    436     /* 增加被clone的skb的数据引用 */
    437     atomic_inc(&(skb_shinfo(skb)->dataref));
    438     /* 设置原skb也是被clone了 */
    439     skb->cloned = 1;
    440 
    441     return n;
    442 }
    443 
    444 
    445 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
    446 {
    447     /*
    448      * Shift between the two data areas in bytes
    449      */
    450     /* 为了等一下要给网络各层的指针赋值,现在要先算出两个data的偏移量 */
    451     unsigned long offset = new->data - old->data;
    452 
    453     new->list = NULL;
    454     new->sk = NULL;
    455     new->dev = old->dev;
    456     new->real_dev = old->real_dev;
    457     new->priority = old->priority;
    458     new->protocol = old->protocol;
    459     new->dst = dst_clone(old->dst);
    460 #ifdef CONFIG_INET
    461     new->sp = secpath_get(old->sp);
    462 #endif
    463     /* 用上面算出来的offset来算 */
    464     new->h.raw = old->h.raw + offset;
    465     new->nh.raw = old->nh.raw + offset;
    466     new->mac.raw = old->mac.raw + offset;
    467 
    468     /* 拷贝control block */
    469     memcpy(new->cb, old->cb, sizeof(old->cb));
    470 
    471     new->local_df = old->local_df;
    472     new->pkt_type = old->pkt_type;
    473     new->stamp = old->stamp;
    474     new->destructor = NULL;
    475     new->security = old->security;
    476 #ifdef CONFIG_NETFILTER
    477     new->nfmark = old->nfmark;
    478     new->nfcache = old->nfcache;
    479     new->nfct = old->nfct;
    480     nf_conntrack_get(old->nfct);
    481     new->nfctinfo = old->nfctinfo;
    482 #ifdef CONFIG_NETFILTER_DEBUG
    483     new->nf_debug = old->nf_debug;
    484 #endif
    485 #ifdef CONFIG_BRIDGE_NETFILTER
    486     new->nf_bridge = old->nf_bridge;
    487     nf_bridge_get(old->nf_bridge);
    488 #endif
    489 #endif
    490 #ifdef CONFIG_NET_SCHED
    491 #ifdef CONFIG_NET_CLS_ACT
    492     new->tc_verd = old->tc_verd;
    493 #endif
    494     new->tc_index = old->tc_index;
    495 #endif
    496     /* 设置新的skb的users为1 */
    497     atomic_set(&new->users, 1);
    498 
    499     /* 把skb_shinfo的东西也一起copy过去 */
    500     skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
    501     skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
    502 }
    503 
    504 /**
    505  * skb_copy - create private copy of an sk_buff
    506  * @skb: buffer to copy
    507  * @gfp_mask: allocation priority
    508  *
    509  * Make a copy of both an &sk_buff and its data. This is used when the
    510  * caller wishes to modify the data and needs a private copy of the
    511  * data to alter. Returns %NULL on failure or the pointer to the buffer
    512  * on success. The returned buffer has a reference count of 1.
    513  *
    514  * As by-product this function converts non-linear &sk_buff to linear
    515  * one, so that &sk_buff becomes completely private and caller is allowed
    516  * to modify all the data of returned buffer. This means that this
    517  * function is not recommended for use in circumstances when only
    518  * header is going to be modified. Use pskb_copy() instead.
    519  */
    520 
    521 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
    522 {
    523     int headerlen = skb->data - skb->head;
    524     /*
    525      * Allocate the copy buffer
    526      */
    527 
    528     /* 
    529      * 分配内存包含线性数据区的长度和非线性数据区的长度 
    530      * data_len是指非线性数据区的长度。
    531      */
    532     struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
    533                  gfp_mask);
    534     if (!n)
    535         return NULL;
    536 
    537     /* Set the data pointer */
    538     /* 预留头的长度 */
    539     skb_reserve(n, headerlen);
    540     /* Set the tail pointer and length */
    541     /* len是指线性和非线性数据的总长,把tail往后推 */
    542     skb_put(n, skb->len);
    543     n->csum = skb->csum;
    544     n->ip_summed = skb->ip_summed;
    545     /* 因为 skb_copy_bits 函数中 offset是对有效负载的,即skb->data。
    546      * 因此这里的offset为-headerlen。目的是从skb->data向前推headerlen。
    547      * 从skb的head处拷贝到n的head处。这个函数把skb的线性和非线性部分全部拷贝到
    548      * n的线性部分去了。
    549      */
    550     if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
    551         BUG();
    552     
    553     /* 把skb的本身复制到n的本身 */
    554     copy_skb_header(n, skb);
    555     return n;
    556 }
    557 
    558 
    559 /**
    560  * pskb_copy - create copy of an sk_buff with private head.
    561  * @skb: buffer to copy
    562  * @gfp_mask: allocation priority
    563  *
    564  * Make a copy of both an &sk_buff and part of its data, located
    565  * in header. Fragmented data remain shared. This is used when
    566  * the caller wishes to modify only header of &sk_buff and needs
    567  * private copy of the header to alter. Returns %NULL on failure
    568  * or the pointer to the buffer on success.
    569  * The returned buffer has a reference count of 1.
    570  */
    571 
    572 struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
    573 {
    574     /*
    575      * Allocate the copy buffer
    576      */
    577     /* 分配一个新的skb_buff n,它的线性区长度是和原skb长度一样 */
    578     struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
    579 
    580     if (!n)
    581         goto out;
    582 
    583     /* Set the data pointer */
    584     /* 预留head到data之间的空隙 */
    585     skb_reserve(n, skb->data - skb->head);
    586 
    587     /* Set the tail pointer and length */
    588     /* 准备向n放数据,试放数据长度是skb的header section的长度 */
    589     skb_put(n, skb_headlen(skb));
    590 
    591     /* Copy the bytes */
    592     /* 拷贝有效负载,长度是n->len。上面skb_put中已经把n->len赋值成skb_headlen(skb)
    593      * 所以这里拷贝线性区域的长度。
    594      */
    595     memcpy(n->data, skb->data, n->len);
    596 
    597     /* 复制skb本身信息到n */
    598     n->csum = skb->csum;
    599     n->ip_summed = skb->ip_summed;
    600 
    601     n->data_len = skb->data_len;
    602     n->len = skb->len;
    603 
    604     /* 把skb中page frags的指针复制到n的page frags。 */
    605     if (skb_shinfo(skb)->nr_frags) {
    606         int i;
    607 
    608         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
    609             skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
    610             get_page(skb_shinfo(n)->frags[i].page);
    611         }
    612         skb_shinfo(n)->nr_frags = i;
    613     }
    614 
    615     /* 把skb中frag_list地址复制到n的frag_list */
    616     if (skb_shinfo(skb)->frag_list) {
    617         skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
    618         skb_clone_fraglist(n);
    619     }
    620 
    621     /* 把skb的本身复制到n的本身 */
    622     copy_skb_header(n, skb);
    623 out:
    624     return n;
    625 }
    626 
    627 /**
    628  * pskb_expand_head - reallocate header of &sk_buff
    629  * @skb: buffer to reallocate
    630  * @nhead: room to add at head
    631  * @ntail: room to add at tail
    632  * @gfp_mask: allocation priority
    633  *
    634  * Expands (or creates identical copy, if &nhead and &ntail are zero)
    635  * header of skb. &sk_buff itself is not changed. &sk_buff MUST have
    636  * reference count of 1. Returns zero in the case of success or error,
    637  * if expansion failed. In the last case, &sk_buff is not changed.
    638  *
    639  * All the pointers pointing into skb header may change and must be
    640  * reloaded after call to this function.
    641  */
    642 /* 这个函数要注意的是原来的skb结构体并没有释放
    643  * 释放的是header section数据区。
    644  */
    645 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
    646 {
    647     int i;
    648     u8 *data;
    649     /* 算出原来线性区的长度,再加上现在要求的增加的headroom和tailroom。 */
    650     int size = nhead + (skb->end - skb->head) + ntail;
    651     long off;
    652 
    653     if (skb_shared(skb))
    654         BUG();
    655     
    656     /* 对齐size的大小 */
    657     size = SKB_DATA_ALIGN(size);
    658 
    659     /* 按照要求分配新的header section */
    660     data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
    661     if (!data)
    662         goto nodata;
    663 
    664     /* Copy only real data... and, alas, header. This should be
    665      * optimized for the cases when header is void. */
    666     /* 拷贝payload到正确的位置上 */
    667     memcpy(data + nhead, skb->head, skb->tail - skb->head);
    668     memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
    669 
    670     /* 下面复制page frags区域和fraglist区域的指针 */
    671     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
    672         get_page(skb_shinfo(skb)->frags[i].page);
    673 
    674     if (skb_shinfo(skb)->frag_list)
    675         skb_clone_fraglist(skb);
    676 
    677     /* 释放原来的数据区 */
    678     skb_release_data(skb);
    679     
    680     /* 计算偏移量 */
    681     off = (data + nhead) - skb->head;
    682 
    683     skb->head = data;
    684     skb->end = data + size;
    685     skb->data += off;
    686     skb->tail += off;
    687     skb->mac.raw += off;
    688     skb->h.raw += off;
    689     skb->nh.raw += off;
    690     skb->cloned = 0;
    691     skb->nohdr = 0;
    692     atomic_set(&skb_shinfo(skb)->dataref, 1);
    693     return 0;
    694 
    695 nodata:
    696     return -ENOMEM;
    697 }
    698 
    699 /* Make private copy of skb with writable head and some headroom */
    700 
    701 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
    702 {
    703     struct sk_buff *skb2;
    704     /* 计算现在要求的headroom 和原来headroom之间的差值 */
    705     int delta = headroom - skb_headroom(skb);
    706     
    707     /* 如果现在要求的headroom没有原来的headroom大,那说明原来的header section可以用,
    708      * 所以只要用pskb_copy复制一份skb结构体和它的线性区域就可以了。
    709      */
    710     if (delta <= 0)
    711         skb2 = pskb_copy(skb, GFP_ATOMIC);
    712     else {
    713         /* 如果要求的headroom比原来的headroom大的话,clone一个skb */
    714         skb2 = skb_clone(skb, GFP_ATOMIC);
    715         /* 把新clone的skb用pskb_expand_head扩大headroom */
    716         if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
    717                      GFP_ATOMIC)) {
    718             kfree_skb(skb2);
    719             skb2 = NULL;
    720         }
    721     }
    722     return skb2;
    723 }
    724 
    725 
    726 /**
    727  * skb_copy_expand - copy and expand sk_buff
    728  * @skb: buffer to copy
    729  * @newheadroom: new free bytes at head
    730  * @newtailroom: new free bytes at tail
    731  * @gfp_mask: allocation priority
    732  *
    733  * Make a copy of both an &sk_buff and its data and while doing so
    734  * allocate additional space.
    735  *
    736  * This is used when the caller wishes to modify the data and needs a
    737  * private copy of the data to alter as well as more space for new fields.
    738  * Returns %NULL on failure or the pointer to the buffer
    739  * on success. The returned buffer has a reference count of 1.
    740  *
    741  * You must pass %GFP_ATOMIC as the allocation priority if this function
    742  * is called from an interrupt.
    743  *
    744  * BUG ALERT: ip_summed is not copied. Why does this work? Is it used
    745  * only by netfilter in the cases when checksum is recalculated? --ANK
    746  */
    747 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
    748                 int newheadroom, int newtailroom, int gfp_mask)
    749 {
    750     /*
    751      * Allocate the copy buffer
    752      */
    753     /* 分配一个新的skb结构体,header section长度是原来的skb所有数据长度加上新的skb要求的headroom
    754      * 和要求的tailroom。目的是把原来的SKB线性化。
    755      */
    756     struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
    757                  gfp_mask);
    758     int head_copy_len, head_copy_off;
    759 
    760     if (!n)
    761         return NULL;
    762 
    763     /* 新的sk_buff n的headroom长度为newheadroom */
    764     skb_reserve(n, newheadroom);
    765 
    766     /* Set the tail pointer and length */
    767     /* 设置tail指针和n->len */
    768     skb_put(n, skb->len);
    769 
    770     /* 设置head_copy_len 为老的skb的headroom */
    771     head_copy_len = skb_headroom(skb);
    772     head_copy_off = 0;
    773     /* 如果新的headroom比老的headroom小,
    774      * 拷贝长度就为新的headroom的长度。
    775      */
    776     if (newheadroom <= head_copy_len)
    777         head_copy_len = newheadroom;
    778     else
    779         head_copy_off = newheadroom - head_copy_len;
    780 
    781     /* Copy the linear header and data. */
    782     /* offset为原来skb->data-head_copy_len */
    783     if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
    784              skb->len + head_copy_len))
    785         BUG();
    786 
    787     /* 拷贝skb结构体到n结构体 */
    788     copy_skb_header(n, skb);
    789 
    790     return n;
    791 }
    792 
    793 /**
    794  * skb_pad - zero pad the tail of an skb
    795  * @skb: buffer to pad
    796  * @pad: space to pad
    797  *
    798  * Ensure that a buffer is followed by a padding area that is zero
    799  * filled. Used by network drivers which may DMA or transfer data
    800  * beyond the buffer end onto the wire.
    801  *
    802  * May return NULL in out of memory cases.
    803  */
    804  
    805 struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
    806 {
    807     struct sk_buff *nskb;
    808     
    809     /* If the skbuff is non linear tailroom is always zero.. */
    810     /* 如果需要pad的长度比skb_tailroom小的话,
    811      * 就直接从skb->data+skb->len,开始清零.
    812      */
    813     if (skb_tailroom(skb) >= pad) {
    814         memset(skb->data+skb->len, 0, pad);
    815         return skb;
    816     }
    817     
    818     /* 如果需要pad的长度比tailroom长的话,就skb_copy_expand */
    819     nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad,GFP_ATOMIC);
    820     /* 释放原来的SKB */
    821     kfree_skb(skb);
    822     /* 清零 */
    823     if (nskb)
    824         memset(nskb->data+nskb->len, 0, pad);
    825     return nskb;
    826 }
  • 相关阅读:
    POJ1704 Georgia and Bob
    BZOJ1299 巧克力棒
    IPSec
    GRE协议
    L2TP协议
    AAA及Radius
    网络安全概论
    路由策略与引入
    BGP协议
    路由协议
  • 原文地址:https://www.cnblogs.com/enki-fang/p/9961844.html
Copyright © 2020-2023  润新知