• ip_rcv 中使用skb_share_check


    /*
     *     Main IP Receive routine.
     *
     *     NOTE: this listing is an excerpt. Only the opening of the function
     *     is shown; the remainder of the body and the drop/out/inhdr_error
     *     labels targeted by the gotos below are not part of this listing.
     */
    int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
    {
        const struct iphdr *iph;
        struct net *net;
        u32 len;
    
        /* When the interface is in promisc. mode, drop all the crap
         * that it receives, do not try to analyse it.
         */
        if (skb->pkt_type == PACKET_OTHERHOST)
            goto drop;
    
    
        net = dev_net(dev);
        IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
    
        /* Make sure we hold a private sk_buff before going further: if the
         * buffer is shared, skb_share_check() hands back a clone and drops
         * our reference on the original. It returns NULL when the clone
         * could not be allocated.
         */
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (!skb) {
            IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
            goto out;
        }

        /*
         * pskb_may_pull() makes sure the memory that skb->data points to
         * holds at least an IP header's worth of data, because every IP
         * packet (IP fragments included) must carry a complete IP header.
         * If the linear area holds less than that, the missing part is
         * copied in from the paged data fragments, which are kept in
         * skb_shinfo(skb)->frags[].
         */

    if (!pskb_may_pull(skb, sizeof(struct iphdr)))
            goto inhdr_error;
    /* The dashed line below is the original author's marker for the omitted
     * remainder of the function. */
    -----------------------------------------------------------
    
    }
    /**
     *    skb_share_check - hand back a buffer that is not shared
     *    @skb: buffer to check
     *    @pri: priority for the memory allocation made when cloning
     *
     *    An unshared buffer is returned untouched. A shared buffer is
     *    cloned; the caller's reference on the original is then released
     *    and the clone is returned instead. Callers running in interrupt
     *    context or holding spinlocks must pass GFP_ATOMIC as @pri.
     *
     *    Returns NULL if the clone could not be allocated; the caller's
     *    reference on @skb has been released in that case as well.
     */
    static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
    {
        struct sk_buff *private_skb;

        might_sleep_if(gfpflags_allow_blocking(pri));

        /* Not shared: the caller may keep using the buffer as is. */
        if (!skb_shared(skb))
            return skb;

        private_skb = skb_clone(skb, pri);

        /* Either way our reference on the shared original goes away:
         * consume_skb() on success, kfree_skb() when cloning failed.
         */
        if (likely(private_skb))
            consume_skb(skb);
        else
            kfree_skb(skb);

        return private_skb;
    }

    为什么要这样做?ip_rcv 调用 skb_share_check 函数后,如果传入的 sk_buff 被共享,就会自己克隆一个 sk_buff,并释放对传入 sk_buff 的引用——这是为了什么?

    -----------原来的skb可能被共享,如果需要修改skb,则会影响共享该skb的其他函数。因此如果被共享,则克隆一份,再释放对原skb的引用:克隆成功时调用consume_skb,克隆失败时调用kfree_skb(实际只是skb->users--,减少引用计数)

    ----------__netif_receive_skb_core 函数中会扫描ptype_all链表与ptype_base哈希表。ptype_base是各种已经注册的协议的哈希表,根据每个数据包协议的不同分派给不同的协议来处理,分派时会递增skb的引用计数

    下面看 pskb_may_pull():在调用 skb_pull() 去掉外层协议头之前,通常先调用 pskb_may_pull() 判断一下是否有足够的数据用于“pull”。
    如果线性 buffer 中的数据已经足够 pull,则返回1;
    如果需要 pull 的数据超过 skb->len,则返回0;
    否则调用 __pskb_pull_tail(),把线性 buffer 中缺少的那部分数据从分片(page buffer / frag_list)拷贝到线性 buffer 中,成功返回1,失败返回0。

    sk_buff->len,表示当前协议下的数据长度,包括线性缓冲区的数据长度和分片的数据长度。线性缓冲区的长度从skb->data指针开始计算。注意skb->data是随着协议不同而变化的

    sk_buff->data_len,只表示分片的数据长度

    sk_buff->truesize,表示该skb实际占用的内存总量,即数据缓冲区的大小加上struct sk_buff结构大小(注意不是 sk_buff->len 加上结构大小)

    /*
     * pskb_may_pull - make sure at least @len bytes sit in the linear area
     * @skb: buffer to check
     * @len: number of bytes that must be reachable starting at skb->data
     *
     * Background (from the original author's notes): skb->len is the total
     * data length at the current protocol layer, covering both the linear
     * buffer (counted from skb->data, which moves as headers are processed)
     * and the fragments; skb->data_len covers the fragment part only.
     *
     * Returns 1 when @len bytes are (or have been made) available in the
     * linear area, 0 otherwise.
     */
    static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
    {
        const unsigned int linear_len = skb_headlen(skb);

        /* Common case: the linear area already holds enough bytes. */
        if (likely(len <= linear_len))
            return 1;

        /* More was requested than the whole skb contains. */
        if (unlikely(len > skb->len))
            return 0;

        /* Move the missing bytes from the fragments into the linear area. */
        return __pskb_pull_tail(skb, len - linear_len) != NULL;
    }
    
    
    /**
     *    __pskb_pull_tail - advance tail of skb header
     *    @skb: buffer to reallocate
     *    @delta: number of bytes to advance tail
     *
     *    The function makes a sense only on a fragmented &sk_buff,
     *    it expands header moving its tail forward and copying necessary
     *    data from fragmented part.
     *
     *    &sk_buff MUST have reference count of 1.
     *
     *    Returns %NULL (and &sk_buff does not change) if pull failed
     *    or value of new tail of skb in the case of success.
     *
     *    All the pointers pointing into skb header may change and must be
     *    reloaded after call to this function.
     */
    
    /* Moves tail of skb head forward, copying data from fragmented part,
     * when it is necessary.
     * 1. It may fail due to malloc failure.
     * 2. It may change skb pointers.
     *
     * It is pretty complicated. Luckily, it is called only in exceptional cases.
     *
     * @delta is the number of bytes that have to be moved forward out of
     * frags[] and/or frag_list into the linear area.
     */
    unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
    {
        /* If skb has not enough free space at tail, get new one
         * plus 128 bytes for future expansions. If we have enough
         * room at tail, reallocate without expansion only if skb is cloned.
         *
         * eat: how many bytes are still missing once the room currently
         * available between tail and end has been used up.
         */
        int i, k, eat = (skb->tail + delta) - skb->end;
    
        /* Reallocate the head when it is too small or when skb is cloned. */
        if (eat > 0 || skb_cloned(skb)) {
            if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
                         GFP_ATOMIC))
                return NULL;
        }

        /* Copy delta bytes, starting at offset skb_headlen(skb) (i.e. right
         * after the current linear data), to the current tail position.
         */
        if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
            BUG();
    
        /* Optimization: no fragments, no reasons to preestimate
         * size of pulled pages. Superb.
         * (No frag_list here, so only frags[] needs trimming.)
         */
        if (!skb_has_frag_list(skb))
            goto pull_pages;

        /* The data now lives in the linear area, so whatever was copied out
         * of frags[] / frag_list has to be released from there. First work
         * out how much of it came from the frags[] array.
         */
        /* Estimate size of pulled pages. */
        eat = delta;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
            /* Walk the pages until they cover the eat bytes. */
            int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
    
            if (size >= eat)
                goto pull_pages;
            eat -= size;
        }
    
        /* If we need update frag list, we are in troubles.
         * Certainly, it possible to add an offset to skb data,
         * but taking into account that pulling is expected to
         * be very rare operation, it is worth to fight against
         * further bloating skb head and crucify ourselves here instead.
         * Pure masohism, indeed. 8)8)
         *
         * eat still non-zero: part of the data was copied out of frag_list,
         * so the consumed part of frag_list has to be released.
         */
        if (eat) {
            struct sk_buff *list = skb_shinfo(skb)->frag_list;
            struct sk_buff *clone = NULL;
            struct sk_buff *insp = NULL;
    
            do {
                /* Running off the list means there was not enough data. */
                BUG_ON(!list);
    
                if (list->len <= eat) {
                    /* Eaten as whole. */
                    eat -= list->len;
                    /* Step to the next skb; insp records the first
                     * skb that has not been consumed entirely.
                     */
                    list = list->next;
                    insp = list;
                } else {
                    /* Eaten partially. */
                    /* This skb holds more than the bytes still needed,
                     * so only its first eat bytes are consumed.
                     */
                    if (skb_shared(list)) {
                        /* Sucks! We need to fork list. :-( */
                        /* It is shared, so trim a clone instead. */
                        clone = skb_clone(list, GFP_ATOMIC);
                        if (!clone)
                            return NULL;
                        /* insp: the skb following the shared one;
                         * list: the clone that gets trimmed.
                         */
                        insp = list->next;
                        list = clone;
                    } else {
                        /* This may be pulled without
                         * problems. */
                        /* list and insp both refer to this skb. */
                        insp = list;
                    }
                    /* Strip the bytes that were copied from the front
                     * of this last skb.
                     */
                    if (!pskb_pull(list, eat)) {
                        /* Drop the reference on the clone (if any). */
                        kfree_skb(clone);
                        return NULL;
                    }
                    break;
                }
            } while (eat);

            /* Starting from the head of frag_list, release every skb in
             * front of insp.
             */
            /* Free pulled out fragments. */
            while ((list = skb_shinfo(skb)->frag_list) != insp) {
                skb_shinfo(skb)->frag_list = list->next;
                kfree_skb(list);
            }
            /* And insert new clone at head. */
            if (clone) {
                /* The partially consumed skb was cloned: put the trimmed
                 * clone at the head of frag_list.
                 */
                clone->next = list;
                skb_shinfo(skb)->frag_list = clone;
            }
        }
        /* Success! Now we may commit changes to skb data. */
    
    pull_pages:
        eat = delta;
        k = 0;
        /* Release the frags[] pages that were consumed entirely and compact
         * the remaining entries to the front of the array.
         */
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
            int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
    
            if (size <= eat) {
                skb_frag_unref(skb, i);
                eat -= size;
            } else {
                skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
                if (eat) {
                    /* Partially consumed: skip the bytes already copied. */
                    skb_shinfo(skb)->frags[k].page_offset += eat;
                    skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
                    eat = 0;
                }
                k++;
            }
        }
        skb_shinfo(skb)->nr_frags = k;
    
        /* delta bytes moved from the fragment part into the linear part. */
        skb->tail     += delta;
        skb->data_len -= delta;
    
        return skb_tail_pointer(skb);
    }
    EXPORT_SYMBOL(__pskb_pull_tail);
    /* This data is invariant across clones and lives at
     * the end of the header data, ie. at skb->end.
     *
     * It describes the non-linear part of an sk_buff: the page fragments in
     * frags[] and the chain of further sk_buffs hanging off frag_list.
     */
    struct skb_shared_info {
        /* Number of entries of frags[] currently in use. */
        unsigned char    nr_frags;
        __u8        tx_flags;
        /* NOTE(review): the gso_* fields presumably describe segmentation
         * offload state; their use is not shown in this listing - confirm. */
        unsigned short    gso_size;
        /* Warning: this field is not always filled in (UFO)! */
        unsigned short    gso_segs;
        unsigned short  gso_type;
        /* Head of a list of further sk_buffs, linked through ->next. */
        struct sk_buff    *frag_list;
        struct skb_shared_hwtstamps hwtstamps;
        u32        tskey;
        __be32          ip6_frag_id;
    
        /*
         * Warning : all fields before dataref are cleared in __alloc_skb()
         */
        /* NOTE(review): presumably counts the users of the shared data area;
         * not demonstrated in this listing - confirm. */
        atomic_t    dataref;
    
        /* Intermediate layers must ensure that destructor_arg
         * remains valid until skb destructor */
        void *        destructor_arg;
    
        /* must be last field, see pskb_expand_head() */
        /* Page fragments carrying the paged part of the data. */
        skb_frag_t    frags[MAX_SKB_FRAGS];
    };

    /* skb_frag_t is the element type of skb_shared_info::frags[]. */
    typedef struct skb_frag_struct skb_frag_t;
    
    /* One page fragment: a page plus the offset and size of the data in it. */
    struct skb_frag_struct {
        /* The page holding the fragment's data. */
        struct {
            struct page *p;
        } page;
    /* page_offset and size are 32 bits wide when BITS_PER_LONG > 32 or
     * PAGE_SIZE >= 65536, and 16 bits wide otherwise.
     * NOTE(review): presumably because 16 bits cannot describe a page of
     * 64 KiB or more - confirm. */
    #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
        __u32 page_offset;
        __u32 size;
    #else
        __u16 page_offset;
        __u16 size;
    #endif
    };
    
    
    
     

    关于 skb_shinfo  需要继续分析理清楚;

    注意:struct sk_buff *frag_list; 

     /* must be last field, see pskb_expand_head() */
        skb_frag_t    frags[MAX_SKB_FRAGS];

    而frag_list则指向链表的下一个skb,合并skb数据的时候,先是合并frags数组的数据,然后再在frag_list里遍历下一个skb。frag_list一般用于IP分片的场景中,相比而言frags数组只是简单的scatter-gather IO

     frags[]用于分散收集I/O缓冲区;只有在DMA支持物理分散页的Scatter/Gather(SG,分散/聚集)操作时候才可以使用frags[]来保存剩下的数据,否则,只能扩展线性数据区域进行保存!!!
    frag_list用于IP片段

     那么 skb->next / skb->prev 是用于什么呢?

  • 相关阅读:
    在ensp上配置Trunk接口
    在ensp上VLAN基础配置以及Access接口
    在ensp上的ARP及Proxy ARP
    在ensp上简单的配置交换机
    1000000 / 60S 的 RocketMQ 不停机,扩容,平滑升级!
    DE1-SOC 只要加载驱动VNC就断开(DE1-SOC 只要加载驱动串口就卡住)
    通过U盘拷贝文件到DE1-SOC 的 Linux系统
    Linux 系统响应来自 FPGA 端的中断的中断号到底怎么对应?(GIC控制器)
    HPS 访问 FPGA 方法之五—— 通过FPGA 中断访问
    HPS 访问 FPGA 方法之四—— 编写 Linux 字符设备驱动
  • 原文地址:https://www.cnblogs.com/codestack/p/13441135.html
Copyright © 2020-2023  润新知