• sk_buff


    sk_buff 里面包含了各层协议的头部,比如 ethernet、ip、tcp、udp 等等。熟悉它是进一步了解 Linux 网络协议栈的基础
    此结构定义头文件

    <include/linux/skbuff.h>
    /* List head for a queue of sk_buff structures, linked through next/prev. */
    struct sk_buff_head {
          /* These two members must be first. */
          struct sk_buff *next;
          struct sk_buff *prev;
          __u32       qlen; // number of elements in the list
          spinlock_t      lock; // lock that prevents concurrent access to the list
    };

    sk_buff_head

    管理函数

    int skb_headroom(const struct sk_buff *skb)
    int skb_tailroom(const struct sk_buff *skb)

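    skb_headroom返回data指针之前的空闲空间(data - head),通常比较小(例如只有48字节,具体取决于分配方式),向头部添加数据前要先检查,小心越界
    skb_tailroom返回tail指针之后的空闲空间(end - tail),通常比较大(例如1500字节左右,具体取决于分配的缓冲区大小),足够自由调整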
    skb

    //同时增加len和tail。用于向数据报尾部追加数据。返回原来tail所在位置
    unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 

    skb_put

    //将data指针上移并增加len长度。这个函数用来向头部添加一些数据。当然前提是有足够的headroom
    unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 

    skb_push

    //将data指针下移,并减小len的值。这个函数一般用来除去某个头部
    unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) 

    skb_pull

    //将data指针和tail指针同时下移。这个操作在存储空间的头部预留len长度的空隙
    void skb_reserve(struct sk_buff *skb, unsigned int len) 

    skb_reserve

    //将网络报文的长度缩减到len。这个操作丢弃了网络报文尾部的填充值
    void skb_trim(struct sk_buff *skb, unsigned int len) 

    skb_trim
    下图是穿过协议栈从tcp层向下到链路层的过程
    tcp

    分配内存

    alloc_skb 分配缓冲区和一个sk_buff结构
    dev_alloc_skb 设备驱动程序使用的缓冲区分配函数

    释放内存

    kfree_skb 将skb->users引用计数减1,只有当调用前计数为1(即没有其他使用者)时才真正释放
    dev_kfree_skb

    缓冲区克隆

    skb_clone

    sk_buff

    /** 
     *   struct sk_buff - socket buffer
     *   @next: Next buffer in list
     *   @prev: Previous buffer in list
     *   @tstamp: Time we arrived
     *   @sk: Socket we are owned by
     *   @dev: Device we arrived on/are leaving by
     *   @cb: Control buffer. Free for use by every layer. Put private vars here
     *   @_skb_refdst: destination entry (with norefcount bit)
     *   @sp: the security path, used for xfrm
     *   @len: Length of actual data
     *   @data_len: Data length
     *   @mac_len: Length of link layer header
     *   @hdr_len: writable header length of cloned skb
     *   @csum: Checksum (must include start/offset pair)
     *   @csum_start: Offset from skb->head where checksumming should start
     *   @csum_offset: Offset from csum_start where checksum should be stored
     *   @priority: Packet queueing priority
     *   @local_df: allow local fragmentation
     *   @cloned: Head may be cloned (check refcnt to be sure)
     *   @ip_summed: Driver fed us an IP checksum
     *   @nohdr: Payload reference only, must not modify header
     *   @nfctinfo: Relationship of this skb to the connection
     *   @pkt_type: Packet class
     *   @fclone: skbuff clone status
     *   @ipvs_property: skbuff is owned by ipvs
     *   @peeked: this packet has been seen already, so stats have been
     *        done for it, don't do them again
     *   @nf_trace: netfilter packet trace flag
     *   @protocol: Packet protocol from driver
     *   @nfct: Associated connection, if any
     *   @nfct_reasm: netfilter conntrack re-assembly pointer
     *   @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
     *   @skb_iif: ifindex of device we arrived on
     *   @tc_index: Traffic control index
     *   @tc_verd: traffic control verdict
     *   @rxhash: the packet hash computed on receive
     *   @queue_mapping: Queue mapping for multiqueue devices
     *   @ndisc_nodetype: router type (from link layer)
     *   @ooo_okay: allow the mapping of a socket to a queue to be changed
     *   @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
     *        ports.
     *   @wifi_acked_valid: wifi_acked was set
     *   @wifi_acked: whether frame was acked on wifi or not
     *   @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
     *   @dma_cookie: a cookie to one of several possible DMA operations
     *        done by skb DMA functions
     *   @secmark: security marking
     *   @mark: Generic packet mark
     *   @dropcount: total number of sk_receive_queue overflows
     *   @vlan_tci: vlan tag control information
     *   @inner_transport_header: Inner transport layer header (encapsulation)
     *   @inner_network_header: Network layer header (encapsulation)
     *   @transport_header: Transport layer header
     *   @network_header: Network layer header
     *   @mac_header: Link layer header
     *   @tail: Tail pointer
     *   @end: End pointer
     *   @head: Head of buffer
     *   @data: Data head pointer
     *   @truesize: Buffer size
     *   @users: User count - see {datagram,tcp}.c
     *   @destructor: Destruct function
     */
    
    struct sk_buff {
         /* These two members must be first. */
         struct sk_buff      *next;
         struct sk_buff      *prev;
    
         ktime_t             tstamp; // time at which the packet was received
    
         struct sock         *sk; // socket this skb belongs to; used by layer 4
         struct net_device   *dev; // network device: the output device when sending, the input device when receiving
    
         /*
          * This is the control buffer. It is free to use for every
          * layer. Please put your private variables there. If you
          * want to keep them across layers you have to do a skb_clone()
          * first. This is owned by whoever has the skb queued ATM.
          */
         char           cb[48] __aligned(8); // holds the control information of each layer
    
         unsigned long       _skb_refdst; // used by the routing subsystem; holds routing-related information
    #ifdef CONFIG_XFRM
         struct    sec_path  *sp;
    #endif
         unsigned int        len, // length of the data currently in the skb; changes as the skb moves from layer to layer
                        data_len; // length of the fragment data
         __u16               mac_len, // length of the MAC (link layer) header
                        hdr_len; // writable header length of a cloned skb
         union {
              __wsum         csum;
              struct {
                   __u16     csum_start;
                   __u16     csum_offset;
              };
         };
         __u32               priority; // priority; used for QoS
         kmemcheck_bitfield_begin(flags1);
         __u8           local_df:1, // whether local fragmentation is allowed
                        cloned:1, // whether the head may be cloned
                        ip_summed:2, // whether the driver/hardware has already done the checksum
                        nohdr:1, // payload reference only: the header must not be modified
                        nfctinfo:3; // relationship of this skb to the connection
         __u8           pkt_type:3, // packet class
                        fclone:2, // clone status; set for fast clone
                        ipvs_property:1, // skb is owned by ipvs
                        peeked:1,
                        nf_trace:1;
         kmemcheck_bitfield_end(flags1);
         __be16              protocol; // L3 protocol of the packet
    
    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
         struct nf_conntrack *nfct;
    #endif
    #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
         struct sk_buff      *nfct_reasm;
    #endif
    #ifdef CONFIG_BRIDGE_NETFILTER
         struct nf_bridge_info    *nf_bridge;
    #endif
    
         int            skb_iif; // ifindex of the receiving device
    
         __u32               rxhash;
    
         __u16               vlan_tci;
    
    // traffic-control related fields
    #ifdef CONFIG_NET_SCHED
         __u16               tc_index; /* traffic control index */
    #ifdef CONFIG_NET_CLS_ACT
         __u16               tc_verd;  /* traffic control verdict */
    #endif
    #endif
    
         __u16               queue_mapping; // queue mapping for multiqueue devices: which queue the skb maps to
         kmemcheck_bitfield_begin(flags2);
    #ifdef CONFIG_IPV6_NDISC_NODETYPE
         __u8           ndisc_nodetype:2;
    #endif
         __u8           pfmemalloc:1;
         __u8           ooo_okay:1;
         __u8           l4_rxhash:1;
         __u8           wifi_acked_valid:1;
         __u8           wifi_acked:1;
         __u8           no_fcs:1;
         __u8           head_frag:1;
         /* Encapsulation protocol and NIC drivers should use
          * this flag to indicate to each other if the skb contains
          * encapsulated packet or not and maybe use the inner packet
          * headers if needed
          */
         __u8           encapsulation:1;
         /* 7/9 bit hole (depending on ndisc_nodetype presence) */
         kmemcheck_bitfield_end(flags2);
    
    #ifdef CONFIG_NET_DMA
         dma_cookie_t        dma_cookie;
    #endif
    #ifdef CONFIG_NETWORK_SECMARK
         __u32               secmark;
    #endif
         union {
              __u32          mark; // generic packet mark
              __u32          dropcount;
              __u32          reserved_tailroom;
         };
    
         sk_buff_data_t      inner_transport_header;
         sk_buff_data_t      inner_network_header;
         sk_buff_data_t      transport_header; // transport layer header
         sk_buff_data_t      network_header; // network layer header
         sk_buff_data_t      mac_header; // link layer header
         /* These elements must be at the end, see alloc_skb() for details.  */
         sk_buff_data_t      tail; // tail pointer, used when manipulating the skb data
         sk_buff_data_t      end;
         unsigned char       *head,
                        *data;
         unsigned int        truesize; // size accounted for the whole skb
         atomic_t       users; // reference count of the skb
    
         void           (*destructor)(struct sk_buff *skb); // destructor of the skb (sock_rfree and sock_wfree)
    };

    skb_put

    /*
     * Grow the data area at the tail by len bytes (tail and len both
     * advance) and return the tail position as it was before the call,
     * i.e. where the caller may write the new bytes.
     */
    static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
    {
        unsigned char *old_tail = skb_tail_pointer(skb);

        SKB_LINEAR_ASSERT(skb);
        skb->len  += len;
        skb->tail += len;

        return old_tail;
    }

    skb_push

    /*
     * Move the data pointer back by len bytes and grow len accordingly,
     * returning the new start of data. No headroom check is done here.
     */
    static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
    {
        unsigned char *new_data = skb->data - len;

        skb->data = new_data;
        skb->len += len;

        return new_data;
    }

    skb_pull

    /*
     * Remove len bytes from the front of the data area: len shrinks, the
     * data pointer advances, and the new data pointer is returned.
     * Triggers BUG_ON if len would drop below data_len.
     */
    static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
    {
        skb->len -= len;
        BUG_ON(skb->len < skb->data_len);

        skb->data += len;
        return skb->data;
    }

    alloc_skb

    /*
     * Allocate an sk_buff descriptor from a slab cache plus a separate data
     * buffer of (aligned) size bytes followed by a struct skb_shared_info.
     *
     * @size:     requested size of the data area
     * @gfp_mask: allocation flags
     * @fclone:   non-zero to allocate from the fclone cache, which carries a
     *            second sk_buff right behind the first one
     * @node:     node passed to the node-aware allocators
     *
     * Returns the new skb, or NULL if either allocation fails.
     */
    struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                    int fclone, int node)
    {
        struct kmem_cache *cache;
        struct skb_shared_info *shinfo;
        struct sk_buff *skb;
        u8 *data;
    
    // pick the fclone cache or the head cache depending on the value of fclone
        cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
    
    // first allocate the skb itself, i.e. the packet header (descriptor)
        skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
        if (!skb)
            goto out;
    // round size up with SKB_DATA_ALIGN (per the original note: aligned to the L1 cache size)
        size = SKB_DATA_ALIGN(size);
    // then allocate the data area; its size is size + sizeof(struct skb_shared_info)
        data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
                gfp_mask, node);
        if (!data)
            goto nodata;
    
    // initialise the fields: zero everything in the skb that precedes the tail member
        memset(skb, 0, offsetof(struct sk_buff, tail));
    // truesize is the size of the whole allocation: the sk_buff structure plus the data area
        skb->truesize = size + sizeof(struct sk_buff);
    // set the reference count to 1
        atomic_set(&skb->users, 1);
    // head and data start out pointing at the same place
        skb->head = data;
        skb->data = data;
    // set the tail pointer
        skb_reset_tail_pointer(skb);
    // tail starts out equal to data, so end lies size bytes past it
        skb->end = skb->tail + size;
        kmemcheck_annotate_bitfield(skb, flags1);
        kmemcheck_annotate_bitfield(skb, flags2);
    #ifdef NET_SKBUFF_DATA_USES_OFFSET
        skb->mac_header = ~0U;
    #endif
    
    // initialise shinfo; not described here (an earlier blog post on fragmentation covers this structure in detail)
        shinfo = skb_shinfo(skb);
        atomic_set(&shinfo->dataref, 1);
        shinfo->nr_frags  = 0;
        shinfo->gso_size = 0;
        shinfo->gso_segs = 0;
        shinfo->gso_type = 0;
        shinfo->ip6_frag_id = 0;
        shinfo->tx_flags.flags = 0;
        skb_frag_list_init(skb);
        memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
    
    // fclone set means an extra chunk of memory was allocated, so the matching fclone fields must be set up
        if (fclone) {
    // the extra memory sits immediately after the current skb
            struct sk_buff *child = skb + 1;
            atomic_t *fclone_ref = (atomic_t *) (child + 1);
    
            kmemcheck_annotate_bitfield(child, flags1);
            kmemcheck_annotate_bitfield(child, flags2);
    // set the flags; note that the current skb and the extra skb get different fclone values
            skb->fclone = SKB_FCLONE_ORIG;
            atomic_set(fclone_ref, 1);
    
            child->fclone = SKB_FCLONE_UNAVAILABLE;
        }
    out:
        return skb;
    nodata:
        kmem_cache_free(cache, skb);
        skb = NULL;
        goto out;
    }

    skb_linearize

    int skb_linearize(struct sk_buff *skb, gfp_t gfp) 

    将frag_list链表里面的数据包整合成一个报文

    skb_clone

    
    /*
     * Produce a clone descriptor for skb. When skb carries an unused
     * fast-clone companion, that companion is used; otherwise a new
     * descriptor is taken from skbuff_head_cache. Returns the result of
     * __skb_clone(), or NULL if the cache allocation fails.
     */
    struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
    {
        struct sk_buff *n;
    
    // n is the memory immediately following skb; if skb was allocated via the fclone path, n is itself an skb
        n = skb + 1;
    // both skb and n must carry the expected fclone values, i.e. the ones set in __alloc_skb
        if (skb->fclone == SKB_FCLONE_ORIG &&
            n->fclone == SKB_FCLONE_UNAVAILABLE) {
    // no allocation needed here: use n directly, mark it as the clone and bump the shared reference count
            atomic_t *fclone_ref = (atomic_t *) (n + 1);
            n->fclone = SKB_FCLONE_CLONE;
            atomic_inc(fclone_ref);
        } else {
    
    // otherwise a descriptor has to be taken from the cache
            n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
            if (!n)
                return NULL;
    
            kmemcheck_annotate_bitfield(n, flags1);
            kmemcheck_annotate_bitfield(n, flags2);
    // set the fclone field of the new skb; skbs that are not part of a fast-clone pair carry this value
            n->fclone = SKB_FCLONE_UNAVAILABLE;
        }
    
        return __skb_clone(n, skb);
    }

    只是复制sk_buff结构,并不复制skb的数据缓冲区。Clone后的sk_buff结构与原始的sk_buff指向同一数据缓冲区。原始的和clone后的skb描述符的cloned值都会被置1,clone的skb描述符的users值置1,同时数据缓冲区的引用计数dataref增加1
    skb_clone()操作的skb结构的数据缓冲区是不能被修改的

    pskb_copy

    /*
     * Copy skb into a newly allocated skb: the linear data is copied,
     * while the page fragments are shared (each frag descriptor is copied
     * and a reference is taken on its page). Returns the new skb, or NULL
     * if the allocation fails.
     */
    struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
    {
        /*
         *  Allocate the copy buffer
         */
        struct sk_buff *n;
    #ifdef NET_SKBUFF_DATA_USES_OFFSET
        n = alloc_skb(skb->end, gfp_mask);
    #else
        n = alloc_skb(skb->end - skb->head, gfp_mask);
    #endif
        if (!n)
            goto out;
    
        /* Set the data pointer */
        skb_reserve(n, skb->data - skb->head);
        /* Set the tail pointer and length */
        skb_put(n, skb_headlen(skb));
    // copy the linear data area
        skb_copy_from_linear_data(skb, n->data, n->len);
    // update the related fields
        n->truesize += skb->data_len;
        n->data_len  = skb->data_len;
        n->len       = skb->len;
    
    // below, only the descriptors (page pointers) of the fragment data are copied
    if (skb_shinfo(skb)->nr_frags) {
            int i;
    
            for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
                get_page(skb_shinfo(n)->frags[i].page);
            }
            skb_shinfo(n)->nr_frags = i;
        }
    
        copy_skb_header(n, skb);
    out:
        return n;
    }

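    与skb_clone()不同,pskb_copy()与skb_copy()属于重量级拷贝:除了sk_buff结构之外,还会拷贝skb->data指向的数据。其中pskb_copy()只拷贝线性数据区,分片(frags)数据只复制页指针并增加页的引用计数;skb_copy()则把所有数据都拷贝到新的缓冲区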

    skb_copy

    /*
     * Make a full copy of skb: a new skb is allocated with room for the
     * original buffer plus data_len, and the headroom together with all
     * skb->len bytes of data are copied into it. Returns the new skb, or
     * NULL if the allocation fails.
     */
    struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
    {
        int headerlen = skb->data - skb->head;
        /*
         *  Allocate the copy buffer
         */
    // first allocate a new skb
        struct sk_buff *n;
    #ifdef NET_SKBUFF_DATA_USES_OFFSET
        n = alloc_skb(skb->end + skb->data_len, gfp_mask);
    #else
        n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
    #endif
        if (!n)
            return NULL;
    
        /* Set the data pointer */
        skb_reserve(n, headerlen);
        /* Set the tail pointer and length */
        skb_put(n, skb->len);
    // then copy all of the data: headerlen + skb->len bytes, starting headerlen bytes before skb->data
        if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
            BUG();
    
        copy_skb_header(n, skb);
        return n;
    }

    kfree_skb

    /*
     * Drop one reference to skb and free it via __kfree_skb() when no other
     * reference remains. A NULL skb is ignored.
     */
    void kfree_skb(struct sk_buff *skb)
    {
        if (unlikely(!skb))
            return;
        // users == 1: the caller holds the only reference, so skip the atomic decrement and go on to free
        if (likely(atomic_read(&skb->users) == 1))
            smp_rmb();
    // otherwise decrement the count, and return unless it dropped to zero
        else if (likely(!atomic_dec_and_test(&skb->users)))
            return;
        trace_kfree_skb(skb, __builtin_return_address(0));
        __kfree_skb(skb);
    }

    其他函数

    #ifdef NET_SKBUFF_DATA_USES_OFFSET
    /*
     * Offset-based variant: transport_header, network_header and mac_header
     * hold offsets relative to skb->head.
     */

    /* Return a pointer to the transport header (head + stored offset). */
    static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
    {
        return skb->head + skb->transport_header;
    }
    /* Make the transport header refer to the current data position. */
    static inline void skb_reset_transport_header(struct sk_buff *skb)
    {
        skb->transport_header = skb->data - skb->head;
    }
    /* Make the transport header refer to data + offset. */
    static inline void skb_set_transport_header(struct sk_buff *skb,
             const int offset)
    {
        skb_reset_transport_header(skb);
        skb->transport_header += offset;
    }
    /* Return a pointer to the network header (head + stored offset). */
    static inline unsigned char *skb_network_header(const struct sk_buff *skb)
    {
        return skb->head + skb->network_header;
    }
    /* Make the network header refer to the current data position. */
    static inline void skb_reset_network_header(struct sk_buff *skb)
    {
        skb->network_header = skb->data - skb->head;
    }
    /* Make the network header refer to data + offset. */
    static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
    {
        skb_reset_network_header(skb);
        skb->network_header += offset;
    }
    /* Return a pointer to the MAC header (head + stored offset). */
    static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
    {
        return skb->head + skb->mac_header;
    }
    /* Non-zero once mac_header differs from ~0U, the value __alloc_skb stores in this mode. */
    static inline int skb_mac_header_was_set(const struct sk_buff *skb)
    {
        return skb->mac_header != ~0U;
    }
    /* Make the MAC header refer to the current data position. */
    static inline void skb_reset_mac_header(struct sk_buff *skb)
    {
        skb->mac_header = skb->data - skb->head;
    }
    /* Make the MAC header refer to data + offset. */
    static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
    {
        skb_reset_mac_header(skb);
        skb->mac_header += offset;
    }
    #else /* NET_SKBUFF_DATA_USES_OFFSET */
    /* 不使用相对偏移的情况:各头部字段直接保存指针 */
    /*
     * Pointer-based variant: transport_header, network_header and mac_header
     * are stored and returned directly as pointers.
     */

    /* Return the stored transport header pointer. */
    static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
    {
        return skb->transport_header;
    }
    /* Point the transport header at the current data position. */
    static inline void skb_reset_transport_header(struct sk_buff *skb)
    {
        skb->transport_header = skb->data;
    }
    /* Point the transport header at data + offset. */
    static inline void skb_set_transport_header(struct sk_buff *skb,
             const int offset)
    {
        skb->transport_header = skb->data + offset;
    }
    /* Return the stored network header pointer. */
    static inline unsigned char *skb_network_header(const struct sk_buff *skb)
    {
        return skb->network_header;
    }
    /* Point the network header at the current data position. */
    static inline void skb_reset_network_header(struct sk_buff *skb)
    {
        skb->network_header = skb->data;
    }
    /* Point the network header at data + offset. */
    static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
    {
        skb->network_header = skb->data + offset;
    }
    /* Return the stored MAC header pointer. */
    static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
    {
        return skb->mac_header;
    }
    /* Non-zero when mac_header is not NULL. */
    static inline int skb_mac_header_was_set(const struct sk_buff *skb)
    {
        return skb->mac_header != NULL;
    }
    /* Point the MAC header at the current data position. */
    static inline void skb_reset_mac_header(struct sk_buff *skb)
    {
        skb->mac_header = skb->data;
    }
    /* Point the MAC header at data + offset. */
    static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
    {
        skb->mac_header = skb->data + offset;
    }
    #endif /* NET_SKBUFF_DATA_USES_OFFSET */
    /* Return the TCP header of skb, taken from its transport header position. */
    static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
    {
        unsigned char *transport = skb_transport_header(skb);

        return (struct tcphdr *)transport;
    }
    
    //获得TCP头的长度
    static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
    {
        return tcp_hdr(skb)->doff * 4;
    }
    
    //获取tcp option的长度
    static inline unsigned int tcp_optlen(const struct sk_buff *skb)
    {
        return (tcp_hdr(skb)->doff - 5) * 4;
    }
    
    /* Return the IP header of skb, taken from its network header position. */
    static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
    {
        unsigned char *network = skb_network_header(skb);

        return (struct iphdr *)network;
    }
    
    /* Return the IPIP header of skb: an IP header read at the transport header position. */
    static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
    {
        unsigned char *transport = skb_transport_header(skb);

        return (struct iphdr *)transport;
    }
    
    /* Return the 802.3 MAC header of skb, taken from its MAC header position. */
    static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
    {
        unsigned char *mac = skb_mac_header(skb);

        return (struct ebt_802_3_hdr *)mac;
    }
    
    /* Return the Ethernet MAC header of skb, taken from its MAC header position. */
    static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
    {
        unsigned char *mac = skb_mac_header(skb);

        return (struct ethhdr *)mac;
    }

    skb_copy_expand

    /*
     * Copy skb into a newly allocated skb that has newheadroom bytes of
     * headroom and room for newtailroom bytes after the data. Returns the
     * new skb, or NULL if the allocation fails.
     *
     * NOTE(review): this excerpt shows the copy step of two kernel versions
     * side by side (the sections marked "2.4" and "2.6"); they read as
     * alternatives, not as code meant to run back to back. The variables
     * head_copy_len and head_copy_off used by the 2.6 section are not
     * declared in this excerpt.
     */
    struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
                                    int newheadroom,
                                    int newtailroom,
                                    int gfp_mask)
    {
        struct sk_buff *n;
    
        /*
         *        Allocate the copy buffer
         */
    
        n=alloc_skb(newheadroom + skb->len + newtailroom,
                    gfp_mask);
        if(n==NULL)
                return NULL;
    
        skb_reserve(n,newheadroom);
    
        /* Set the tail pointer and length */
        skb_put(n,skb->len);
        /* 2.4 code ---- start ---------*/
        /* Copy the data only. */
        if (skb_copy_bits(skb, 0, n->data, skb->len))
                BUG();
        /* 2.4 code ---- end ---------*/
        /* 2.6 code ---- start ---------*/
        /* Copy at most newheadroom bytes of the old headroom; if the new
         * headroom is larger, offset the destination by the difference. */
        head_copy_len = skb_headroom(skb);
        head_copy_off = 0;
        if (newheadroom <= head_copy_len)
                head_copy_len = newheadroom;
        else
                head_copy_off = newheadroom - head_copy_len;
    
        /* Copy the linear header and data. */
        if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
                          skb->len + head_copy_len))
                BUG();
        /* 2.6 code ---- end ---------*/
    
        copy_skb_header(n, skb);
        return n;
    }

    改变skb的数据域

    先判断skb的tailroom,如果空间够大,则我们可以把需要添加的数据放在skb的tailroom里。如果tailroom不够大,则需要调用skb_copy_expand函数来扩充tailroom或者headroom

    if(skb_tailroom(skb) < 16)  
    {  
        nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + 16, GFP_ATOMIC);  
        if(!nskb)  
        {  
            printk("low memory..../n");          
            dev_kfree_skb(skb);  
            return -1;  
        }  
    
        else  
        {  
            kfree_skb(skb);
            skb = nskb;  
        }  
    
        memcpy(skb_put(skb, 16), data, 16); //拷贝数据到skb tail  
    }
  • 相关阅读:
    DS博客作业03--树
    DS博客作业02--栈和队列
    数据结构——线性表
    结构体
    C博客作业05--指针
    C语言博客作业04--数组
    面向对象设计大作业第二阶段:图书馆系统
    Java程序设计-有理数类的设计
    DS博客作业05-查找
    DS博客作业04-图
  • 原文地址:https://www.cnblogs.com/zhangxuechao/p/11709834.html
Copyright © 2020-2023  润新知