• 邻居子系统1.5 neigh output


    1.5.1

    当邻居项不处于NUD_CONNECTED状态时,不允许走快速路径发送报文;函数neigh_resolve_output 用于慢而安全的输出,通常在初始化neigh_ops结构时

    用它来实例化output函数。当邻居从NUD_CONNECTED转到非NUD_CONNECTED状态的时候,使用neigh_suspect 将output设置为neigh_resolve_output()

    /* Neighbour state is suspicious;
       disable fast path.

       Called with write_locked neigh.
     */
    static void neigh_suspect(struct neighbour *neigh)
    {
        NEIGH_PRINTK2("neigh %p is suspected. ", neigh);

        neigh->output = neigh->ops->output;
    }

    /* Neighbour state is OK;
       enable fast path.

       Called with write_locked neigh.
     */
    static void neigh_connect(struct neighbour *neigh)
    {
        NEIGH_PRINTK2("neigh %p is connected. ", neigh);

        neigh->output = neigh->ops->connected_output;
    }
    /*
     * Generic ARP neighbour operations (AF_INET).
     *
     * .output is what neigh_suspect() installs on an entry and
     * .connected_output is what neigh_connect() installs.
     */
    static const struct neigh_ops arp_generic_ops = {
        .family           = AF_INET,
        .solicit          = arp_solicit,
        .error_report     = arp_error_report,
        .output           = neigh_resolve_output,   /* slow, state-checking path */
        .connected_output = neigh_connected_output, /* path for connected entries */
    };

    neigh_resolve_output:大概含义为:如果邻居项的输出设备支持二层首部缓存(header_ops->cache),同时二层首部缓存尚未建立,

    则先为该邻居项建立硬件首部缓存;随后无论是否建立了缓存,都通过dev_hard_header在报文首部添加二层硬件首部,然后输出报文。

    /*
     * neigh_resolve_output - slow and careful output path.
     *
     * Runs the neighbour state check first (neigh_event_send()). Only when
     * that returns 0 is a link-layer header built with dev_hard_header() and
     * the packet handed to dev_queue_xmit().
     *
     * Returns the dev_queue_xmit() result when the packet is transmitted,
     * 0 when neigh_event_send() returned non-zero (the skb is then not sent
     * from here), or -EINVAL after freeing the skb when the skb has no dst
     * or the header could not be built.
     */
    int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
    {
        struct dst_entry *dst = skb_dst(skb);
        int rc = 0;

        if (!dst)
            goto discard;

        /* Validate the neighbour state; 0 means the skb may go out now. */
        if (!neigh_event_send(neigh, skb)) {
            int err;
            struct net_device *dev = neigh->dev;
            unsigned int seq;

            /* The device can cache link-layer headers and none is cached yet. */
            if (dev->header_ops->cache && !neigh->hh.hh_len)
                neigh_hh_init(neigh, dst);

            /*
             * Build the link-layer header; redo it if neigh->ha changed
             * while it was being read (ha_lock sequence check).
             */
            do {
                __skb_pull(skb, skb_network_offset(skb));
                seq = read_seqbegin(&neigh->ha_lock);
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                              neigh->ha, NULL, skb->len);
            } while (read_seqretry(&neigh->ha_lock, seq));

            if (err >= 0)
                rc = dev_queue_xmit(skb);   /* header added: transmit */
            else
                goto out_kfree_skb;
        }
    out:
        return rc;
    discard:
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
                  dst, neigh);
    out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
    }
    /*
     * neigh_event_send - note that the entry is being used and decide whether
     * the skb may be sent right away.
     *
     * Refreshes neigh->used with the current jiffies value. Returns 0 when
     * the entry is in a CONNECTED, DELAY or PROBE state; otherwise returns
     * whatever __neigh_event_send() returns.
     */
    static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
    {
        const unsigned long stamp = jiffies;

        /* Only write when the value actually changes. */
        if (neigh->used != stamp)
            neigh->used = stamp;

        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
            return 0;

        return __neigh_event_send(neigh, skb);
    }
    int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
    {
        int rc;
        bool immediate_probe = false;
    
        write_lock_bh(&neigh->lock);
    
        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
            goto out_unlock_bh;
        /*
            去掉NUD_CONNECT NUD_DELAY NUD_PROBE  状态
            那么就只剩下 NUD_STALE NUD_INCOMPLETE NUD_NONE NUD_FAILD
        */
        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { //NUD_NONE状态
            if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                //如果允许发送广播请求或者应用程序发送请求解析neigh地址
                unsigned long next, now = jiffies;
    
                atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                neigh->nud_state     = NUD_INCOMPLETE;
                neigh->updated = now;
                next = now + max(neigh->parms->retrans_time, HZ/2);
                neigh_add_timer(neigh, next); //启动定时器
                immediate_probe = true;  //发送arp 请求(ipv4) 请求邻居表项
            } else {
                neigh->nud_state = NUD_FAILED;//邻居无效 不能输出
                neigh->updated = jiffies;
                write_unlock_bh(&neigh->lock);
    
                kfree_skb(skb);
                return 1;
            }
        } else if (neigh->nud_state & NUD_STALE) {
            NEIGH_PRINTK2("neigh %p is delayed.
    ", neigh);
            neigh->nud_state = NUD_DELAY;//转变为delay 状态
            neigh->updated = jiffies;
            neigh_add_timer(neigh,
                    jiffies + neigh->parms->delay_probe_time);
        }
    
        if (neigh->nud_state == NUD_INCOMPLETE) {//说明之前有报文发送
            if (skb) {
                while (neigh->arp_queue_len_bytes + skb->truesize >
                       neigh->parms->queue_len_bytes) {//如果请求报文已经满了,但还没有收到应答。
                    struct sk_buff *buff;//如果缓存队列还没有达到上限,则将报文加入到输出缓存队列中
                                    //否者 丢弃队列中最早加入的报文然后加入队列
                                    //但是返回值都是1 即 不能立即发送
                    buff = __skb_dequeue(&neigh->arp_queue);
                    if (!buff)
                        break;
                    neigh->arp_queue_len_bytes -= buff->truesize;
                    kfree_skb(buff);
                    NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                }
                skb_dst_force(skb);
                __skb_queue_tail(&neigh->arp_queue, skb);
                neigh->arp_queue_len_bytes += skb->truesize;
            }
            rc = 1;
        }
    out_unlock_bh:
        if (immediate_probe)
            neigh_probe(neigh); 发出邻居项请求 solict报文 (arp请求等)
        else
            write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;
    }

    neigh_hh_init :缓存二层头,以eth为例:就是缓存二层mac

    /**
     * ether_setup - setup Ethernet network device
     * @dev: network device
     *
     * Populate @dev with the Ethernet-generic defaults: header operations,
     * hardware type, header/address lengths, MTU, queue length, flags and
     * an all-ones broadcast address.
     */
    void ether_setup(struct net_device *dev)
    {
        /* Link-layer identity and header handling. */
        dev->type            = ARPHRD_ETHER;
        dev->header_ops      = &eth_header_ops;
        dev->hard_header_len = ETH_HLEN;
        dev->addr_len        = ETH_ALEN;

        /* Transmission defaults. */
        dev->mtu             = ETH_DATA_LEN;
        dev->tx_queue_len    = 1000;    /* Ethernet wants good queues */

        /* Interface flags. */
        dev->flags           = IFF_BROADCAST|IFF_MULTICAST;
        dev->priv_flags     |= IFF_TX_SKB_SHARING;

        /* Broadcast address: every byte set to 0xFF. */
        memset(dev->broadcast, 0xFF, ETH_ALEN);
    }
    /*
     * Header operations installed on Ethernet devices by ether_setup().
     * The .cache hook is the one neigh_hh_init() invokes through
     * dev->header_ops->cache.
     */
    const struct header_ops eth_header_ops ____cacheline_aligned = {
        .create       = eth_header,
        .parse        = eth_header_parse,
        .rebuild      = eth_rebuild_header,
        .cache        = eth_header_cache,
        .cache_update = eth_header_cache_update,
    };
    
    
    /**
     * eth_header_cache - fill cache entry from neighbour
     * @neigh: source neighbour
     * @hh: destination cache entry
     * @type: Ethernet type field
     *
     * Build an Ethernet header template inside @hh from the neighbour's
     * hardware address and its device's own address.
     *
     * Returns 0 on success, or -1 (leaving @hh untouched) when @type is
     * ETH_P_802_3.
     */
    int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
    {
        const struct net_device *dev = neigh->dev;
        u8 *base = (u8 *) hh->hh_data;
        struct ethhdr *eth;

        if (type == htons(ETH_P_802_3))
            return -1;

        /* The header sits at an offset inside hh_data, not at its start. */
        eth = (struct ethhdr *)(base + HH_DATA_OFF(sizeof(struct ethhdr)));

        memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
        memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
        eth->h_proto = type;

        hh->hh_len = ETH_HLEN;
        return 0;
    }
    
    
    /*
     * neigh_hh_init - populate the neighbour's cached link-layer header.
     *
     * Takes n->lock for writing (BH disabled) and, if the cache is still
     * empty (hh_len == 0), asks the device's header_ops->cache hook to fill
     * it using the dst's protocol.
     *
     * NOTE(review): the previous header said "called with
     * read_lock_bh(&n->lock)", but this body acquires the write lock itself
     * - confirm the intended locking contract.
     */
    static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
    {
        struct hh_cache *cache = &n->hh;
        struct net_device *out_dev = dst->dev;
        __be16 proto = dst->ops->protocol;

        write_lock_bh(&n->lock);

        /*
         * Re-check under the lock so that only one thread initializes
         * the hh_cache entry.
         */
        if (!cache->hh_len)
            out_dev->header_ops->cache(n, cache, proto);

        write_unlock_bh(&n->lock);
    }
    //根据代码可以看出  直接拷贝二层头
    View Code

    快速发送:

    /*
     * dst_neigh_output - choose the output path for a packet.
     * (Per the article, ip_finish_output2 calls this to send packets.)
     *
     * Applies a pending confirmation from the dst to the neighbour, then
     * uses the cached-header fast path when the neighbour is connected and
     * a header is cached; otherwise defers to the neighbour's ->output
     * handler (e.g. neigh_resolve_output).
     */
    static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
                       struct sk_buff *skb)
    {
        struct hh_cache *cached = &n->hh;

        if (unlikely(dst->pending_confirm)) {
            n->confirmed = jiffies;
            dst->pending_confirm = 0;
        }

        /* Slow path: not connected, or no cached link-layer header. */
        if (!(n->nud_state & NUD_CONNECTED) || !cached->hh_len)
            return n->output(n, skb);

        /* Fast path. */
        return neigh_hh_output(cached, skb);
    }
    /*
     * neigh_hh_output - prepend the cached link-layer header and transmit.
     *
     * The cached header is copied in front of skb->data inside a
     * read_seqbegin()/read_seqretry() loop on hh_lock, so the copy is
     * repeated if the cache was modified meanwhile. The skb is then
     * extended by the header length and passed to dev_queue_xmit().
     */
    static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
    {
        unsigned int start;
        unsigned int len;

        do {
            start = read_seqbegin(&hh->hh_lock);
            len = hh->hh_len;
            if (unlikely(len > HH_DATA_MOD)) {
                /* Header longer than HH_DATA_MOD: copy its aligned length. */
                unsigned int aligned_len = HH_DATA_ALIGN(len);

                memcpy(skb->data - aligned_len, hh->hh_data, aligned_len);
            } else {
                /*
                 * Header fits in HH_DATA_MOD: copy exactly HH_DATA_MOD
                 * bytes (this is inlined by gcc).
                 */
                memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
            }
        } while (read_seqretry(&hh->hh_lock, start));

        skb_push(skb, len);

        /* Transmit. */
        return dev_queue_xmit(skb);
    }

    neigh_hh_output-缓存输出,直接拷贝二层头部,然后输出;

    neigh_connected_output-快速输出,用于连接状态的输出;需要重新构建二层头部,然后输出;

    neigh_resolve_output-慢速输出,用于非连接状态的输出;需要对邻居项状态进行检查,然后重新构造二层头部,最后输出;

    neigh_direct_output-直接输出,用于没有二层头部时的输出;

    /*
     * neigh_connected_output - output handler for connected neighbours.
     *
     * Slower than neigh_hh_output because the link-layer header is rebuilt
     * with dev_hard_header() for every packet instead of being copied from
     * the cache.
     *
     * Returns the dev_queue_xmit() result, or -EINVAL after freeing the skb
     * when the header could not be built.
     */
    int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
    {
        struct net_device *out_dev = neigh->dev;
        unsigned int start;
        int rc;

        /* Build the header; redo it if neigh->ha changed during the read. */
        do {
            __skb_pull(skb, skb_network_offset(skb));
            start = read_seqbegin(&neigh->ha_lock);
            rc = dev_hard_header(skb, out_dev, ntohs(skb->protocol),
                         neigh->ha, NULL, skb->len);
        } while (read_seqretry(&neigh->ha_lock, start));

        if (rc < 0) {
            kfree_skb(skb);
            return -EINVAL;
        }

        /* Header in place: send the packet. */
        return dev_queue_xmit(skb);
    }
  • 相关阅读:
    Codeforces Round #263 (Div. 2)
    蓝桥杯 翻硬币
    蓝桥杯 错误的票据
    蓝桥杯 带分数
    蓝桥杯 核桃的数量 求三个数的最小公倍数
    poj 3928 ping pong 树状数组
    lca
    poj 3927 Priest John's Busiest Day
    种类并查集
    高桥和低桥 ( 代代相传刷qq + 无敌二分 )
  • 原文地址:https://www.cnblogs.com/codestack/p/11844857.html
Copyright © 2020-2023  润新知