• PCPU路由缓存


    路由查找

    与IPv4不同,IPv6的出口路由和入口路由都使用函数ip6_pol_route实现,区别仅在于传入的接口索引参数不同:入口方向传入fl6->flowi6_iif,出口方向传入fl6->flowi6_oif。

    /*
     * Policy-routing lookup for the input direction.
     *
     * Delegates to ip6_pol_route(), handing it the flow's incoming interface
     * index (fl6->flowi6_iif) as the interface argument; all other arguments
     * are forwarded unchanged.
     *
     * NOTE(review): the ip6_pol_route() listing later in this file takes no
     * skb argument; the two excerpts look like they come from different
     * kernel versions -- confirm before relying on the exact signature.
     */
    INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
                            struct fib6_table *table,
                            struct flowi6 *fl6,
                            const struct sk_buff *skb,
                            int flags)
    {
        int ifindex = fl6->flowi6_iif;

        return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
    }
    /*
     * Policy-routing lookup for the output direction.
     *
     * Delegates to ip6_pol_route(), handing it the flow's outgoing interface
     * index (fl6->flowi6_oif) as the interface argument; all other arguments
     * are forwarded unchanged.
     *
     * NOTE(review): the ip6_pol_route() listing later in this file takes no
     * skb argument; the two excerpts look like they come from different
     * kernel versions -- confirm before relying on the exact signature.
     */
    INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
                             struct fib6_table *table,
                             struct flowi6 *fl6,
                             const struct sk_buff *skb,
                             int flags)
    {
        int ifindex = fl6->flowi6_oif;

        return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
    }
    /*
     * Original author's note (translated): a normal "add route" command
     * consults the table via
     *     ip6_pol_route_lookup -> fib6_lookup -> fib6_lookup_1;
     * other route lookups go through
     *     ip6_pol_route -> fib6_lookup -> fib6_lookup_1.
     *
     * ip6_pol_route - look up a route in @table for flow @fl6.
     *
     * @net:   network namespace; supplies devconf_all->forwarding and the
     *         ip6_null_entry sentinel route.
     * @table: FIB table to search; its tb6_lock is read-locked for the lookup
     *         and released on every return path.
     * @oif:   interface index used for route selection.  The wrappers in this
     *         file pass fl6->flowi6_iif (input) or fl6->flowi6_oif (output).
     *         Forced to 0 when FLOWI_FLAG_SKIP_NH_OIF is set on the flow.
     * @fl6:   flow key (daddr/saddr and flowi6_flags are read here).
     * @flags: lookup flags; only RT6_LOOKUP_F_IFACE is consumed in this
     *         function.
     *
     * Returns one of:
     *   - the selected route itself, when it is the null entry or already
     *     carries RTF_CACHE;
     *   - a freshly allocated RTF_CACHE clone tracked on the uncached list
     *     (or the null entry if that allocation fails), for the
     *     FLOWI_FLAG_KNOWN_NH / non-gateway case;
     *   - otherwise the per-CPU copy of the selected route.
     */
    static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
                          struct flowi6 *fl6, int flags)
    {
        struct fib6_node *fn, *saved_fn;
        struct rt6_info *rt;
        int strict = 0;
    
        /* Carry over the caller's interface-strictness request ... */
        strict |= flags & RT6_LOOKUP_F_IFACE;
        /* ... and, when forwarding is disabled, first insist on reachable routes. */
        if (net->ipv6.devconf_all->forwarding == 0)
            strict |= RT6_LOOKUP_F_REACHABLE;
    
        read_lock_bh(&table->tb6_lock);
        /* Find the fib6 node matching the flow's destination/source addresses. */
        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
        /* Remember the starting node so the relaxed retry below can restart from it. */
        saved_fn = fn;
    
        /* The flow asked for the output interface to be ignored during selection. */
        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
            oif = 0;
    
    redo_rt6_select:
        rt = rt6_select(fn, oif, strict);
        /* A route with siblings is multipath: let the multipath selector pick one. */
        if (rt->rt6i_nsiblings)
            rt = rt6_multipath_select(rt, fl6, oif, strict);
        if (rt == net->ipv6.ip6_null_entry) {
            /* Nothing usable at this node: backtrack and try again. */
            fn = fib6_backtrack(fn, &fl6->saddr);
            if (fn)
                goto redo_rt6_select;
            else if (strict & RT6_LOOKUP_F_REACHABLE) {
                /* also consider unreachable route */
                strict &= ~RT6_LOOKUP_F_REACHABLE;
                fn = saved_fn;
                goto redo_rt6_select;
            }
        }
    
        /*
         * Case 1: the lookup ended on the null entry, or the selected route is
         * already a cached (RTF_CACHE) route -- return it as is.
         */
    
        if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
            /* NOTE(review): dst_use() presumably takes a reference and updates usage stats -- confirm. */
            dst_use(&rt->dst, jiffies);
            read_unlock_bh(&table->tb6_lock);
    
            rt6_dst_from_metrics_check(rt);
            return rt;
        } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
                    !(rt->rt6i_flags & RTF_GATEWAY))) {
            /* Create a RTF_CACHE clone which will not be
             * owned by the fib6 tree.  It is for the special case where
             * the daddr in the skb during the neighbor look-up is different
             * from the fl6->daddr used to look-up route here.
             *
             * Case 2 (translated from the original note): the flow has
             * FLOWI_FLAG_KNOWN_NH set and the route has no gateway.  Looking
             * up a route with an already-known next hop is uncommon.  Because
             * fl6->daddr holds the next-hop address here rather than the
             * destination address of the skb, the cache entry created here is
             * not stored in the fib6 tree; it is put on the uncached list
             * instead.
             */
    
            struct rt6_info *uncached_rt;
    
            dst_use(&rt->dst, jiffies);
            read_unlock_bh(&table->tb6_lock);
    
            uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
            /* Done with the template route once the clone attempt has been made. */
            dst_release(&rt->dst);
    
            if (uncached_rt)
                rt6_uncached_list_add(uncached_rt);
            else
                /* Allocation failed: fall back to the null entry. */
                uncached_rt = net->ipv6.ip6_null_entry;
    
            /* Reference handed to the caller. */
            dst_hold(&uncached_rt->dst);
            return uncached_rt;
    
        } else {
            /* Get a percpu copy
             *
             * Case 3 (translated from the original note): neither of the two
             * cases above applied, so use a per-CPU route cache entry.  Such
             * an entry is cached with the route and does not need to be added
             * to the uncached list.
             */
    
            struct rt6_info *pcpu_rt;
    
            /* Usage accounting on the template route, updated by hand here. */
            rt->dst.lastuse = jiffies;
            rt->dst.__use++;
            pcpu_rt = rt6_get_pcpu_route(rt);
    
            if (pcpu_rt) {
                read_unlock_bh(&table->tb6_lock);
            } else {
                /* We have to do the read_unlock first
                 * because rt6_make_pcpu_route() may trigger
                 * ip6_dst_gc() which will take the write_lock.
                 */
                /* Hold rt across the unlocked window, then drop that temporary reference. */
                dst_hold(&rt->dst);
                read_unlock_bh(&table->tb6_lock);
                pcpu_rt = rt6_make_pcpu_route(rt);
                dst_release(&rt->dst);
            }
    
            return pcpu_rt;
    
        }
    }

      对于使用ICMPv6的IPv6邻居发现、IGMP和MLD协议,利用icmp6_dst_alloc分配路由缓存项。这类报文仅限于本地网络,报文的下一跳地址和目的地址相同,因此这里不查询fib6表,而是直接分配缓存项;这导致新分配的路由在fib树中没有缓存位置,所以将其添加到uncached_list链表。注意:下面列出的代码版本实际上是把新路由挂到icmp6_dst_gc_list链表(受icmp6_dst_lock保护),并调用fib6_force_start_gc启动回收。

    /*
     * icmp6_dst_alloc - allocate a standalone host route for @fl6->daddr on @dev.
     *
     * Per the surrounding article this serves ICMPv6 neighbour discovery and
     * MLD style traffic, which stays on the local link: the next hop equals
     * the destination, so no fib6 table lookup is done and the route has no
     * place in the fib tree.
     *
     * As written here, the new route is pushed onto icmp6_dst_gc_list under
     * icmp6_dst_lock and fib6 garbage collection is force-started so the
     * entry can be reclaimed later.
     *
     * Returns the result of xfrm_lookup() on the new dst, ERR_PTR(-ENODEV)
     * when @dev has no inet6_dev, or ERR_PTR(-ENOMEM) when the route cannot
     * be allocated.
     */
    struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
                      struct flowi6 *fl6)
    {
        struct inet6_dev *in6dev = in6_dev_get(dev);
        struct net *netns = dev_net(dev);
        struct rt6_info *route;

        if (unlikely(!in6dev))
            return ERR_PTR(-ENODEV);

        route = ip6_dst_alloc(netns, dev, 0);
        if (unlikely(!route)) {
            /* Give back the inet6_dev reference taken above. */
            in6_dev_put(in6dev);
            return ERR_PTR(-ENOMEM);
        }

        /* Host (/128) route whose gateway is the destination itself. */
        route->dst.flags |= DST_HOST;
        route->dst.input = ip6_input;
        route->dst.output = ip6_output;
        atomic_set(&route->dst.__refcnt, 1);
        route->rt6i_gateway = fl6->daddr;
        route->rt6i_dst.addr = fl6->daddr;
        route->rt6i_dst.plen = 128;
        /* The route now owns the inet6_dev reference. */
        route->rt6i_idev = in6dev;
        dst_metric_set(&route->dst, RTAX_HOPLIMIT, 0);

        /* Push onto the head of the ICMPv6 dst GC list. */
        spin_lock_bh(&icmp6_dst_lock);
        route->dst.next = icmp6_dst_gc_list;
        icmp6_dst_gc_list = &route->dst;
        spin_unlock_bh(&icmp6_dst_lock);

        fib6_force_start_gc(netns);

        return xfrm_lookup(netns, &route->dst, flowi6_to_flowi(fl6), NULL, 0);
    }

    uncached路由缓存清除

    当接口被注销或者down时,由函数rt6_uncached_list_flush_dev清除设备相关的uncached路由缓存

    /*
     * rt6_uncached_list_flush_dev - re-home uncached routes that reference @dev.
     *
     * Walks the uncached list of every possible CPU.  Any entry whose
     * inet6_dev belongs to @dev is switched to the loopback device's
     * inet6_dev, and any entry whose dst.dev is @dev is pointed at the
     * loopback device, with the references moved accordingly.  Entries are
     * NOT unlinked from the uncached list.
     *
     * Per the surrounding article this runs when an interface is
     * unregistered or brought down.  The original inline notes mention
     * blackhole_netdev; the code shown here substitutes net->loopback_dev.
     */
    static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
    {
        struct net_device *lo = net->loopback_dev;
        int cpu;

        /* Nothing to re-home when the device in question is loopback itself. */
        if (dev == lo)
            return;

        for_each_possible_cpu(cpu) {
            struct uncached_list *list = per_cpu_ptr(&rt6_uncached_list, cpu);
            struct rt6_info *entry;

            spin_lock_bh(&list->lock);
            list_for_each_entry(entry, &list->head, rt6i_uncached) {
                struct inet6_dev *old_idev = entry->rt6i_idev;
                struct net_device *old_dev = entry->dst.dev;

                /* Swap the inet6_dev: take the new reference, then drop the old one. */
                if (old_idev->dev == dev) {
                    entry->rt6i_idev = in6_dev_get(lo);
                    in6_dev_put(old_idev);
                }

                /* Swap the net_device the same way. */
                if (old_dev == dev) {
                    entry->dst.dev = lo;
                    dev_hold(lo);
                    dev_put(old_dev);
                }
            }
            spin_unlock_bh(&list->lock);
        }
    }

    PCPU路由缓存查找

      如果路由查询结果中rt6_info成员rt6i_pcpu有值,表明缓存存在,直接返回其值。

    /* It should be called with read_lock_bh(&tb6_lock) acquired */
    static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
    {
        struct rt6_info *pcpu_rt, **p;
    
        p = this_cpu_ptr(rt->rt6i_pcpu); 
        pcpu_rt = *p;
    
        if (pcpu_rt) {
            dst_hold(&pcpu_rt->dst);
            rt6_dst_from_metrics_check(pcpu_rt);
        }
        return pcpu_rt;
    }

       函数ip6_rt_pcpu_alloc负责分配并初始化每处理器路由缓存。如果内核正在删除该路由信息(即函数fib6_drop_pcpu_from已将路由信息的fib6_destroying设置为1),此种情况下,应当释放每处理器路由缓存所依据的路由信息。注意:下面列出的代码版本是通过判断rt->rt6i_pcpu是否为空来识别路由已被移除的情况,此时销毁新分配的pcpu_rt并直接返回rt本身。

    /*
     * rt6_make_pcpu_route - create and install this CPU's cached copy of @rt.
     *
     * Allocates a per-CPU clone via ip6_rt_pcpu_alloc() (the original note
     * says this also initialises it and sets RTF_PCPU), then, under the
     * table read lock, tries to publish it in this CPU's rt->rt6i_pcpu slot.
     *
     * Returns, with a reference taken:
     *   - the null entry of @rt's netns when the allocation fails;
     *   - @rt itself when rt->rt6i_pcpu is already gone (the route was
     *     removed from the fib6 tree before the lock was taken);
     *   - the entry already present in the slot when another context
     *     installed one first;
     *   - otherwise the newly installed clone.
     */
    static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
    {
        struct fib6_table *table = rt->rt6i_table;
        struct rt6_info *fresh, *result, **slot;

        fresh = ip6_rt_pcpu_alloc(rt);
        if (!fresh) {
            struct rt6_info *null_rt = dev_net(rt->dst.dev)->ipv6.ip6_null_entry;

            dst_hold(&null_rt->dst);
            return null_rt;
        }

        read_lock_bh(&table->tb6_lock);
        if (!rt->rt6i_pcpu) {
            /* rt has been removed from the fib6 tree
             * before we have a chance to acquire the read_lock.
             * In this case, don't bother to create a pcpu rt
             * since rt is going away anyway.  The next
             * dst_check() will trigger a re-lookup.
             *
             * Note the logic here: the clone just built is destroyed
             * and rt itself is handed back.
             */
            dst_destroy(&fresh->dst);
            result = rt;
        } else {
            slot = this_cpu_ptr(rt->rt6i_pcpu);
            /* Install only if the slot is still empty. */
            result = cmpxchg(slot, NULL, fresh);
            if (result) {
                /* If someone did it before us, return theirs instead */
                dst_destroy(&fresh->dst);
            } else {
                result = fresh;
            }
        }
        dst_hold(&result->dst);
        rt6_dst_from_metrics_check(result);
        read_unlock_bh(&table->tb6_lock);
        return result;
    }
  • 相关阅读:
    【2017下长沙学院软工3班_助教博客】 第一次作业成绩公示
    《构建之法》读书笔记第3章
    《构建之法》读书笔记第1、2章
    【2017下集美大学软件工程1413软工实践_助教博客】 第0次作业成绩公示
    软件工程——构建之法高分Tips
    第09组 Alpha冲刺(1/6)
    2019 SDN上机第2次作业
    第09组 团队Git现场编程实战
    2019 SDN上机第一次作业
    第09组 团队项目-需求分析报告
  • 原文地址:https://www.cnblogs.com/codestack/p/16071952.html
Copyright © 2020-2023  润新知