• calico


    先设置变量:

    NS=cali
    VETH=v-cali
    

    创建 netns 和 veth, 把 veth 的一端塞进 netns, 并设置 ip:

    ip netns add $NS
    
    ip l add $VETH type veth peer name $VETH-peer
    
    ip l set $VETH-peer up
    ip l set $VETH netns $NS
    ip netns exec $NS ip l set $VETH up
    
    ip netns exec $NS ip a add 10.2.0.1/32 dev $VETH
    

    然后在宿主机直接路由 ip 到 veth:

    ip r add 10.2.0.1/32 dev $VETH-peer
    

    netns 里设置 default gw 到 veth:

    ip netns exec $NS ip r add default dev $VETH
    [root@bogon ~]# ping 10.2.0.1
    PING 10.2.0.1 (10.2.0.1) 56(84) bytes of data.
    64 bytes from 10.2.0.1: icmp_seq=1 ttl=64 time=0.107 ms
    64 bytes from 10.2.0.1: icmp_seq=2 ttl=64 time=0.024 ms
    ^C
    --- 10.2.0.1 ping statistics ---
    2 packets transmitted, 2 received, 0% packet loss, time 1021ms
    rtt min/avg/max/mdev = 0.024/0.065/0.107/0.042 ms
    [root@bogon ~]# route -n
    Kernel IP routing table
    Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
    0.0.0.0         10.10.16.254    0.0.0.0         UG    0      0        0 enahisic2i0
    10.2.0.1        0.0.0.0         255.255.255.255 UH    0      0        0 v-cali-peer
    [root@bogon ~]# ip netns exec cali  ip a
    1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    98: v-cali@if97: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
        link/ether 16:95:64:35:e3:17 brd ff:ff:ff:ff:ff:ff link-netnsid 0
        inet 10.2.0.1/32 scope global v-cali
           valid_lft forever preferred_lft forever
        inet6 fe80::1495:64ff:fe35:e317/64 scope link 
           valid_lft forever preferred_lft forever
    [root@bogon ~]# ip netns exec cali  tcpdump -i v-cali icmp -nnvv
    tcpdump: listening on v-cali, link-type EN10MB (Ethernet), capture size 262144 bytes
    11:48:00.249026 IP (tos 0x0, ttl 64, id 43401, offset 0, flags [DF], proto ICMP (1), length 84)
        10.10.16.81 > 10.2.0.1: ICMP echo request, id 46022, seq 1, length 64
    11:48:00.249052 IP (tos 0x0, ttl 64, id 18221, offset 0, flags [none], proto ICMP (1), length 84)
        10.2.0.1 > 10.10.16.81: ICMP echo reply, id 46022, seq 1, length 64
    11:48:01.252474 IP (tos 0x0, ttl 64, id 43423, offset 0, flags [DF], proto ICMP (1), length 84)
        10.10.16.81 > 10.2.0.1: ICMP echo request, id 46022, seq 2, length 64
    11:48:01.252490 IP (tos 0x0, ttl 64, id 18254, offset 0, flags [none], proto ICMP (1), length 84)
        10.2.0.1 > 10.10.16.81: ICMP echo reply, id 46022, seq 2, length 64

    这时候可以从 host ping 通 netns; 反过来, netns 里可以 ping 通 host 自己的 ip (10.10.16.81), 但是访问外部地址 (例如 8.8.8.8) 不可达. 后面抓包会发现是因为 arp 解析不到对端的 mac 地址, 需要加上 arp proxy:

    [root@bogon ~]# ip netns exec cali  ping 10.10.16.81
    PING 10.10.16.81 (10.10.16.81) 56(84) bytes of data.
    64 bytes from 10.10.16.81: icmp_seq=1 ttl=64 time=0.067 ms
    64 bytes from 10.10.16.81: icmp_seq=2 ttl=64 time=0.036 ms
    64 bytes from 10.10.16.81: icmp_seq=3 ttl=64 time=0.033 ms
    64 bytes from 10.10.16.81: icmp_seq=4 ttl=64 time=0.024 ms
    64 bytes from 10.10.16.81: icmp_seq=5 ttl=64 time=0.027 ms
    ^C^C
    --- 10.10.16.81 ping statistics ---
    5 packets transmitted, 5 received, 0% packet loss, time 4187ms
    rtt min/avg/max/mdev = 0.024/0.037/0.067/0.016 ms
    [root@bogon ~]# ip netns exec cali  ping 8.8.8.8
    PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
    ^C
    --- 8.8.8.8 ping statistics ---
    2 packets transmitted, 0 received, 100% packet loss, time 1047ms
    
    [root@bogon ~]# 

    添加snat,还是无法访问

    [root@bogon ~]# iptables -t nat -A POSTROUTING -s 10.2.0.1/32 -j MASQUERADE
    [root@bogon ~]# ip netns exec cali  ping 8.8.8.8
    PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
    ^C
    --- 8.8.8.8 ping statistics ---
    2 packets transmitted, 0 received, 100% packet loss, time 1027ms
    
    [root@bogon ~]# 

    host上抓包

    [root@bogon ~]# tcpdump -i v-cali-peer arp -nv
    tcpdump: listening on v-cali-peer, link-type EN10MB (Ethernet), capture size 262144 bytes
    11:50:57.812451 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
    11:50:58.852454 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
    11:50:59.892511 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
    11:51:00.932453 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
    11:51:01.972453 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
    11:51:03.012519 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28

    抓包发现是因为 arp 不知道 mac 地址, 加上 arp proxy

    [root@bogon ~]# echo 1 > /proc/sys/net/ipv4/conf/$VETH-peer/proxy_arp
    [root@bogon ~]# sysctl -p
    net.bridge.bridge-nf-call-iptables = 1
    net.bridge.bridge-nf-call-ip6tables = 1
    net.ipv4.ip_nonlocal_bind = 1
    [root@bogon ~]# 
    [root@bogon ~]# ip netns exec cali  ip n
    8.8.8.8 dev v-cali lladdr 92:07:52:14:06:42 STALE
    10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
    [root@bogon ~]# 

    可以访问通了

    这下可以和 host 互 ping 了, google.com 也没问题了, 功能上没问题.

    不过有个优化的问题, arp proxy 会有一些问题, 比如这里会导致 netns 里的 arp cache 无限扩张, 所有的 outbound ip 都会产生一条 arp entry.

    [root@bogon ~]# ip netns exec cali  ip n
    8.8.8.8 dev v-cali lladdr 92:07:52:14:06:42 STALE
    10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
    114.114.114.114 dev v-cali lladdr 92:07:52:14:06:42 REACHABLE
    [root@bogon ~]# 

    为了解决这个问题, 我们用一个假的 ip 169.254.1.1 (link-local address) 绕一下: 先给它加一条 scope link 的直连路由, 再把默认网关指向它, 这样 netns 里只需要为 169.254.1.1 这一个网关地址做 arp 解析:

    [root@bogon ~]# ip netns exec $NS ip r del default dev $VETH
    [root@bogon ~]# ip netns exec $NS ip r add 169.254.1.1 dev $VETH  scope link
    [root@bogon ~]# ip netns exec $NS ip r add default via 169.254.1.1 dev $VETH
    [root@bogon ~]# ip netns exec $NS ip r 
    default via 169.254.1.1 dev v-cali 
    169.254.1.1 dev v-cali scope link 
    [root@bogon ~]# ip netns exec $NS ip a
    1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    98: v-cali@if97: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
        link/ether 16:95:64:35:e3:17 brd ff:ff:ff:ff:ff:ff link-netnsid 0
        inet 10.2.0.1/32 scope global v-cali
           valid_lft forever preferred_lft forever
        inet6 fe80::1495:64ff:fe35:e317/64 scope link 
           valid_lft forever preferred_lft forever
    [root@bogon ~]# 
     [root@bogon ~]# ip netns exec cali  ip n
    8.8.8.8 dev v-cali lladdr 92:07:52:14:06:42 STALE
    10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
    114.114.114.114 dev v-cali lladdr 92:07:52:14:06:42 STALE
    [root@bogon ~]# ip netns exec cali  ip n del 8.8.8.8 dev v-cali
    [root@bogon ~]# ip netns exec cali  ip n del 10.10.16.81 dev v-cali
    [root@bogon ~]# ip netns exec cali  ip n del 114.114.114.114  dev v-cali
    [root@bogon ~]# ip netns exec cali  ip n
    [root@bogon ~]# ip netns exec cali  ping 8.8.8.8
    PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
    64 bytes from 8.8.8.8: icmp_seq=1 ttl=103 time=279 ms
    64 bytes from 8.8.8.8: icmp_seq=2 ttl=103 time=11.1 ms
    ^C
    --- 8.8.8.8 ping statistics ---
    2 packets transmitted, 2 received, 0% packet loss, time 1000ms
    rtt min/avg/max/mdev = 11.161/145.151/279.141/133.990 ms
    [root@bogon ~]# ip netns exec cali  ping 114.114.114.114
    PING 114.114.114.114 (114.114.114.114) 56(84) bytes of data.
    64 bytes from 114.114.
    [root@bogon ~]# ip netns exec cali  ip n
    10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
    169.254.1.1 dev v-cali lladdr 92:07:52:14:06:42 REACHABLE
    [root@bogon ~]# ip netns exec cali  ip a
    1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    98: v-cali@if97: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
        link/ether 16:95:64:35:e3:17 brd ff:ff:ff:ff:ff:ff link-netnsid 0
        inet 10.2.0.1/32 scope global v-cali
           valid_lft forever preferred_lft forever
        inet6 fe80::1495:64ff:fe35:e317/64 scope link 
           valid_lft forever preferred_lft forever
    [root@bogon ~]# ip a | grep cali
    97: v-cali-peer@if98: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    [root@bogon ~]# ip a sh v-cali-peer
    97: v-cali-peer@if98: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
        link/ether 92:07:52:14:06:42 brd ff:ff:ff:ff:ff:ff link-netnsid 5
        inet6 fe80::9007:52ff:fe14:642/64 scope link 
           valid_lft forever preferred_lft forever
    [root@bogon ~]# 

     arp请求报文

    [root@bogon ~]# tcpdump -i v-cali-peer arp -nv
    tcpdump: listening on v-cali-peer, link-type EN10MB (Ethernet), capture size 262144 bytes
    12:02:08.852461 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 10.2.0.1 tell 10.10.16.81, length 28
    12:02:08.852487 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 169.254.1.1 tell 10.2.0.1, length 28
    12:02:08.852499 ARP, Ethernet (len 6), IPv4 (len 4), Reply 169.254.1.1 is-at 92:07:52:14:06:42, length 28
    12:02:08.852695 ARP, Ethernet (len 6), IPv4 (len 4), Reply 10.2.0.1 is-at 16:95:64:35:e3:17, length 28
    [root@bogon ~]# iptables -t nat -A POSTROUTING -s 10.2.0.1/32 -j MASQUERADE
    [root@bogon ~]# ip netns exec cali  ping 8.8.8.8
    PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
    64 bytes from 8.8.8.8: icmp_seq=1 ttl=103 time=11.5 ms
    64 bytes from 8.8.8.8: icmp_seq=2 ttl=103 time=11.0 ms
    64 bytes from 8.8.8.8: icmp_seq=3 ttl=103 time=11.0 ms
    ^C
    --- 8.8.8.8 ping statistics ---
    3 packets transmitted, 3 received, 0% packet loss, time 2002ms
    rtt min/avg/max/mdev = 11.082/11.229/11.513/0.234 ms
    [root@bogon ~]# 
    [root@bogon ~]# NS=cali
    [root@bogon ~]# VETH=v-cali
    [root@bogon ~]# ip netns add $NS
    [root@bogon ~]# ip l add $VETH type veth peer name $VETH-peer
    [root@bogon ~]# ip l set $VETH-peer up
    [root@bogon ~]# ip l set $VETH netns $NS
    [root@bogon ~]# ip netns exec $NS ip l set $VETH up
    [root@bogon ~]# ip netns exec $NS ip a add 10.2.0.1/32 dev $VETH
    [root@bogon ~]# ip r add 10.2.0.1/32 dev $VETH-peer
    [root@bogon ~]# ip netns exec $NS ip r add default dev $VETH
    [root@bogon ~]# 

    calico plugin源码解析

    func Main(version string) {
        // ...
        err := flagSet.Parse(os.Args[1:])
        // ...
        // 注册 `ADD` 和 `DEL` 命令
        skel.PluginMain(cmdAdd, nil, cmdDel,
            cniSpecVersion.PluginSupports("0.1.0", "0.2.0", "0.3.0", "0.3.1"),
            "Calico CNI plugin "+version)
    }

    ADD 命令里,主要做了三个逻辑:

    • 查询calico datastore里有没有WorkloadEndpoint对象和当前的pod名字匹配,没有匹配,则会创建新的WorkloadEndpoint对象,该对象内主要保存该pod在host network namespace内的网卡名字和pod ip地址,以及container network namespace的网卡名字等等信息,对象示例如下。
    • 创建一个veth pair,并把其中一个网卡置于宿主机端网络命名空间,另一个置于容器端网络命名空间。在container network namespace内创建网卡如eth0,并通过调用calico-ipam获得的IP地址赋值给该eth0网卡;在host network namespace内创建网卡,网卡名格式为 "cali" + sha1(namespace.pod)[:11] ,并设置MAC地址"ee:ee:ee:ee:ee:ee"。
    • 在容器端和宿主机端创建路由。在容器端,设置默认网关为 169.254.1.1 ,该网关地址代码写死的;在宿主机端,添加路由如 10.217.120.85 dev calid0bda9976d5 scope link ,其中 10.217.120.85 是pod ip地址,calid0bda9976d5 是该pod在宿主机端的网卡,也就是veth pair在宿主机这端的virtual ethernet interface虚拟网络设备。
  • 相关阅读:
    好玩夫妻
    笔记整理MS SQL2005 中查询表的字段信息,
    庆幸也与你逛过那一段旅程
    PureMVC
    简单工厂模式
    工厂方法模式
    UML类图
    PureMVC
    oracle双机热备
    一个不错的免费网络硬盘
  • 原文地址:https://www.cnblogs.com/dream397/p/14862268.html
Copyright © 2020-2023  润新知