• 关于virtio_net网卡命名的小问题


    最近看了一个小问题,涉及到一致性网络设备命名(Consistent Network Device Naming),在此记录一下。
    系统是 4.18.0-240.el8.x86_64,centos 8.3
    系统上的网卡如下:

    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
           valid_lft forever preferred_lft forever
        inet6 ::1/128 scope host
           valid_lft forever preferred_lft forever
    6: eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
        link/ether b4:05:5d:d5:1e:1d brd ff:ff:ff:ff:ff:ff
        inet *.*.*.*/24 brd 10.20.25.255 scope global noprefixroute eno1
           valid_lft forever preferred_lft forever
        inet6 fe80::b605:5dff:fed5:1e1d/64 scope link
           valid_lft forever preferred_lft forever
    7: eno2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
        link/ether b4:05:5d:d5:1e:1e brd ff:ff:ff:ff:ff:ff
    8: eno3: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
        link/ether b4:05:5d:d5:1e:1f brd ff:ff:ff:ff:ff:ff
    9: eno4: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN group default qlen 1000
        link/ether b4:05:5d:d5:1e:20 brd ff:ff:ff:ff:ff:ff
    18: ens47: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
        link/ether 52:54:00:0e:26:13 brd ff:ff:ff:ff:ff:ff
    19: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
        link/ether 52:54:00:6b:c6:c7 brd ff:ff:ff:ff:ff:ff
        inet 192.188.2.10/24 brd 192.188.2.255 scope global noprefixroute eth1
           valid_lft forever preferred_lft forever
        inet6 fe80::5054:ff:fe6b:c6c7/64 scope link noprefixroute
           valid_lft forever preferred_lft forever
    20: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
        link/ether 52:54:00:09:b3:32 brd ff:ff:ff:ff:ff:ff
        inet 192.188.3.10/24 brd 192.188.3.255 scope global noprefixroute eth0
           valid_lft forever preferred_lft forever
        inet6 fe80::5054:ff:fe09:b332/64 scope link noprefixroute
           valid_lft forever preferred_lft forever
    21: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
        link/ether 52:54:00:95:de:fd brd ff:ff:ff:ff:ff:ff
        inet 192.188.4.10/24 brd 192.188.4.255 scope global noprefixroute eth2
           valid_lft forever preferred_lft forever
        inet6 fe80::5054:ff:fe95:defd/64 scope link noprefixroute
           valid_lft forever preferred_lft forever
    
    

    测试小伙伴查看网卡,发现同样是virtio_net设备,为啥有的是正常的eth0,有的是ens47。

    [root]# ethtool -i ens47
    **driver: virtio_net**
    version: 1.0.0
    firmware-version:
    expansion-rom-version:
    bus-info: 0000:c1:00.0
    supports-statistics: yes
    supports-test: no
    supports-eeprom-access: no
    supports-register-dump: no
    supports-priv-flags: no
    [root]# ethtool -i eth0
    **driver: virtio_net**
    version: 1.0.0
    firmware-version:
    expansion-rom-version:
    bus-info: 0000:c3:00.0
    supports-statistics: yes
    supports-test: no
    supports-eeprom-access: no
    supports-register-dump: no
    supports-priv-flags: no
    
    

    virtio_net 驱动中关于网卡name的调用流程是:

    /*
     * Excerpt of the virtio_net probe routine; omitted code is marked "....".
     * Only the two calls that matter for interface naming are shown.
     */
    static int virtnet_probe(struct virtio_device *vdev)
    {
    ....
    	/* Allocate ourselves a network device with room for our info */
    	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);//caq: this sets the name template to "eth%d"
    ....
            err = register_netdev(dev);//caq: registers dev as a network device; the final name is resolved on this path
    }
    

    具体来看下 alloc_etherdev_mq 的行为:

    /*
     * Thin wrapper around alloc_netdev_mqs(): forwards sizeof_priv, txqs and
     * rxqs unchanged and fixes the name template to "eth%d", the name
     * assignment type to NET_NAME_UNKNOWN and the setup callback to ether_setup.
     */
    struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
    				      unsigned int rxqs)
    {
    	return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
    				ether_setup, txqs, rxqs);//caq: named from the "eth%d" template; name_assign_type is passed as NET_NAME_UNKNOWN
    }
    /* interface name assignment types (sysfs name_assign_type attribute) */
    #define NET_NAME_UNKNOWN	0	/* unknown origin (not exposed to userspace) *///caq: note that this value is documented as not exposed to userspace
    #define NET_NAME_ENUM		1	/* enumerated by kernel */
    #define NET_NAME_PREDICTABLE	2	/* predictably named by the kernel */
    #define NET_NAME_USER		3	/* provided by user-space */
    #define NET_NAME_RENAMED	4	/* renamed by user-space */
    

    在alloc_netdev_mqs中:

    /*
     * Excerpt of alloc_netdev_mqs(): only the two statements that record the
     * caller-supplied name template and name assignment type on the device
     * are shown; the rest of the body is omitted.
     */
    struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
    		unsigned char name_assign_type,
    		void (*setup)(struct net_device *),
    		unsigned int txqs, unsigned int rxqs)
    {
    //caq: the two key lines concerning naming:
    	strcpy(dev->name, name);//caq: copies the template passed in, "eth%d" on the virtio_net path
    	dev->name_assign_type = name_assign_type;//caq: 0 (NET_NAME_UNKNOWN) on the virtio_net path
    }
    

    接下来,查看 register_netdev 的流程,
    register_netdev语义中,关于name部分的流程为:
    register_netdev--->dev_get_valid_name

    /*
     * Validate `name` and make dev->name hold a usable interface name.
     *
     * - A name rejected by dev_valid_name() yields -EINVAL.
     * - A name containing '%' is treated as a template and handed to
     *   dev_alloc_name_ns(); that function's result is returned as-is.
     * - A literal name that __dev_get_by_name() already finds in `net`
     *   yields -EEXIST.
     * - Otherwise the literal name is copied into dev->name (bounded by
     *   IFNAMSIZ) and 0 is returned.
     */
    int dev_get_valid_name(struct net *net, struct net_device *dev,
    		       const char *name)
    {
    	BUG_ON(!net);
    
    	if (!dev_valid_name(name))
    		return -EINVAL;
    
    	if (strchr(name, '%'))//caq: the usual path, e.g. for "eth%d"
    		return dev_alloc_name_ns(net, dev, name);
    	else if (__dev_get_by_name(net, name))
    		return -EEXIST;
    	else if (dev->name != name)	/* skip the copy when name already is dev->name */
    		strlcpy(dev->name, name, IFNAMSIZ);
    
    	return 0;
    }
    

    由于我们设置的设备初始name是eth%d,所以走 dev_alloc_name_ns 分支,
    dev_alloc_name_ns 会进一步调用 __dev_alloc_name:
    dev_alloc_name_ns--->__dev_alloc_name

    /*
     * Build a concrete interface name from the template `name` into `buf`.
     *
     * When `name` contains "%d", every device in `net` is scanned to mark the
     * unit numbers already in use, and the lowest free number is substituted.
     * When `name` contains no '%', it is formatted as-is and i stays 0.
     *
     * Returns the chosen unit number on success, -EINVAL for an invalid name
     * or malformed template, -ENOMEM if the bitmap page cannot be allocated,
     * and -ENFILE if the resulting name is already taken.
     */
    static int __dev_alloc_name(struct net *net, const char *name, char *buf)
    {
    	int i = 0;
    	const char *p;
    	const int max_netdevices = 8*PAGE_SIZE;	/* number of bits in the one-page bitmap used below */
    	unsigned long *inuse;
    	struct net_device *d;
    
    	if (!dev_valid_name(name))
    		return -EINVAL;
    
    	p = strchr(name, '%');
    	if (p) {
    		/*
    		 * Verify the string as this thing may have come from
    		 * the user.  There must be either one "%d" and no other "%"
    		 * characters.
    		 */
    		if (p[1] != 'd' || strchr(p + 2, '%'))
    			return -EINVAL;
    
    		/* Use one page as a bit array of possible slots */
    		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
    		if (!inuse)
    			return -ENOMEM;
    
    		for_each_netdev(net, d) {//caq: search within the same net namespace
    			/* skip devices for which sscanf matched nothing (returned 0) */
    			if (!sscanf(d->name, name, &i))
    				continue;
    			/* ignore numbers that do not fit in the bitmap */
    			if (i < 0 || i >= max_netdevices)
    				continue;
    
    			/*  avoid cases where sscanf is not exact inverse of printf */
    			snprintf(buf, IFNAMSIZ, name, i);
    			if (!strncmp(buf, d->name, IFNAMSIZ))
    				set_bit(i, inuse);//caq: first pass: record every unit number already in use in inuse
    		}
    
    		i = find_first_zero_bit(inuse, max_netdevices);//caq: then take the first unused id from inuse
    		free_page((unsigned long) inuse);
    	}
    
    	snprintf(buf, IFNAMSIZ, name, i);//caq: if the loop above ran, this i is unused by anyone; if it did not run, that is not guaranteed
    	if (!__dev_get_by_name(net, buf))//caq: check once more
    		return i;//caq: __dev_get_by_name returned NULL, so the name built from i is free and i is valid
    
    	/* It is possible to run out of possible slots
    	 * when the name is long and there isn't enough space left
    	 * for the digits, or if all bits are used.
    	 */
    	return -ENFILE;
    }
    

    以上就是获取ethx之类名字的流程。

    了解了以上流程,下面进一步查看,为什么 我们的设备上出现了 测试同学认为的正常的eth0,eth1之类,也出现了所谓异常的ens47。

    那是不是内核设置完ethX之后,用户就直接使用这个名字呢?在以前这个说法是成立的。但是从以上的逻辑也可以看出,eth0和eth1的编号没有必然的先后顺序,只取决于设备申请名字的先后顺序,
    所以有时候会出现重启之后eth0和eth1的顺序相反的情况,或者热插拔之后name被别的设备占用的情况。
    所以,centos7之后,由于使用了systemd,systemd和udev引入了一种新的网络设备命名方式:一致网络设备命名(CONSISTENT NETWORK DEVICE NAMING)。
    我们来查看一下网卡的udev信息:

    [root]# udevadm info /sys/class/net/eth0
    P: /devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:12.0/0000:c3:00.0/virtio6/net/eth0
    E: DEVPATH=/devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:12.0/0000:c3:00.0/virtio6/net/eth0
    E: ID_BUS=pci
    E: ID_MODEL_FROM_DATABASE=Virtio network device
    E: ID_MODEL_ID=0x1000
    E: ID_NET_DRIVER=virtio_net
    E: ID_NET_LINK_FILE=/usr/lib/systemd/network/99-default.link
    **E: ID_NET_NAME=ens47**
    **E: ID_NET_NAME_MAC=enx52540009b332**
    **E: ID_NET_NAME_PATH=enp195s0**
    **E: ID_NET_NAME_SLOT=ens47**
    E: ID_NET_NAMING_SCHEME=rhel-8.0
    E: ID_PATH=pci-0000:c3:00.0
    E: ID_PATH_TAG=pci-0000_c3_00_0
    E: ID_PCI_CLASS_FROM_DATABASE=Network controller
    E: ID_PCI_SUBCLASS_FROM_DATABASE=Ethernet controller
    E: ID_VENDOR_FROM_DATABASE=Red Hat, Inc.
    E: ID_VENDOR_ID=0x1af4
    E: IFINDEX=20
    E: INTERFACE=eth0
    E: SUBSYSTEM=net
    E: SYSTEMD_ALIAS=/sys/subsystem/net/devices/ens47
    E: TAGS=:systemd:
    E: USEC_INITIALIZED=1080595193
    
    [root]# udevadm info /sys/class/net/eth1
    P: /devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:11.0/0000:c2:00.0/virtio5/net/eth1
    E: DEVPATH=/devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:11.0/0000:c2:00.0/virtio5/net/eth1
    E: ID_BUS=pci
    E: ID_MODEL_FROM_DATABASE=Virtio network device
    E: ID_MODEL_ID=0x1000
    E: ID_NET_DRIVER=virtio_net
    E: ID_NET_LINK_FILE=/usr/lib/systemd/network/99-default.link
    **E: ID_NET_NAME=ens47**
    **E: ID_NET_NAME_MAC=enx5254006bc6c7**
    **E: ID_NET_NAME_PATH=enp194s0**
    **E: ID_NET_NAME_SLOT=ens47**
    E: ID_NET_NAMING_SCHEME=rhel-8.0
    E: ID_PATH=pci-0000:c2:00.0
    E: ID_PATH_TAG=pci-0000_c2_00_0
    E: ID_PCI_CLASS_FROM_DATABASE=Network controller
    E: ID_PCI_SUBCLASS_FROM_DATABASE=Ethernet controller
    E: ID_VENDOR_FROM_DATABASE=Red Hat, Inc.
    E: ID_VENDOR_ID=0x1af4
    E: IFINDEX=19
    E: INTERFACE=eth1
    E: SUBSYSTEM=net
    E: SYSTEMD_ALIAS=/sys/subsystem/net/devices/ens47
    E: TAGS=:systemd:
    E: USEC_INITIALIZED=1080405318
    
    [root]# udevadm info /sys/class/net/eth3
    Unknown device, --name=, --path=, or absolute path in /dev/ or /sys expected.
    [root@192-188-2-10 network-scripts]# udevadm info /sys/class/net/eth2
    P: /devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:13.0/0000:c4:00.0/virtio7/net/eth2
    E: DEVPATH=/devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:13.0/0000:c4:00.0/virtio7/net/eth2
    E: ID_BUS=pci
    E: ID_MODEL_FROM_DATABASE=Virtio network device
    E: ID_MODEL_ID=0x1000
    E: ID_NET_DRIVER=virtio_net
    E: ID_NET_LINK_FILE=/usr/lib/systemd/network/99-default.link
    **E: ID_NET_NAME=ens47**
    **E: ID_NET_NAME_MAC=enx52540095defd**
    **E: ID_NET_NAME_PATH=enp196s0**
    **E: ID_NET_NAME_SLOT=ens47**
    E: ID_NET_NAMING_SCHEME=rhel-8.0
    E: ID_PATH=pci-0000:c4:00.0
    E: ID_PATH_TAG=pci-0000_c4_00_0
    E: ID_PCI_CLASS_FROM_DATABASE=Network controller
    E: ID_PCI_SUBCLASS_FROM_DATABASE=Ethernet controller
    E: ID_VENDOR_FROM_DATABASE=Red Hat, Inc.
    E: ID_VENDOR_ID=0x1af4
    E: IFINDEX=21
    E: INTERFACE=eth2
    E: SUBSYSTEM=net
    E: SYSTEMD_ALIAS=/sys/subsystem/net/devices/ens47
    E: TAGS=:systemd:
    E: USEC_INITIALIZED=1080613166
    
    

    然后查看对应的日志:

     10:06:08 localhost.localdomain kernel: **virtio_net virtio4 ens47: renamed from eth0**//caq:将eth0重命名为ens47
     10:06:08 localhost.localdomain NetworkManager[1819]: <info>  [1641827168.5899] device (eth0): interface index 18 renamed iface from 'eth0' to 'ens47'//caq:这个是重命名的发起方
     10:06:08 localhost.localdomain systemd-udevd[3465]: could not rename interface '19' from 'eth1' to 'ens47': File exists//caq:eth1根据规则,也需要重命名为ens47,但由于ens47已经被占用,所以命名失败
     10:06:08 localhost.localdomain systemd-udevd[3468]: could not rename interface '20' from 'eth0' to 'ens47': File exists//caq:eth0根据规则,也需要重命名为ens47,但由于ens47已经被占用,所以重命名失败
     10:06:08 localhost.localdomain systemd-udevd[3474]: could not rename interface '21' from 'eth2' to 'ens47': File exists//caq:eth2根据规则,也需要重命名为ens47,但由于ens47已经被占用,所以重命名失败
    
    

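    从上面的udev信息可以看到,这几块virtio_net网卡的ID_NET_NAME_SLOT都是ens47,所以它们都会尝试重命名为ens47,只有最先重命名的那块成功,其余的只能保留内核分配的ethX。
    而ens47的来源是因为如下的命名规则: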
    二、Centos7中的命名策略

    Scheme 1: 如果从BIOS中能够取到可用的,板载网卡的索引号,则使用这个索引号命名,例如: eno1,如不能则尝试Scheme 2
    Scheme 2: 如果从BIOS中能够取到可以用的,网卡所在的PCI-E热插拔插槽的索引号,则使用这个索引号命名,例如: ens1,如不能则尝试Scheme 3
    Scheme 3:如果能拿到设备所连接的物理位置信息,则使用这个信息命名,例如:enp2s0,如不能则尝试Scheme 4
    Scheme 4: 使用网卡的MAC地址来命名,例如: enx52540009b332,这个方法一般不使用。
    Scheme 5:传统的kernel命名方法,例如: eth0,这种命名方法的结果不可预知的,即可能第二块网卡对应eth0,第一块网卡对应eth1。

    参考资料:
    https://www.cnblogs.com/zyd112/p/8143464.html
    https://www.cnblogs.com/yinfutao/p/9634350.html
    https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/

    水平有限,如果有错误,请帮忙提醒我。如果您觉得本文对您有帮助,可以点击下面的 推荐 支持一下我。版权所有,需要转发请带上本文源地址,博客一直在更新,欢迎 关注 。
  • 相关阅读:
    阅读任务
    自我介绍
    学习总结
    第十二周课程总结
    第十一周课程总结
    第十周课程总结
    第九周课程总结&实验报告(七)
    第四周课程总结&实验报告
    第3周Java编程总结
    学习总结
  • 原文地址:https://www.cnblogs.com/10087622blog/p/15785275.html
Copyright © 2020-2023  润新知