最近看了一个小问题,涉及到一致性网络设备命名(Consistent Network Device Naming),在此记录一下。
系统是 4.18.0-240.el8.x86_64,centos 8.3
系统上的网卡如下:
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
6: eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether b4:05:5d:d5:1e:1d brd ff:ff:ff:ff:ff:ff
inet *.*.*.*/24 brd 10.20.25.255 scope global noprefixroute eno1
valid_lft forever preferred_lft forever
inet6 fe80::b605:5dff:fed5:1e1d/64 scope link
valid_lft forever preferred_lft forever
7: eno2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether b4:05:5d:d5:1e:1e brd ff:ff:ff:ff:ff:ff
8: eno3: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether b4:05:5d:d5:1e:1f brd ff:ff:ff:ff:ff:ff
9: eno4: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether b4:05:5d:d5:1e:20 brd ff:ff:ff:ff:ff:ff
18: ens47: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 52:54:00:0e:26:13 brd ff:ff:ff:ff:ff:ff
19: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 52:54:00:6b:c6:c7 brd ff:ff:ff:ff:ff:ff
inet 192.188.2.10/24 brd 192.188.2.255 scope global noprefixroute eth1
valid_lft forever preferred_lft forever
inet6 fe80::5054:ff:fe6b:c6c7/64 scope link noprefixroute
valid_lft forever preferred_lft forever
20: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 52:54:00:09:b3:32 brd ff:ff:ff:ff:ff:ff
inet 192.188.3.10/24 brd 192.188.3.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet6 fe80::5054:ff:fe09:b332/64 scope link noprefixroute
valid_lft forever preferred_lft forever
21: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 52:54:00:95:de:fd brd ff:ff:ff:ff:ff:ff
inet 192.188.4.10/24 brd 192.188.4.255 scope global noprefixroute eth2
valid_lft forever preferred_lft forever
inet6 fe80::5054:ff:fe95:defd/64 scope link noprefixroute
valid_lft forever preferred_lft forever
测试小伙伴查看网卡,发现同样是virtio_net设备,为啥有的是正常的eth0,有的是ens47。
[root]# ethtool -i ens47
**driver: virtio_net**
version: 1.0.0
firmware-version:
expansion-rom-version:
bus-info: 0000:c1:00.0
supports-statistics: yes
supports-test: no
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: no
[root]# ethtool -i eth0
**driver: virtio_net**
version: 1.0.0
firmware-version:
expansion-rom-version:
bus-info: 0000:c3:00.0
supports-statistics: yes
supports-test: no
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: no
virtio_net 驱动中关于网卡name的调用流程是:
/*
 * Excerpt of the virtio_net probe routine; the dotted lines mark code that
 * the article omits. Only the two calls relevant to interface naming are kept.
 */
static int virtnet_probe(struct virtio_device *vdev)
{
....
/* Allocate ourselves a network device with room for our info */
dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);//caq: this is where the name template "eth%d" gets set
....
err = register_netdev(dev);//caq: registers the net_device; the name template is resolved on this path (register_netdev ---> dev_get_valid_name)
}
具体来看下 alloc_etherdev_mq 的行为(它是一个宏,最终调用的是 alloc_etherdev_mqs):
/*
 * Allocate an Ethernet net_device by delegating to alloc_netdev_mqs() with
 * the name template "eth%d", NET_NAME_UNKNOWN and ether_setup.
 */
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
unsigned int rxqs)
{
return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
ether_setup, txqs, rxqs);//caq: the name template is "eth%d", and name_assign_type is passed as NET_NAME_UNKNOWN
}
/* interface name assignment types (sysfs name_assign_type attribute) */
#define NET_NAME_UNKNOWN 0 /* unknown origin (not exposed to userspace) *///caq: note the hint that this value is not exposed to userspace
#define NET_NAME_ENUM 1 /* enumerated by kernel */
#define NET_NAME_PREDICTABLE 2 /* predictably named by the kernel */
#define NET_NAME_USER 3 /* provided by user-space */
#define NET_NAME_RENAMED 4 /* renamed by user-space */
在alloc_netdev_mqs中:
/*
 * Excerpt of alloc_netdev_mqs(): the article quotes only the two statements
 * that store the name template and its assignment type on the device.
 */
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
unsigned int txqs, unsigned int rxqs)
{
//caq: the two key lines concerning naming:
strcpy(dev->name, name);//caq: copies the passed-in template string ("eth%d" on the alloc_etherdev_mqs path)
dev->name_assign_type = name_assign_type;//caq: 0 (NET_NAME_UNKNOWN) on that path
}
接下来,查看 register_netdev 的流程,
register_netdev语义中,关于name部分的流程为:
register_netdev--->dev_get_valid_name
/*
 * Validate @name and settle the name of @dev within namespace @net.
 *
 * Returns -EINVAL when dev_valid_name() rejects @name. A @name containing
 * '%' is handed to dev_alloc_name_ns() and that call's result is returned.
 * Otherwise returns -EEXIST when __dev_get_by_name() finds @name in @net,
 * or 0 after copying @name into dev->name (the copy is skipped when @name
 * already is dev->name).
 */
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
BUG_ON(!net);
if (!dev_valid_name(name))
return -EINVAL;
if (strchr(name, '%'))//caq: the usual path, e.g. for "eth%d"
return dev_alloc_name_ns(net, dev, name);
else if (__dev_get_by_name(net, name))
return -EEXIST;
else if (dev->name != name)
strlcpy(dev->name, name, IFNAMSIZ);
return 0;
}
由于我们设置的设备初始name是eth%d,所以走 dev_alloc_name_ns 分支,
dev_alloc_name_ns 会进一步调用 __dev_alloc_name:
dev_alloc_name_ns--->__dev_alloc_name
/*
 * Expand a name template into a concrete interface name in @buf.
 *
 * When @name contains "%d", every device in @net is scanned, the indices
 * already taken by names matching the template are marked in a one-page
 * bitmap, and the first clear bit becomes the index. The formatted name is
 * then looked up once more with __dev_get_by_name().
 *
 * Returns the chosen index on success, -EINVAL for an invalid name or
 * template, -ENOMEM when the bitmap page cannot be allocated, and -ENFILE
 * when the formatted name is already present in @net.
 */
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
int i = 0;
const char *p;
const int max_netdevices = 8*PAGE_SIZE;
unsigned long *inuse;
struct net_device *d;
if (!dev_valid_name(name))
return -EINVAL;
p = strchr(name, '%');
if (p) {
/*
* Verify the string as this thing may have come from
* the user. There must be either one "%d" and no other "%"
* characters.
*/
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
/* Use one page as a bit array of possible slots */
inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
if (!inuse)
return -ENOMEM;
for_each_netdev(net, d) {//caq: search within the same net namespace
if (!sscanf(d->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
continue;
/* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, d->name, IFNAMSIZ))
set_bit(i, inuse);//caq: first pass over all devices, marking every index already in use in inuse
}
i = find_first_zero_bit(inuse, max_netdevices);//caq: then take the first unused index from inuse
free_page((unsigned long) inuse);
}
snprintf(buf, IFNAMSIZ, name, i);//caq: if the loop above ran, the chosen i is one nobody is using; if it did not run (no '%' in name), that is not guaranteed
if (!__dev_get_by_name(net, buf))//caq: verify once more
return i;//caq: the lookup above returned NULL, so the name built from i is free and i is valid
/* It is possible to run out of possible slots
* when the name is long and there isn't enough space left
* for the digits, or if all bits are used.
*/
return -ENFILE;
}
以上就是获取ethx之类名字的流程。
了解了以上流程,下面进一步查看,为什么 我们的设备上出现了 测试同学认为的正常的eth0,eth1之类,也出现了所谓异常的ens47。
那是不是内核设置完eth之后,用户就直接使用eth呢?这个在以前是成立的,但是从以上的逻辑也可以看出,eth0和eth1这类编号与具体网卡之间没有必然的对应关系,
只取决于申请的先后顺序,有时候会出现复位之后eth0和eth1的顺序相反的情况,或者热插拔之后name被别人占用的情况。
所以,centos7之后,由于使用了systemd,systemd和udev引入了一种新的网络设备命名方式:一致性网络设备命名(Consistent Network Device Naming)。
我们来查看一下网卡的udev信息:
[root]# udevadm info /sys/class/net/eth0
P: /devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:12.0/0000:c3:00.0/virtio6/net/eth0
E: DEVPATH=/devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:12.0/0000:c3:00.0/virtio6/net/eth0
E: ID_BUS=pci
E: ID_MODEL_FROM_DATABASE=Virtio network device
E: ID_MODEL_ID=0x1000
E: ID_NET_DRIVER=virtio_net
E: ID_NET_LINK_FILE=/usr/lib/systemd/network/99-default.link
**E: ID_NET_NAME=ens47**
**E: ID_NET_NAME_MAC=enx52540009b332**
**E: ID_NET_NAME_PATH=enp195s0**
**E: ID_NET_NAME_SLOT=ens47**
E: ID_NET_NAMING_SCHEME=rhel-8.0
E: ID_PATH=pci-0000:c3:00.0
E: ID_PATH_TAG=pci-0000_c3_00_0
E: ID_PCI_CLASS_FROM_DATABASE=Network controller
E: ID_PCI_SUBCLASS_FROM_DATABASE=Ethernet controller
E: ID_VENDOR_FROM_DATABASE=Red Hat, Inc.
E: ID_VENDOR_ID=0x1af4
E: IFINDEX=20
E: INTERFACE=eth0
E: SUBSYSTEM=net
E: SYSTEMD_ALIAS=/sys/subsystem/net/devices/ens47
E: TAGS=:systemd:
E: USEC_INITIALIZED=1080595193
[root]# udevadm info /sys/class/net/eth1
P: /devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:11.0/0000:c2:00.0/virtio5/net/eth1
E: DEVPATH=/devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:11.0/0000:c2:00.0/virtio5/net/eth1
E: ID_BUS=pci
E: ID_MODEL_FROM_DATABASE=Virtio network device
E: ID_MODEL_ID=0x1000
E: ID_NET_DRIVER=virtio_net
E: ID_NET_LINK_FILE=/usr/lib/systemd/network/99-default.link
**E: ID_NET_NAME=ens47**
**E: ID_NET_NAME_MAC=enx5254006bc6c7**
**E: ID_NET_NAME_PATH=enp194s0**
**E: ID_NET_NAME_SLOT=ens47**
E: ID_NET_NAMING_SCHEME=rhel-8.0
E: ID_PATH=pci-0000:c2:00.0
E: ID_PATH_TAG=pci-0000_c2_00_0
E: ID_PCI_CLASS_FROM_DATABASE=Network controller
E: ID_PCI_SUBCLASS_FROM_DATABASE=Ethernet controller
E: ID_VENDOR_FROM_DATABASE=Red Hat, Inc.
E: ID_VENDOR_ID=0x1af4
E: IFINDEX=19
E: INTERFACE=eth1
E: SUBSYSTEM=net
E: SYSTEMD_ALIAS=/sys/subsystem/net/devices/ens47
E: TAGS=:systemd:
E: USEC_INITIALIZED=1080405318
[root]# udevadm info /sys/class/net/eth3
Unknown device, --name=, --path=, or absolute path in /dev/ or /sys expected.
[root@192-188-2-10 network-scripts]# udevadm info /sys/class/net/eth2
P: /devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:13.0/0000:c4:00.0/virtio7/net/eth2
E: DEVPATH=/devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/0000:b0:13.0/0000:c4:00.0/virtio7/net/eth2
E: ID_BUS=pci
E: ID_MODEL_FROM_DATABASE=Virtio network device
E: ID_MODEL_ID=0x1000
E: ID_NET_DRIVER=virtio_net
E: ID_NET_LINK_FILE=/usr/lib/systemd/network/99-default.link
**E: ID_NET_NAME=ens47**
**E: ID_NET_NAME_MAC=enx52540095defd**
**E: ID_NET_NAME_PATH=enp196s0**
**E: ID_NET_NAME_SLOT=ens47**
E: ID_NET_NAMING_SCHEME=rhel-8.0
E: ID_PATH=pci-0000:c4:00.0
E: ID_PATH_TAG=pci-0000_c4_00_0
E: ID_PCI_CLASS_FROM_DATABASE=Network controller
E: ID_PCI_SUBCLASS_FROM_DATABASE=Ethernet controller
E: ID_VENDOR_FROM_DATABASE=Red Hat, Inc.
E: ID_VENDOR_ID=0x1af4
E: IFINDEX=21
E: INTERFACE=eth2
E: SUBSYSTEM=net
E: SYSTEMD_ALIAS=/sys/subsystem/net/devices/ens47
E: TAGS=:systemd:
E: USEC_INITIALIZED=1080613166
然后查看对应的日志:
10:06:08 localhost.localdomain kernel: **virtio_net virtio4 ens47: renamed from eth0**//caq:将eth0重命名为ens47
10:06:08 localhost.localdomain NetworkManager[1819]: <info> [1641827168.5899] device (eth0): interface index 18 renamed iface from 'eth0' to 'ens47'//caq:NetworkManager 只是感知并记录了这次重命名;真正发起重命名的是 systemd-udevd(依据 99-default.link 的命名策略)
10:06:08 localhost.localdomain systemd-udevd[3465]: could not rename interface '19' from 'eth1' to 'ens47': File exists//caq:eth1根据规则,也需要重命名为ens47,但由于ens47已经被占用,所以命名失败
10:06:08 localhost.localdomain systemd-udevd[3468]: could not rename interface '20' from 'eth0' to 'ens47': File exists//caq:eth0根据规则,也需要重命名为ens47,但由于ens47已经被占用,所以命名失败
10:06:08 localhost.localdomain systemd-udevd[3474]: could not rename interface '21' from 'eth2' to 'ens47': File exists//caq:eth2根据规则,也需要重命名为ens47,但由于ens47已经被占用,所以命名失败
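从前面的udev信息可以看到,eth0、eth1、eth2 的 ID_NET_NAME(取自 ID_NET_NAME_SLOT)都是 ens47,所以它们都想被重命名为 ens47,但只有最先重命名的那块网卡成功,其余的保留了内核分配的 ethX 名字。而ens47的来源是因为如下的命名规则: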
二、Centos7中的命名策略
Scheme 1: 如果从BIOS中能够取到可用的,板载网卡的索引号,则使用这个索引号命名,例如: eno1,如不能则尝试Scheme 2
Scheme 2: 如果从BIOS中能够取到可以用的,网卡所在的PCI-E热插拔插槽的索引号,则使用这个索引号命名,例如: ens1,如不能则尝试Scheme 3
Scheme 3:如果能拿到设备所连接的物理位置信息,则使用这个信息命名,例如:enp2s0,如不能则尝试Scheme 4
Scheme 4:使用网卡的MAC地址来命名,例如: enx52540009b332,这个方法一般不使用。
Scheme 5:传统的kernel命名方法,例如: eth0,这种命名方法的结果不可预知的,即可能第二块网卡对应eth0,第一块网卡对应eth1。
参考资料:
https://www.cnblogs.com/zyd112/p/8143464.html
https://www.cnblogs.com/yinfutao/p/9634350.html
https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/