I. What is KNI, and why do we need it
KNI (Kernel NIC Interface) is DPDK's solution for exchanging packets between user space and the kernel. It emulates a virtual network interface through which a DPDK application and the Linux kernel can communicate: packets received by the user-space application can be forwarded into the Linux protocol stack via the KNI interface.
Why have a KNI interface at all? DPDK's high-speed forwarding performance is excellent, but it has shortcomings of its own, and the lack of a protocol stack is one of them. It may well be that a stack was deliberately left out of the design: running every forwarded packet through protocol processing would drag packet-handling throughput down considerably.
Let's go straight to the diagram:

[Figure: KNI mbuf usage flow between user space and kernel space]
The figure above shows the KNI mbuf usage flow, which is also the direction packets travel, since in the code a packet is really just a memory pointer. Everything to the right of rx_q is user space; everything to the left is kernel space. At the end, netif_rx() is called to push the packet into the Linux protocol stack, which requires converting the DPDK mbuf into an sk_buff.
When Linux sends a packet out of a KNI port, the callback kni_net_tx() is invoked; the packet is converted back and transmitted out of the port.
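To make the mbuf-to-sk_buff step concrete, here is a simplified sketch of what the module's kni_net.c receive path does for one packet. This is condensed and renamed for readability, not the verbatim DPDK code: kni_fifo_get()/kni_fifo_put() are the module's internal FIFO helpers, and error handling is trimmed.

```c
/* Simplified sketch of the kernel-side RX path in kni_net.c: pop one
 * mbuf from rx_q, copy it into an sk_buff, hand it to the stack, and
 * recycle the mbuf through free_q. Condensed, not the verbatim code. */
static void kni_net_rx_one(struct kni_dev *kni)
{
	void *va, *data_kva;
	struct rte_kni_mbuf *kva;
	struct sk_buff *skb;
	uint32_t len;

	/* Pop one mbuf pointer (a user-space VA) from the shared rx_q */
	if (kni_fifo_get(kni->rx_q, &va, 1) != 1)
		return;

	/* The mbuf pool is mapped at a constant offset between the two
	 * address spaces, so user VA -> kernel VA is simple arithmetic */
	kva = (void *)va - kni->mbuf_va + kni->mbuf_kva;
	len = kva->data_len;
	data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva;

	skb = dev_alloc_skb(len + 2);
	if (skb) {
		skb_reserve(skb, 2);                 /* align the IP header */
		memcpy(skb_put(skb, len), data_kva, len);
		skb->dev = kni->net_dev;
		skb->protocol = eth_type_trans(skb, kni->net_dev);
		netif_rx(skb);                       /* into the Linux stack */
	}

	/* Return the consumed mbuf to user space for rte_pktmbuf_free() */
	kni_fifo_put(kni->free_q, &va, 1);
}
```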
II. Main code analysis
1. Like the igb_uio module, KNI consists of kernel-side and user-space code. The kernel module builds into rte_kni.ko, which must be inserted first; DPDK also ships a user-space example. Let's look at the KNI kernel module code first.
In kni_misc.c, the entry point of the .ko is
```c
module_init(kni_init);
```
so execution enters at kni_init():
```c
static int __init
kni_init(void)
{
	int rc;

	KNI_PRINT("######## DPDK kni module loading ########\n");

	if (kni_parse_kthread_mode() < 0) {	/* single vs. multiple kthread mode */
		KNI_ERR("Invalid parameter for kthread_mode\n");
		return -EINVAL;
	}

#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	rc = register_pernet_subsys(&kni_net_ops);
#else
	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
	if (rc)
		return -EPERM;

	rc = misc_register(&kni_misc);
	if (rc != 0) {
		KNI_ERR("Misc registration failed\n");
		goto out;
	}

	/* Configure the lo mode according to the input parameter */
	kni_net_config_lo_mode(lo_mode);

	KNI_PRINT("######## DPDK kni module loaded ########\n");

	return 0;

out:
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	unregister_pernet_subsys(&kni_net_ops);
#else
	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
#endif
	return rc;
}
```
The code is straightforward. It first selects the KNI kthread mode, single-threaded or multi-threaded: in single mode one kernel thread services all KNI ports, while in multiple mode each KNI port gets its own kernel thread. The mode is chosen by a parameter passed when the module is inserted, as sketched below.
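For reference, the parameter plumbing is ordinary Linux module-parameter code; the following is condensed from kni_misc.c (the parameter name kthread_mode and the values single/multiple are real, the condensation is mine):

```c
/* Condensed from kni_misc.c: the mode is chosen at load time, e.g.
 *     insmod rte_kni.ko kthread_mode=multiple
 * Default (parameter absent) is single. */
static char *kthread_mode;
static unsigned multiple_kthread_on;

module_param(kthread_mode, charp, S_IRUGO);
MODULE_PARM_DESC(kthread_mode,
	"Kernel thread mode (default=single):\n"
	"    single    one kernel thread for all KNI interfaces\n"
	"    multiple  one kernel thread per KNI interface");

static int __init
kni_parse_kthread_mode(void)
{
	if (!kthread_mode)
		return 0;                        /* default: single */

	if (strcmp(kthread_mode, "single") == 0)
		return 0;
	else if (strcmp(kthread_mode, "multiple") == 0)
		multiple_kthread_on = 1;         /* one thread per port */
	else
		return -1;                       /* kni_init() rejects this */

	return 0;
}
```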
Next it calls misc_register() to register KNI as a misc (miscellaneous character) device. The kni_misc structure defines the device's operations:
```c
static struct miscdevice kni_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = KNI_DEVICE,
	.fops = &kni_fops,
};
```
The interesting part is the structure behind .fops:
```c
static struct file_operations kni_fops = {
	.owner = THIS_MODULE,
	.open = kni_open,
	.release = kni_release,
	.unlocked_ioctl = (void *)kni_ioctl,
	.compat_ioctl = (void *)kni_compat_ioctl,
};
```
The main operations involved are kni_open, kni_release, and kni_ioctl, each mapped to a handler function:
```c
static int
kni_open(struct inode *inode, struct file *file)
{
	struct net *net = current->nsproxy->net_ns;
	struct kni_net *knet = net_generic(net, kni_net_id);

	/* kni device can be opened by one user only per netns */
	if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
		return -EBUSY;

	/* Create kernel thread for single mode */
	if (multiple_kthread_on == 0) {
		KNI_PRINT("Single kernel thread for all KNI devices\n");
		/* Create kernel thread for RX */
		knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
						"kni_single");
		if (IS_ERR(knet->kni_kthread)) {
			KNI_ERR("Unable to create kernel thread\n");
			return PTR_ERR(knet->kni_kthread);
		}
	} else
		KNI_PRINT("Multiple kernel thread mode enabled\n");

	file->private_data = get_net(net);
	KNI_PRINT("/dev/kni opened\n");

	return 0;
}
```
On kni_open, single-thread mode creates one kernel thread, and /dev/kni is opened; at this point the kni device node is visible under /dev on the host.
```c
static int
kni_ioctl(struct inode *inode,
	unsigned int ioctl_num,
	unsigned long ioctl_param)
{
	int ret = -EINVAL;
	struct net *net = current->nsproxy->net_ns;

	KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);

	/*
	 * Switch according to the ioctl called
	 */
	switch (_IOC_NR(ioctl_num)) {
	case _IOC_NR(RTE_KNI_IOCTL_TEST):
		/* For test only, not used */
		break;
	case _IOC_NR(RTE_KNI_IOCTL_CREATE):
		ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
		break;
	case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
		ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
		break;
	default:
		KNI_DBG("IOCTL default\n");
		break;
	}

	return ret;
}
```
kni_ioctl() is the interface through which the module talks to user space; the handler that matters most is kni_ioctl_create():
```c
static int
kni_ioctl_create(struct net *net,
		unsigned int ioctl_num, unsigned long ioctl_param)
{
	struct kni_net *knet = net_generic(net, kni_net_id);
	int ret;
	struct rte_kni_device_info dev_info;
	struct pci_dev *pci = NULL;
	struct pci_dev *found_pci = NULL;
	struct net_device *net_dev = NULL;
	struct net_device *lad_dev = NULL;
	struct kni_dev *kni, *dev, *n;

	printk(KERN_INFO "KNI: Creating kni...\n");
	/* Check the buffer size, to avoid warning */
	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
		return -EINVAL;

	/* Copy kni info from user space */
	ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
	if (ret) {
		KNI_ERR("copy_from_user in kni_ioctl_create");
		return -EIO;
	}

	/**
	 * Check if the cpu core id is valid for binding,
	 * for multiple kernel thread mode.
	 */
	if (multiple_kthread_on && dev_info.force_bind &&
				!cpu_online(dev_info.core_id)) {
		KNI_ERR("cpu %u is not online\n", dev_info.core_id);
		return -EINVAL;
	}

	/* Check if it has been created */
	down_read(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		if (kni_check_param(dev, &dev_info) < 0) {
			up_read(&knet->kni_list_lock);
			return -EINVAL;
		}
	}
	up_read(&knet->kni_list_lock);

	net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
#ifdef NET_NAME_UNKNOWN
							NET_NAME_UNKNOWN,
#endif
							kni_net_init);
	if (net_dev == NULL) {
		KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
		return -EBUSY;
	}

	dev_net_set(net_dev, net);

	kni = netdev_priv(net_dev);

	kni->net_dev = net_dev;
	kni->group_id = dev_info.group_id;
	kni->core_id = dev_info.core_id;
	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);

	/* Translate user space info into kernel space info */
	kni->tx_q = phys_to_virt(dev_info.tx_phys);
	kni->rx_q = phys_to_virt(dev_info.rx_phys);
	kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
	kni->free_q = phys_to_virt(dev_info.free_phys);

	kni->req_q = phys_to_virt(dev_info.req_phys);
	kni->resp_q = phys_to_virt(dev_info.resp_phys);
	kni->sync_va = dev_info.sync_va;
	kni->sync_kva = phys_to_virt(dev_info.sync_phys);

	kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
	kni->mbuf_va = dev_info.mbuf_va;

#ifdef RTE_KNI_VHOST
	kni->vhost_queue = NULL;
	kni->vq_status = BE_STOP;
#endif
	kni->mbuf_size = dev_info.mbuf_size;

	KNI_PRINT("tx_phys:    0x%016llx, tx_q addr:    0x%p\n",
		(unsigned long long) dev_info.tx_phys, kni->tx_q);
	KNI_PRINT("rx_phys:    0x%016llx, rx_q addr:    0x%p\n",
		(unsigned long long) dev_info.rx_phys, kni->rx_q);
	KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
		(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
	KNI_PRINT("free_phys:  0x%016llx, free_q addr:  0x%p\n",
		(unsigned long long) dev_info.free_phys, kni->free_q);
	KNI_PRINT("req_phys:   0x%016llx, req_q addr:   0x%p\n",
		(unsigned long long) dev_info.req_phys, kni->req_q);
	KNI_PRINT("resp_phys:  0x%016llx, resp_q addr:  0x%p\n",
		(unsigned long long) dev_info.resp_phys, kni->resp_q);
	KNI_PRINT("mbuf_phys:  0x%016llx, mbuf_kva:     0x%p\n",
		(unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
	KNI_PRINT("mbuf_va:    0x%p\n", dev_info.mbuf_va);
	KNI_PRINT("mbuf_size:  %u\n", kni->mbuf_size);

	KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
					dev_info.bus,
					dev_info.devid,
					dev_info.function,
					dev_info.vendor_id,
					dev_info.device_id);

	pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);

	/* Support Ethtool */
	while (pci) {
		KNI_PRINT("pci_bus: %02x:%02x:%02x\n",
					pci->bus->number,
					PCI_SLOT(pci->devfn),
					PCI_FUNC(pci->devfn));

		if ((pci->bus->number == dev_info.bus) &&
			(PCI_SLOT(pci->devfn) == dev_info.devid) &&
			(PCI_FUNC(pci->devfn) == dev_info.function)) {
			found_pci = pci;
			switch (dev_info.device_id) {
			#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
			#include <rte_pci_dev_ids.h>
				ret = igb_kni_probe(found_pci, &lad_dev);
				break;
			#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
							case (dev):
			#include <rte_pci_dev_ids.h>
				ret = ixgbe_kni_probe(found_pci, &lad_dev);
				break;
			default:
				ret = -1;
				break;
			}

			KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
							pci, lad_dev);
			if (ret == 0) {
				kni->lad_dev = lad_dev;
				kni_set_ethtool_ops(kni->net_dev);
			} else {
				KNI_ERR("Device not supported by ethtool");
				kni->lad_dev = NULL;
			}

			kni->pci_dev = found_pci;
			kni->device_id = dev_info.device_id;
			break;
		}
		pci = pci_get_device(dev_info.vendor_id,
				dev_info.device_id, pci);
	}
	if (pci)
		pci_dev_put(pci);

	if (kni->lad_dev)
		memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
	else
		/*
		 * Modified: the stock code generates a random MAC address
		 * here (eth_random_addr() is the newer kernel API for that);
		 * instead, copy the MAC passed down from user space.
		 */
		/* random_ether_addr(net_dev->dev_addr); */
		memcpy(net_dev->dev_addr, &dev_info.kni_mac, ETH_ALEN);

	ret = register_netdev(net_dev);
	if (ret) {
		KNI_ERR("error %i registering device \"%s\"\n",
					ret, dev_info.name);
		kni_dev_remove(kni);
		return -ENODEV;
	}

#ifdef RTE_KNI_VHOST
	kni_vhost_init(kni);
#endif

	/**
	 * Create a new kernel thread for multiple mode, set its core affinity,
	 * and finally wake it up.
	 */
	if (multiple_kthread_on) {
		kni->pthread = kthread_create(kni_thread_multiple,
					      (void *)kni,
					      "kni_%s", kni->name);
		if (IS_ERR(kni->pthread)) {
			kni_dev_remove(kni);
			return -ECANCELED;
		}
		if (dev_info.force_bind)
			kthread_bind(kni->pthread, kni->core_id);
		wake_up_process(kni->pthread);
	}

	down_write(&knet->kni_list_lock);
	list_add(&kni->list, &knet->kni_list_head);
	up_write(&knet->kni_list_lock);

	return 0;
}
```
The statement ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); copies in the message passed down from user space; dev_info holds the parameters of the virtual KNI interface. Based on those parameters, ret = register_netdev(net_dev); then registers the KNI interface as a netdev.
That call completes the creation, and a virtual network interface appears. The memcpy(net_dev->dev_addr, &dev_info.kni_mac, ETH_ALEN) branch is my own modification: following the documented method I could not ping through at all, but setting the KNI interface's MAC to the MAC of the port DPDK took over made it work. The stock code assigns a randomly generated MAC.
2. For the user-space side, we analyze the example shipped with DPDK:
```c
int
main(int argc, char **argv)
{
	int ret;
	uint8_t nb_sys_ports, port;
	unsigned i;

	/* Associate signal_handler function with USR signals */
	signal(SIGUSR1, signal_handler);
	signal(SIGUSR2, signal_handler);
	signal(SIGRTMIN, signal_handler);
	signal(SIGINT, signal_handler);

	/* Initialise EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)\n", ret);
	argc -= ret;
	argv += ret;

	/* Parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Could not parse input parameters\n");

	/* Create the mbuf pool */
	pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
		MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id());
	if (pktmbuf_pool == NULL) {
		rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n");
		return -1;
	}

	/* Get number of ports found in scan */
	nb_sys_ports = rte_eth_dev_count();
	if (nb_sys_ports == 0)
		rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n");

	/* Check if the configured port ID is valid */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
		if (kni_port_params_array[i] && i >= nb_sys_ports)
			rte_exit(EXIT_FAILURE, "Configured invalid "
						"port ID %u\n", i);

	/* Initialize KNI subsystem */
	init_kni();

	/* Initialise each port */
	for (port = 0; port < nb_sys_ports; port++) {
		/* Skip ports that are not enabled */
		if (!(ports_mask & (1 << port)))
			continue;
		init_port(port);

		if (port >= RTE_MAX_ETHPORTS)
			rte_exit(EXIT_FAILURE, "Can not use more than "
				"%d ports for kni\n", RTE_MAX_ETHPORTS);

		kni_alloc(port);
	}
	check_all_ports_link_status(nb_sys_ports, ports_mask);

	/* Launch per-lcore function on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(i) {
		if (rte_eal_wait_lcore(i) < 0)
			return -1;
	}

	/* Release resources */
	for (port = 0; port < nb_sys_ports; port++) {
		if (!(ports_mask & (1 << port)))
			continue;
		kni_free_kni(port);
	}
#ifdef RTE_LIBRTE_XEN_DOM0
	rte_kni_close();
#endif
	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
		if (kni_port_params_array[i]) {
			rte_free(kni_port_params_array[i]);
			kni_port_params_array[i] = NULL;
		}

	return 0;
}
```
main() performs the usual EAL initialization and then creates a pktmbuf_pool. The functions worth a closer look are init_kni(), kni_alloc(port), and rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER). init_kni() initializes the KNI subsystem:
```c
static void
init_kni(void)
{
	unsigned int num_of_kni_ports = 0, i;
	struct kni_port_params **params = kni_port_params_array;

	/* Calculate the maximum number of KNI interfaces that will be used */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (kni_port_params_array[i]) {
			num_of_kni_ports += (params[i]->nb_lcore_k ?
				params[i]->nb_lcore_k : 1);
		}
	}

	/* Invoke rte KNI init to preallocate the ports */
	rte_kni_init(num_of_kni_ports);
}
```
The real work happens in rte_kni_init():
```c
void
rte_kni_init(unsigned int max_kni_ifaces)
{
	uint32_t i;
	struct rte_kni_memzone_slot *it;
	const struct rte_memzone *mz;
#define OBJNAMSIZ 32
	char obj_name[OBJNAMSIZ];
	char mz_name[RTE_MEMZONE_NAMESIZE];

	/* Immediately return if KNI is already initialized */
	if (kni_memzone_pool.initialized) {
		RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()");
		return;
	}

	if (max_kni_ifaces == 0) {
		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
							max_kni_ifaces);
		rte_panic("Unable to initialize KNI\n");
	}

	/* Check FD and open */
	if (kni_fd < 0) {
		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
		if (kni_fd < 0)
			rte_panic("Can not open /dev/%s\n", KNI_DEVICE);
	}

	/* Allocate slot objects */
	kni_memzone_pool.slots = (struct rte_kni_memzone_slot *)
					rte_malloc(NULL,
					sizeof(struct rte_kni_memzone_slot) *
					max_kni_ifaces,
					0);
	KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);

	/* Initialize general pool variables */
	kni_memzone_pool.initialized = 1;
	kni_memzone_pool.max_ifaces = max_kni_ifaces;
	kni_memzone_pool.free = &kni_memzone_pool.slots[0];
	rte_spinlock_init(&kni_memzone_pool.mutex);

	/* Pre-allocate all memzones of all the slots; panic on error */
	for (i = 0; i < max_kni_ifaces; i++) {

		/* Recover current slot */
		it = &kni_memzone_pool.slots[i];
		it->id = i;

		/* Allocate KNI context */
		snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
		mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_ctx = mz;

		/* TX RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_tx_q = mz;

		/* RX RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_rx_q = mz;

		/* ALLOC RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_alloc_q = mz;

		/* FREE RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_free_q = mz;

		/* Request RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_req_q = mz;

		/* Response RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_resp_q = mz;

		/* Req/Resp sync mem area */
		snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_sync_addr = mz;

		if ((i + 1) == max_kni_ifaces) {
			it->next = NULL;
			kni_memzone_pool.free_tail = it;
		} else
			it->next = &kni_memzone_pool.slots[i + 1];
	}

	return;

kni_fail:
	rte_panic("Unable to allocate memory for max_kni_ifaces:%d. "
		"Increase the amount of hugepages memory\n", max_kni_ifaces);
}
```
This pre-allocates memory for every FIFO shown in the figure above.
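Each of those memzones backs a struct rte_kni_fifo, a single-producer/single-consumer lock-free ring that both address spaces map. Roughly, simplified from rte_kni_common.h, with a put-side sketch that mirrors what kni_fifo_put() does rather than reproducing it verbatim:

```c
/* Layout of the shared FIFOs (simplified from rte_kni_common.h).
 * The same physical memory is mapped in both user and kernel space,
 * so for a single-producer/single-consumer pair the read/write
 * indices are the only synchronization needed. */
struct rte_kni_fifo {
	volatile unsigned write;   /* next position to be written */
	volatile unsigned read;    /* next position to be read */
	unsigned len;              /* ring length (a power of two) */
	unsigned elem_size;        /* element size (unused) */
	void *volatile buffer[];   /* the mbuf pointers live here */
};

/* Sketch of a producer-side put, assuming len is a power of two
 * (mirrors kni_fifo_put(); not the verbatim implementation). */
static inline unsigned
fifo_put_sketch(struct rte_kni_fifo *fifo, void **data, unsigned num)
{
	unsigned i;
	unsigned fifo_write = fifo->write;
	unsigned new_write;

	for (i = 0; i < num; i++) {
		new_write = (fifo_write + 1) & (fifo->len - 1);
		if (new_write == fifo->read)   /* ring is full */
			break;
		fifo->buffer[fifo_write] = data[i];
		fifo_write = new_write;
	}
	fifo->write = fifo_write;              /* publish to the consumer */
	return i;                              /* number actually enqueued */
}
```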
```c
static int
kni_alloc(uint8_t port_id)
{
	uint8_t i;
	struct rte_kni *kni;
	struct rte_kni_conf conf;
	struct kni_port_params **params = kni_port_params_array;

	if (port_id >= RTE_MAX_ETHPORTS || !params[port_id])
		return -1;

	params[port_id]->nb_kni = params[port_id]->nb_lcore_k ?
				params[port_id]->nb_lcore_k : 1;

	for (i = 0; i < params[port_id]->nb_kni; i++) {
		/* Clear conf at first */
		memset(&conf, 0, sizeof(conf));
		if (params[port_id]->nb_lcore_k) {
			snprintf(conf.name, RTE_KNI_NAMESIZE,
					"vEth%u_%u", port_id, i);
			conf.core_id = params[port_id]->lcore_k[i];
			conf.force_bind = 1;
		} else
			snprintf(conf.name, RTE_KNI_NAMESIZE,
						"vEth%u", port_id);
		conf.group_id = (uint16_t)port_id;
		conf.mbuf_size = MAX_PACKET_SZ;
		/* Modified: pass the port's real MAC down to the kernel */
		rte_eth_macaddr_get(port_id, (struct ether_addr *)&conf.kni_mac);
		/*
		 * The first KNI device associated to a port
		 * is the master, for multiple kernel thread
		 * environment.
		 */
		if (i == 0) {
			struct rte_kni_ops ops;
			struct rte_eth_dev_info dev_info;

			memset(&dev_info, 0, sizeof(dev_info));
			rte_eth_dev_info_get(port_id, &dev_info);
			conf.addr = dev_info.pci_dev->addr;
			conf.id = dev_info.pci_dev->id;

			memset(&ops, 0, sizeof(ops));
			ops.port_id = port_id;
			ops.change_mtu = kni_change_mtu;
			ops.config_network_if = kni_config_network_interface;

			kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
		} else
			kni = rte_kni_alloc(pktmbuf_pool, &conf, NULL);

		if (!kni)
			rte_exit(EXIT_FAILURE, "Fail to create kni for "
						"port: %d\n", port_id);
		params[port_id]->kni[i] = kni;
	}

	return 0;
}
```
```c
struct rte_kni *
rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
	      const struct rte_kni_conf *conf,
	      struct rte_kni_ops *ops)
{
	int ret;
	struct rte_kni_device_info dev_info;
	struct rte_kni *ctx;
	char intf_name[RTE_KNI_NAMESIZE];
	char mz_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;
	const struct rte_mempool *mp;
	struct rte_kni_memzone_slot *slot = NULL;

	if (!pktmbuf_pool || !conf || !conf->name[0])
		return NULL;

	/* Check if KNI subsystem has been initialized */
	if (kni_memzone_pool.initialized != 1) {
		RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. "
				"Invoke rte_kni_init() first\n");
		return NULL;
	}

	/* Get an available slot from the pool */
	slot = kni_memzone_pool_alloc();
	if (!slot) {
		RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; "
			"increase the number of max_kni_ifaces(current %d) or "
			"release unused ones.\n",
			kni_memzone_pool.max_ifaces);
		return NULL;
	}

	/* Recover ctx */
	ctx = slot->m_ctx->addr;
	snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);

	if (ctx->in_use) {
		RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
		return NULL;
	}
	memset(ctx, 0, sizeof(struct rte_kni));
	if (ops)
		memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops));

	memset(&dev_info, 0, sizeof(dev_info));
	dev_info.bus = conf->addr.bus;
	dev_info.devid = conf->addr.devid;
	dev_info.function = conf->addr.function;
	dev_info.vendor_id = conf->id.vendor_id;
	dev_info.device_id = conf->id.device_id;
	dev_info.core_id = conf->core_id;
	dev_info.force_bind = conf->force_bind;
	dev_info.group_id = conf->group_id;
	dev_info.mbuf_size = conf->mbuf_size;

	snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name);
	snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name);

	RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x %02x:%02x\n",
		dev_info.bus, dev_info.devid, dev_info.function,
			dev_info.vendor_id, dev_info.device_id);
	/* TX RING */
	mz = slot->m_tx_q;
	ctx->tx_q = mz->addr;
	kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
	dev_info.tx_phys = mz->phys_addr;

	/* RX RING */
	mz = slot->m_rx_q;
	ctx->rx_q = mz->addr;
	kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
	dev_info.rx_phys = mz->phys_addr;

	/* ALLOC RING */
	mz = slot->m_alloc_q;
	ctx->alloc_q = mz->addr;
	kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
	dev_info.alloc_phys = mz->phys_addr;

	/* FREE RING */
	mz = slot->m_free_q;
	ctx->free_q = mz->addr;
	kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
	dev_info.free_phys = mz->phys_addr;

	/* Request RING */
	mz = slot->m_req_q;
	ctx->req_q = mz->addr;
	kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
	dev_info.req_phys = mz->phys_addr;

	/* Response RING */
	mz = slot->m_resp_q;
	ctx->resp_q = mz->addr;
	kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
	dev_info.resp_phys = mz->phys_addr;

	/* Req/Resp sync mem area */
	mz = slot->m_sync_addr;
	ctx->sync_addr = mz->addr;
	dev_info.sync_va = mz->addr;
	dev_info.sync_phys = mz->phys_addr;

	/* MBUF mempool */
	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
		pktmbuf_pool->name);
	mz = rte_memzone_lookup(mz_name);
	KNI_MEM_CHECK(mz == NULL);
	mp = (struct rte_mempool *)mz->addr;
	/* KNI currently requires to have only one memory chunk */
	if (mp->nb_mem_chunks != 1)
		goto kni_fail;

	dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
	dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
	ctx->pktmbuf_pool = pktmbuf_pool;
	ctx->group_id = conf->group_id;
	ctx->slot_id = slot->id;
	ctx->mbuf_size = conf->mbuf_size;

	/* Modified: carry the port MAC down to the kernel module */
	dev_info.kni_mac = conf->kni_mac;

	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
	KNI_MEM_CHECK(ret < 0);

	ctx->in_use = 1;

	/* Allocate mbufs and then put them into alloc_q */
	kni_allocate_mbufs(ctx);

	return ctx;

kni_fail:
	if (slot)
		kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]);

	return NULL;
}
```
其中ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);就是讲dev_info传入内核。
```c
static int
main_loop(__rte_unused void *arg)
{
	uint8_t i, nb_ports = rte_eth_dev_count();
	int32_t f_stop;
	const unsigned lcore_id = rte_lcore_id();
	enum lcore_rxtx {
		LCORE_NONE,
		LCORE_RX,
		LCORE_TX,
		LCORE_MAX
	};
	enum lcore_rxtx flag = LCORE_NONE;

	for (i = 0; i < nb_ports; i++) {
		if (!kni_port_params_array[i])
			continue;
		if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) {
			flag = LCORE_RX;
			break;
		} else if (kni_port_params_array[i]->lcore_tx ==
						(uint8_t)lcore_id) {
			flag = LCORE_TX;
			break;
		}
	}

	if (flag == LCORE_RX) {
		RTE_LOG(INFO, APP, "Lcore %u is reading from port %d\n",
					kni_port_params_array[i]->lcore_rx,
					kni_port_params_array[i]->port_id);
		while (1) {
			f_stop = rte_atomic32_read(&kni_stop);
			if (f_stop)
				break;
			kni_ingress(kni_port_params_array[i]);
		}
	} else if (flag == LCORE_TX) {
		RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
					kni_port_params_array[i]->lcore_tx,
					kni_port_params_array[i]->port_id);
		while (1) {
			f_stop = rte_atomic32_read(&kni_stop);
			if (f_stop)
				break;
			kni_egress(kni_port_params_array[i]);
		}
	} else
		RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id);

	return 0;
}
```
Each lcore then enters its packet RX/TX loop:
```c
static void
kni_ingress(struct kni_port_params *p)
{
	uint8_t i, port_id;
	unsigned nb_rx, num;
	uint32_t nb_kni;
	struct rte_mbuf *pkts_burst[PKT_BURST_SZ];

	if (p == NULL)
		return;

	nb_kni = p->nb_kni;
	port_id = p->port_id;
	for (i = 0; i < nb_kni; i++) {
		/* Burst rx from eth */
		nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
		if (unlikely(nb_rx > PKT_BURST_SZ)) {
			RTE_LOG(ERR, APP, "Error receiving from eth\n");
			return;
		}
		/* Burst tx to kni */
		num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
		kni_stats[port_id].rx_packets += num;
		//if (kni_stats[port_id].rx_packets != 0 &&
		//		kni_stats[port_id].rx_packets % 20 == 0 && num > 0)
		//	printf("recv packet num : %"PRIu64"\n",
		//			kni_stats[port_id].rx_packets);
		rte_kni_handle_request(p->kni[i]);
		if (unlikely(num < nb_rx)) {
			/* Free mbufs not tx to kni interface */
			kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
			kni_stats[port_id].rx_dropped += nb_rx - num;
		}
	}
}
```
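Note the rte_kni_handle_request() call in kni_ingress(): it polls req_q for control requests posted by the kernel (MTU change, interface up/down), invokes the callbacks registered through rte_kni_ops in kni_alloc(), and writes the result back on resp_q. A minimal config_network_if callback could look like the sketch below; this is a simplified stand-in for the example's kni_config_network_interface(), not its full version:

```c
/* Sketch of a config_network_if callback, registered via
 * ops.config_network_if in kni_alloc(). The kernel posts a request on
 * req_q when someone runs "ifconfig vEthX up/down"; after this returns,
 * rte_kni_handle_request() sends the result back on resp_q. */
static int
kni_config_network_interface(uint8_t port_id, uint8_t if_up)
{
	int ret = 0;

	if (port_id >= rte_eth_dev_count())
		return -EINVAL;

	rte_eth_dev_stop(port_id);          /* stop before reconfiguring */
	if (if_up)
		ret = rte_eth_dev_start(port_id);

	return ret;                          /* reported back to the kernel */
}
```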
```c
static void
kni_egress(struct kni_port_params *p)
{
	uint8_t i, port_id;
	unsigned nb_tx, num;
	uint32_t nb_kni;
	struct rte_mbuf *pkts_burst[PKT_BURST_SZ];

	if (p == NULL)
		return;

	nb_kni = p->nb_kni;
	port_id = p->port_id;
	for (i = 0; i < nb_kni; i++) {
		/* Burst rx from kni */
		num = rte_kni_rx_burst(p->kni[i], pkts_burst, PKT_BURST_SZ);
		if (unlikely(num > PKT_BURST_SZ)) {
			RTE_LOG(ERR, APP, "Error receiving from KNI\n");
			return;
		}
		/* Burst tx to eth */
		nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
		kni_stats[port_id].tx_packets += nb_tx;
		if (unlikely(nb_tx < num)) {
			/* Free mbufs not tx to NIC */
			kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
			kni_stats[port_id].tx_dropped += num - nb_tx;
		}
	}
}
```
The code then sits on these KNI interfaces moving packets back and forth; at that point the vEthX interfaces can be configured and tested from Linux like any other netdev. Space is limited, so the rest will be written up later.