• DPDK — 网卡初始化流程(Intel 82599 ixgbe 网卡驱动示例)


    目录

    总览

    在这里插入图片描述

    • rte_eth_dev_count:获取被 DPDK App 接管的 eth 网卡数量。
    • rte_eth_dev_configure:根据 DPDK App 的需要,配置被 App 接管的 eth 网卡,实际上是配置 eth 网卡的驱动程序。
    • rte_eth_rx_queue_setup/rte_eth_tx_queue_setup:为网卡分配 接收/发送 队列。
    • rte_eth_dev_start:启动网卡。
    • rte_eth_rx_burst/rte_eth_tx_burst:在指定网卡的指定队列上批量接收/发送数据包的函数。
    • ixgbe_dev_*:Intel 82599(用于 Intel X520 系列网卡)控制器的驱动函数,作为该型号网卡的 PMD,与 igb_uio 驱动一起实现 kernel-bypass 的效果。

    rte_eth_dev/rte_eth_dev_data 数据结构

    DPDK 定义了一个 rte_eth_devices 数组,元素类型为 struct rte_eth_dev,一个元素代表一块网卡。struct rte_eth_dev 有四个重要的成员:

    1. rx_pkt_burst:网卡的 burst 收包函数;
    2. tx_pkt_burst:网卡的 burst 发包函数;
    3. dev_ops:网卡驱动注册函数表,类型为 struct eth_dev_ops;
    4. data:包含了网卡的主要信息,类型为 struct rte_eth_dev_data。
    /*
     * Per-port ethdev object: the PMD burst entry points, the driver's
     * operations table and a pointer to the port's rte_eth_dev_data.
     * Members replaced by "..." are elided in this excerpt.
     */
    struct rte_eth_dev {
    	/* rx/tx_pkt_burst are registered in rte_bus_probe() */
    	eth_rx_burst_t rx_pkt_burst;	/**< Pointer to PMD receive function. */
    	eth_tx_burst_t tx_pkt_burst; 	/**< Pointer to PMD transmit function. */
    	eth_tx_prep_t tx_pkt_prepare; 	/**< Pointer to PMD transmit prepare function. */
    	struct rte_eth_dev_data *data;  /**< Pointer to device data */
    	
    	/* dev_ops is registered in rte_bus_probe() */
    	const struct eth_dev_ops *dev_ops; 	 /**< Functions exported by PMD */
    	struct rte_device *device;			 /**< Backing device */
    	struct rte_intr_handle *intr_handle; /**< Device interrupt handle */
    ...
    } __rte_cache_aligned;
    
    /*
     * Per-port data referenced from struct rte_eth_dev: the port name plus
     * the RX/TX queue pointer arrays and their lengths.
     * Members replaced by "..." are elided in this excerpt.
     */
    struct rte_eth_dev_data {
    	char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */
    
    	/* RX queue array */
    	void **rx_queues; 		/**< Array of pointers to RX queues. */
    	/* TX queue array */
    	void **tx_queues; 		/**< Array of pointers to TX queues. */
    	/* Length of the RX queue array */
    	uint16_t nb_rx_queues;  /**< Number of RX queues. */
    	/* Length of the TX queue array */
    	uint16_t nb_tx_queues;  /**< Number of TX queues. */
    ...
    };
    
    /* Table of ethdev objects, indexed by port_id (RTE_MAX_ETHPORTS entries). */
    struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
    
    /* File-local pointer to rte_eth_dev_data storage; its setup is not shown in this excerpt. */
    static struct rte_eth_dev_data *rte_eth_dev_data;
    

    rte_eth_dev_count 函数

    /**
     * Count the ports visited by RTE_ETH_FOREACH_DEV.
     *
     * @return
     *   Number of ports the iteration yields. The counter is a uint8_t, so
     *   the result is limited to that type's range.
     */
    uint8_t
    rte_eth_dev_count(void)
    {
    	uint8_t p;
    	uint8_t count = 0;
    
    	/* One increment per port yielded by the iteration macro */
    	RTE_ETH_FOREACH_DEV(p)
    		count++;
    
    	return count;
    }
    
    /*
     * Iterate p over every port id returned by rte_eth_find_next(), in
     * ascending order, stopping once the returned id reaches
     * RTE_MAX_ETHPORTS. Use as the head of a statement or braced body.
     */
    #define RTE_ETH_FOREACH_DEV(p)					\
    	for (p = rte_eth_find_next(0);				\
    	     (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS;	\
    	     p = rte_eth_find_next(p + 1))
    
    /**
     * Scan rte_eth_devices[] upwards from port_id and return the index of
     * the first entry whose state is RTE_ETH_DEV_ATTACHED.
     * Such devices are the ones bound with `dpdk-devbind.py --bind`.
     *
     * @param port_id
     *   Index at which the scan starts (inclusive).
     * @return
     *   Index of the first attached port at or after port_id, or
     *   RTE_MAX_ETHPORTS when no such port exists.
     */
    uint8_t
    rte_eth_find_next(uint8_t port_id)
    {
    	for (; port_id < RTE_MAX_ETHPORTS; port_id++) {
    		if (rte_eth_devices[port_id].state == RTE_ETH_DEV_ATTACHED)
    			return port_id;
    	}
    
    	/* Ran off the end of the table without finding an attached port */
    	return RTE_MAX_ETHPORTS;
    }
    

    rte_eth_dev_configure 函数

    rte_eth_dev_configure 函数的主要工作是分配接收/发送队列数组,一个数组元素表示一个接收/发送队列,类型为 void *。

    int
    rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
    		      const struct rte_eth_conf *dev_conf)
    {
    	struct rte_eth_dev *dev;
    	struct rte_eth_dev_info dev_info;
    	int diag;
    
    	/* 检查 port_id 是否合法 */
    	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
    
    	/* 检查接收队列数是否大于 DPDK 的上限 */
    	if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) {
    		RTE_PMD_DEBUG_TRACE(
    			"Number of RX queues requested (%u) is greater than max supported(%d)
    ",
    			nb_rx_q, RTE_MAX_QUEUES_PER_PORT);
    		return -EINVAL;
    	}
    
    	/* 检查发送队列数是否大于 DPDK 上限 */
    	if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) {
    		RTE_PMD_DEBUG_TRACE(
    			"Number of TX queues requested (%u) is greater than max supported(%d)
    ",
    			nb_tx_q, RTE_MAX_QUEUES_PER_PORT);
    		return -EINVAL;
    	}
    
    	/* 得到 port_id 对应的设备 */
    	dev = &rte_eth_devices[port_id];
    
    	/* 检查 dev_infos_get 和 dev_configure 是否定义 */
    	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
    	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP);
    
    	/* 检查设备是否已启动 */
    	if (dev->data->dev_started) {
    		RTE_PMD_DEBUG_TRACE(
    		    "port %d must be stopped to allow configuration
    ", port_id);
    		return -EBUSY;
    	}
    
    	/**
    	 * 复制 dev_conf 到 dev->data->dev_conf。
    	 *   - dev_conf 由 DPDK App 定义。
    	 *   - dev 结构体将传入到网卡驱动程序。
    	 */
    	memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf));
    
    	/*
    	 * Check that the numbers of RX and TX queues are not greater
    	 * than the maximum number of RX and TX queues supported by the
    	 * configured device.
    	 */
    	/**
    	 * dev->dev_ops 注册了网卡驱动的函数。
    	 * 如果是 ixgbe 驱动,则调用 ixgbe_dev_info_get() 函数。
    	 */
    	(*dev->dev_ops->dev_infos_get)(dev, &dev_info);
    
    	/* 检查接收/发送队列数是否同时为 0 */
    	if (nb_rx_q == 0 && nb_tx_q == 0) {
    		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d both rx and tx queue cannot be 0
    ", port_id);
    		return -EINVAL;
    	}
    
    	/* 检查接收队列数是否大于网卡上限 */
    	if (nb_rx_q > dev_info.max_rx_queues) {
    		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d
    ",
    				port_id, nb_rx_q, dev_info.max_rx_queues);
    		return -EINVAL;
    	}
    
    	/* 检查发送队列数是否大于网卡上限 */
    	if (nb_tx_q > dev_info.max_tx_queues) {
    		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d
    ",
    				port_id, nb_tx_q, dev_info.max_tx_queues);
    		return -EINVAL;
    	}
    
    	/* Check that the device supports requested interrupts */
    	if ((dev_conf->intr_conf.lsc == 1) &&
    		(!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
    			RTE_PMD_DEBUG_TRACE("driver %s does not support lsc
    ",
    					dev->device->driver->name);
    			return -EINVAL;
    	}
    	if ((dev_conf->intr_conf.rmv == 1) &&
    	    (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
    		RTE_PMD_DEBUG_TRACE("driver %s does not support rmv
    ",
    				    dev->device->driver->name);
    		return -EINVAL;
    	}
    
    	/*
    	 * If jumbo frames are enabled, check that the maximum RX packet
    	 * length is supported by the configured device.
    	 */
    	if (dev_conf->rxmode.jumbo_frame == 1) {
    		if (dev_conf->rxmode.max_rx_pkt_len >
    		    dev_info.max_rx_pktlen) {
    			RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
    				" > max valid value %u
    ",
    				port_id,
    				(unsigned)dev_conf->rxmode.max_rx_pkt_len,
    				(unsigned)dev_info.max_rx_pktlen);
    			return -EINVAL;
    		} else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) {
    			RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
    				" < min valid value %u
    ",
    				port_id,
    				(unsigned)dev_conf->rxmode.max_rx_pkt_len,
    				(unsigned)ETHER_MIN_LEN);
    			return -EINVAL;
    		}
    	} else {
    		if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN ||
    			dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN) /* 小于64或大于1518 */
    			/* Use default value */
    			dev->data->dev_conf.rxmode.max_rx_pkt_len =
    							ETHER_MAX_LEN; /* 默认值为1518 */
    	}
    
    
    	/* 分配接收队列数组,地址赋给 dev->data->rx_queues,长度赋给 dev->data->nb_rx_queues */
    	diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q);
    	if (diag != 0) {
    		RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d
    ",
    				port_id, diag);
    		return diag;
    	}
    
    	/* 分配发送队列数组,地址赋给 dev->data->tx_queues,长度赋给 dev->data->nb_tx_queues */
    	diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q);
    	if (diag != 0) {
    		RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d
    ",
    				port_id, diag);
    		rte_eth_dev_rx_queue_config(dev, 0);
    		return diag;
    	}
    
    	/* 注册了 ixgbe 驱动的 ixgbe_dev_configure() 函数 */
    	diag = (*dev->dev_ops->dev_configure)(dev);
    	if (diag != 0) {
    		RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d
    ",
    				port_id, diag);
    		rte_eth_dev_rx_queue_config(dev, 0);
    		rte_eth_dev_tx_queue_config(dev, 0);
    		return diag;
    	}
    
    	return 0;
    }
    

    ixgbe_dev_configure 函数

    /*
     * ixgbe implementation of the ethdev dev_configure callback (excerpt;
     * "..." marks elided code). The visible part checks the multi-queue
     * mode and optimistically enables bulk-alloc and vector RX.
     */
    static int
    ixgbe_dev_configure(struct rte_eth_dev *dev)
    {
    	...
    	/* multiple queue mode checking */
    	ret  = ixgbe_check_mq_mode(dev);
    	...
    	/*
    	 * Initialize to TRUE. If any of Rx queues doesn't meet the bulk
    	 * allocation or vector Rx preconditions we will reset it.
    	 */
    	adapter->rx_bulk_alloc_allowed = true;
    	adapter->rx_vec_allowed = true;
    	...
    }
    

    rte_eth_dev_rx_queue_config/rte_eth_dev_tx_queue_config 函数

    /*
     * Size the RX queue pointer array of a port (excerpt; "..." marks elided
     * code). The visible part allocates a zeroed, cache-line-aligned array of
     * nb_queues pointers and records the new count.
     */
    static int
    rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
    {
    	...
    	dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
    			sizeof(dev->data->rx_queues[0]) * nb_queues,
    			RTE_CACHE_LINE_SIZE);
    	...
    	dev->data->nb_rx_queues = nb_queues; /* update nb_rx_queues */
    	...
    }
    
    
    /*
     * Size the TX queue pointer array of a port (excerpt; "..." marks elided
     * code). The visible part allocates a zeroed, cache-line-aligned array of
     * nb_queues pointers and records the new count.
     */
    static int
    rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
    {
    	...
    	dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
    					   sizeof(dev->data->tx_queues[0]) * nb_queues,
    					   RTE_CACHE_LINE_SIZE);
    	...
    	dev->data->nb_tx_queues = nb_queues; /* update nb_tx_queues */
    	...
    }
    

    rte_eth_rx_queue_setup 函数

    /**
     * Set up one RX queue of a port.
     *
     * Validates the queue index, the mempool's buffer size and the requested
     * descriptor count, releases any queue already installed in that slot and
     * then delegates to the driver's rx_queue_setup callback.
     *
     * @param port_id      Port that owns the queue.
     * @param rx_queue_id  Queue index; must be below dev->data->nb_rx_queues.
     * @param nb_rx_desc   Number of RX descriptors for the ring.
     * @param socket_id    Passed through to the driver callback.
     * @param rx_conf      Queue configuration, or NULL to use
     *                     dev_info.default_rxconf.
     * @param mp           Mempool the driver allocates RX mbufs from.
     * @return
     *   The driver callback's result on the success path (0 means success);
     *   -EINVAL for a bad queue id, buffer size or descriptor count, -EBUSY
     *   when the port is started, -ENOTSUP when a required driver callback is
     *   missing, -ENOSPC when the mempool's private area is too small.
     */
    int
    rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
    		       uint16_t nb_rx_desc, unsigned int socket_id,
    		       const struct rte_eth_rxconf *rx_conf,
    		       struct rte_mempool *mp)
    {
    	int ret;
    	uint32_t mbp_buf_size;
    	struct rte_eth_dev *dev;
    	struct rte_eth_dev_info dev_info;
    	void **rxq;
    
    	/* Reject an invalid port_id */
    	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
    
    	/* Look up the device that belongs to port_id */
    	dev = &rte_eth_devices[port_id];
    	if (rx_queue_id >= dev->data->nb_rx_queues) {
    		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
    		return -EINVAL;
    	}
    
    	/* Queues cannot be set up on a started device */
    	if (dev->data->dev_started) {
    		RTE_PMD_DEBUG_TRACE(
    		    "port %d must be stopped to allow configuration\n", port_id);
    		return -EBUSY;
    	}
    
    	/* The driver must provide dev_infos_get and rx_queue_setup */
    	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
    	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP);
    
    	/*
    	 * Check the size of the mbuf data buffer.
    	 * This value must be provided in the private data of the memory pool.
    	 * First check that the memory pool has a valid private data.
    	 */
    	rte_eth_dev_info_get(port_id, &dev_info);
    	if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
    		RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n",
    				mp->name, (int) mp->private_data_size,
    				(int) sizeof(struct rte_pktmbuf_pool_private));
    		return -ENOSPC;
    	}
    	mbp_buf_size = rte_pktmbuf_data_room_size(mp);
    
    	/*
    	 * Compare against the sum instead of subtracting the headroom:
    	 * mbp_buf_size is unsigned, so a data room smaller than the headroom
    	 * would wrap around and slip past the check.
    	 */
    	if (mbp_buf_size < RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize) {
    		RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
    				"(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)"
    				"=%d)\n",
    				mp->name,
    				(int)mbp_buf_size,
    				(int)(RTE_PKTMBUF_HEADROOM +
    				      dev_info.min_rx_bufsize),
    				(int)RTE_PKTMBUF_HEADROOM,
    				(int)dev_info.min_rx_bufsize);
    		return -EINVAL;
    	}
    
    	/* nb_rx_desc must lie within the NIC's limits and honour its alignment */
    	if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
    			nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
    			nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
    
    		RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), "
    			"should be: <= %hu, >= %hu, and a product of %hu\n",
    			nb_rx_desc,
    			dev_info.rx_desc_lim.nb_max,
    			dev_info.rx_desc_lim.nb_min,
    			dev_info.rx_desc_lim.nb_align);
    		return -EINVAL;
    	}
    
    	/* Fetch the RX queue array; free a queue already sitting in the slot */
    	rxq = dev->data->rx_queues;
    	if (rxq[rx_queue_id]) {
    		/* The driver must provide rx_queue_release */
    		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
    					-ENOTSUP);
    		/* For the ixgbe driver this calls ixgbe_dev_rx_queue_release() */
    		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
    		rxq[rx_queue_id] = NULL;
    	}
    
    	if (rx_conf == NULL)
    		rx_conf = &dev_info.default_rxconf;
    
    	/* For the ixgbe driver this calls ixgbe_dev_rx_queue_setup() */
    	ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
    					      socket_id, rx_conf, mp);
    	if (!ret) {
    		/* Track the smallest buffer size used by any RX queue of the port */
    		if (!dev->data->min_rx_buf_size ||
    		    dev->data->min_rx_buf_size > mbp_buf_size)
    			dev->data->min_rx_buf_size = mbp_buf_size;
    	}
    
    	return ret;
    }
    

    ixgbe_dev_rx_queue_setup 函数

    /*
     * ixgbe implementation of the ethdev rx_queue_setup callback (excerpt;
     * "..." marks elided code). The visible part allocates the queue
     * structure, the hardware descriptor ring and the software ring, records
     * the RDT/RDH register addresses, installs the queue in
     * dev->data->rx_queues[queue_idx] and resets its state.
     */
    int __attribute__((cold))
    ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
    			 uint16_t queue_idx,
    			 uint16_t nb_desc,
    			 unsigned int socket_id,
    			 const struct rte_eth_rxconf *rx_conf,
    			 struct rte_mempool *mp)
    {
    	...
    	/* Allocate the ixgbe_rx_queue structure */
    	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
    				 RTE_CACHE_LINE_SIZE, socket_id);
    	...
    	
    	/* Initialise rxq from the arguments and the device configuration */
    	rxq->mb_pool = mp;
    	rxq->nb_rx_desc = nb_desc;
    	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
    	rxq->queue_id = queue_idx;
    	rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
    		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
    	rxq->port_id = dev->data->port_id;
    	rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
    							0 : ETHER_CRC_LEN);
    	rxq->drop_en = rx_conf->rx_drop_en;
    	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
    	...
    	
    	/**
    	 * Reserve the descriptor ring; its elements are union ixgbe_adv_rx_desc.
    	 * Size per the original note:
    	 * (IXGBE_MAX_RING_DESC + RTE_PMD_IXGBE_RX_MAX_BURST) * sizeof(union ixgbe_adv_rx_desc)
    	 * = (4096 + 32) * sizeof(union ixgbe_adv_rx_desc)
    	 */
    	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
    				      RX_RING_SZ, IXGBE_ALIGN, socket_id);
    	...
    
    	/* Zero the descriptor ring */
    	memset(rz->addr, 0, RX_RING_SZ); 
    	...
    	
    	/* rdt_reg_addr holds the address of this queue's RDT register */
    	rxq->rdt_reg_addr =
    		IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
    	
    	/* rdh_reg_addr holds the address of this queue's RDH register */
    	rxq->rdh_reg_addr =
    		IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
    	...
    	
    	/* rx_ring_phys_addr is the bus address of the descriptor ring */
    	rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
    
    	/* rx_ring is the virtual address of the descriptor ring */
    	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
    	...
    	
    	/* Allocate the array of ixgbe_rx_entry and store it in sw_ring */
    	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
    					  sizeof(struct ixgbe_rx_entry) * len,
    					  RTE_CACHE_LINE_SIZE, socket_id);
    	...
    	
    	/* rx_queues[queue_idx] now points at this ixgbe_rx_queue */
    	dev->data->rx_queues[queue_idx] = rxq;
    	...
    	
    	/* Reset the RX queue's bookkeeping fields */
    	ixgbe_reset_rx_queue(adapter, rxq);
    	...
    }
    
    /*
     * Reset the software state of an RX queue (excerpt; "..." marks elided
     * code). The visible part zeroes the ring indices and counters, sets
     * rx_free_trigger to rx_free_thresh - 1 and clears the segment pointers.
     */
    static void __attribute__((cold))
    ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
    {
    	...
    	rxq->rx_nb_avail = 0;
    	rxq->rx_next_avail = 0;
    	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
    	rxq->rx_tail = 0;
    	rxq->nb_rx_hold = 0;
    	rxq->pkt_first_seg = NULL;
    	rxq->pkt_last_seg = NULL;
    	...
    }
    

    rte_eth_tx_queue_setup 函数

    /**
     * Set up one TX queue of a port.
     *
     * Validates the queue index and the requested descriptor count, releases
     * any queue already installed in that slot and then delegates to the
     * driver's tx_queue_setup callback.
     *
     * @param port_id      Port that owns the queue.
     * @param tx_queue_id  Queue index; must be below dev->data->nb_tx_queues.
     * @param nb_tx_desc   Number of TX descriptors for the ring.
     * @param socket_id    Passed through to the driver callback.
     * @param tx_conf      Queue configuration, or NULL to use
     *                     dev_info.default_txconf.
     * @return
     *   The driver callback's result on the success path; -EINVAL for a bad
     *   queue id or descriptor count, -EBUSY when the port is started,
     *   -ENOTSUP when a required driver callback is missing.
     */
    int
    rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
    		       uint16_t nb_tx_desc, unsigned int socket_id,
    		       const struct rte_eth_txconf *tx_conf)
    {
    	struct rte_eth_dev *dev;
    	struct rte_eth_dev_info dev_info;
    	void **txq;
    
    	/* Reject an invalid port_id */
    	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
    
    	/* Look up the device that belongs to port_id */
    	dev = &rte_eth_devices[port_id];
    	if (tx_queue_id >= dev->data->nb_tx_queues) {
    		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
    		return -EINVAL;
    	}
    
    	/* Queues cannot be set up on a started device */
    	if (dev->data->dev_started) {
    		RTE_PMD_DEBUG_TRACE(
    		    "port %d must be stopped to allow configuration\n", port_id);
    		return -EBUSY;
    	}
    
    	/* The driver must provide dev_infos_get and tx_queue_setup */
    	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
    	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP);
    
    	rte_eth_dev_info_get(port_id, &dev_info);
    
    	/* nb_tx_desc must lie within the NIC's limits and honour its alignment */
    	if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
    	    nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
    	    nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
    		RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), "
    				"should be: <= %hu, >= %hu, and a product of %hu\n",
    				nb_tx_desc,
    				dev_info.tx_desc_lim.nb_max,
    				dev_info.tx_desc_lim.nb_min,
    				dev_info.tx_desc_lim.nb_align);
    		return -EINVAL;
    	}
    
    	/* Fetch the TX queue array; free a queue already sitting in the slot */
    	txq = dev->data->tx_queues;
    	if (txq[tx_queue_id]) {
    		/* The driver must provide tx_queue_release */
    		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
    					-ENOTSUP);
    
    		/* For the ixgbe driver this calls ixgbe_dev_tx_queue_release() */
    		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
    		txq[tx_queue_id] = NULL;
    	}
    
    	if (tx_conf == NULL)
    		tx_conf = &dev_info.default_txconf;
    
    	/* For the ixgbe driver this calls ixgbe_dev_tx_queue_setup() */
    	return (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
    					       socket_id, tx_conf);
    }
    

    ixgbe_dev_tx_queue_setup 函数

    /*
     * ixgbe implementation of the ethdev tx_queue_setup callback (excerpt;
     * "..." marks elided code). The visible part allocates the queue
     * structure, the hardware descriptor ring and the software ring, records
     * the TDT register address, resets the queue through txq->ops->reset and
     * installs it in dev->data->tx_queues[queue_idx].
     */
    int __attribute__((cold))
    ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
    			 uint16_t queue_idx,
    			 uint16_t nb_desc,
    			 unsigned int socket_id,
    			 const struct rte_eth_txconf *tx_conf)
    {
    	...
    	/* Allocate the ixgbe_tx_queue structure */
    	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
    				 RTE_CACHE_LINE_SIZE, socket_id);
    	...
    	
    	/**
    	 * Reserve the descriptor ring; its elements are union ixgbe_adv_tx_desc.
    	 * Size per the original note: sizeof(union ixgbe_adv_tx_desc) * 4096
    	 */
    	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
    			sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
    			IXGBE_ALIGN, socket_id);
    	...
    	
    	/* Initialise txq from the arguments and tx_conf */
    	txq->nb_tx_desc = nb_desc;
    	txq->tx_rs_thresh = tx_rs_thresh;
    	txq->tx_free_thresh = tx_free_thresh;
    	txq->pthresh = tx_conf->tx_thresh.pthresh;
    	txq->hthresh = tx_conf->tx_thresh.hthresh;
    	txq->wthresh = tx_conf->tx_thresh.wthresh;
    	txq->queue_id = queue_idx;
    	txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
    		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
    	txq->port_id = dev->data->port_id;
    	txq->txq_flags = tx_conf->txq_flags;
    	txq->ops = &def_txq_ops;
    	txq->tx_deferred_start = tx_conf->tx_deferred_start;
    	...
    	
    	/* tdt_reg_addr holds the address of this queue's TDT register */
    	txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
    	...
    	
    	/* tx_ring_phys_addr is the bus address of the descriptor ring */
    	txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
    
    	/* tx_ring is the virtual address of the descriptor ring */
    	txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
    	...
    	
    	/* Allocate the array of ixgbe_tx_entry and store it in sw_ring */
    	txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
    				sizeof(struct ixgbe_tx_entry) * nb_desc,
    				RTE_CACHE_LINE_SIZE, socket_id);
    	...
    	
    	/**
    	 * Reset the TX queue's bookkeeping; per the original note this
    	 * dispatches to ixgbe_reset_tx_queue().
    	 */
    	txq->ops->reset(txq);
    	...
    	
    	/* tx_queues[queue_idx] now points at this ixgbe_tx_queue */
    	dev->data->tx_queues[queue_idx] = txq;
    	...
    }
    
    /*
     * Reset the software state of a TX queue (excerpt; "..." marks elided
     * code). The visible part sets the DD status on every descriptor, links
     * the software ring entries into a ring through next_id, and resets the
     * threshold markers, tail index and free/used counters.
     */
    static void __attribute__((cold))
    ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
    {
    	...
    	/* Start with prev at the last entry so that it links back to entry 0 */
    	prev = (uint16_t) (txq->nb_tx_desc - 1);
    	for (i = 0; i < txq->nb_tx_desc; i++) {
    		...
    		txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
    		txe[i].mbuf = NULL;
    		txe[i].last_id = i;
    		txe[prev].next_id = i;
    		prev = i;
    	}
    	...
    	txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
    	txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
    	...
    	txq->tx_tail = 0;
    	txq->nb_tx_used = 0;
    	...
    	txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
    	txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
    	...
    }
    

    rte_eth_dev_start 函数

    /**
     * Start an Ethernet port.
     *
     * Calls the driver's dev_start callback, marks the port as started,
     * restores the port configuration and, when link-state-change interrupts
     * are not enabled, asks the driver for the current link status.
     *
     * @param port_id  Port to start.
     * @return
     *   0 on success or when the port is already started; -EINVAL for an
     *   invalid port, -ENOTSUP when a required driver callback is missing,
     *   otherwise the error returned by the driver's dev_start.
     */
    int
    rte_eth_dev_start(uint8_t port_id)
    {
    	struct rte_eth_dev *dev;
    	int diag;
    
    	/* Reject an invalid port_id */
    	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
    
    	/* Look up the device that belongs to port_id */
    	dev = &rte_eth_devices[port_id];
    
    	/* The driver must provide dev_start */
    	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
    
    	/* Starting an already started device is a no-op */
    	if (dev->data->dev_started != 0) {
    		RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8
    			" already started\n",
    			port_id);
    		return 0;
    	}
    
    	/* For the ixgbe driver this calls ixgbe_dev_start() */
    	diag = (*dev->dev_ops->dev_start)(dev);
    	if (diag == 0)
    		dev->data->dev_started = 1;
    	else
    		return diag;
    
    	/* Restore the port configuration now that the device is running */
    	rte_eth_dev_config_restore(port_id);
    
    	/* Without LSC interrupts, query the link status once here */
    	if (dev->data->dev_conf.intr_conf.lsc == 0) {
    		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP);
    		(*dev->dev_ops->link_update)(dev, 0);
    	}
    	return 0;
    }
    

    ixgbe_dev_start 函数

    /*
     * ixgbe implementation of the ethdev dev_start callback (excerpt; "..."
     * marks elided code). The visible part programs the TX and RX ring
     * registers and then starts the queues.
     */
    static int
    ixgbe_dev_start(struct rte_eth_dev *dev)
    {
    	...
    	/* Program TDBAL, TDBAH, TDLEN, TDH and TDT for every ixgbe_tx_queue */
    	ixgbe_dev_tx_init(dev);
    	...
    	
    	/* Program RDBAL, RDBAH, RDLEN, RDH and RDT for every ixgbe_rx_queue */
    	err = ixgbe_dev_rx_init(dev);
    	...
    	err = ixgbe_dev_rxtx_start(dev);
    	...
    }
    
    /*
     * Program the TX descriptor ring registers of every TX queue of the port
     * (excerpt; "..." marks elided code).
     */
    void __attribute__((cold))
    ixgbe_dev_tx_init(struct rte_eth_dev *dev)
    {
    	...
    	for (i = 0; i < dev->data->nb_tx_queues; i++) {
    		txq = dev->data->tx_queues[i];
    		...
    		
    		/* Bus address of the descriptor ring */
    		bus_addr = txq->tx_ring_phys_addr; 
    		/* Write the ring's bus address and length into the NIC registers:
    		 * TDBAL (TX Descriptor Base Address Low)
    		 * TDBAH (TX Descriptor Base Address High)
    		 * TDLEN (TX Descriptor Length)
    		 * TDH (TX Descriptor Head)
    		 * TDT (TX Descriptor Tail)
    		 * #define IXGBE_TDBAL(_i)		(0x06000 + ((_i) * 0x40))
    		 * #define IXGBE_TDBAH(_i)		(0x06004 + ((_i) * 0x40))
    		 * #define IXGBE_TDLEN(_i)		(0x06008 + ((_i) * 0x40))
    		 * #define IXGBE_TDH(_i)		(0x06010 + ((_i) * 0x40))
    		 * #define IXGBE_TDT(_i)		(0x06018 + ((_i) * 0x40)) */
    		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
    				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
    		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
    				(uint32_t)(bus_addr >> 32));
    		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
    				txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));		/* ring length in bytes */
    		/* Setup the HW Tx Head and TX Tail descriptor pointers */
    		IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0); 				/* TDH = 0 */
    		IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0); 				/* TDT = 0 */
    		...
    	}
    	...
    }
    
    /*
     * Program the RX descriptor ring registers of every RX queue of the port
     * (excerpt; "..." marks elided code).
     */
    int __attribute__((cold))
    ixgbe_dev_rx_init(struct rte_eth_dev *dev)
    {
    	...
    	for (i = 0; i < dev->data->nb_rx_queues; i++) {
    		rxq = dev->data->rx_queues[i];
    		...
    		
    		/* Bus address of the descriptor ring */
    		bus_addr = rxq->rx_ring_phys_addr; 
    		/* Write the ring's bus address and length into the NIC registers:
    		 * RDBAL (RX Descriptor Base Address Low)
    		 * RDBAH (RX Descriptor Base Address High)
    		 * RDLEN (RX Descriptor Length)
    		 * RDH (RX Descriptor Head)
    		 * RDT (RX Descriptor Tail)
    		 * #define IXGBE_RDBAL(_i)	(((_i) < 64) ? (0x01000 + ((_i) * 0x40)) :
    		 * 				(0x0D000 + (((_i) - 64) * 0x40)))
    		 * #define IXGBE_RDBAH(_i)	(((_i) < 64) ? (0x01004 + ((_i) * 0x40)) :
    		 * 				(0x0D004 + (((_i) - 64) * 0x40)))
    		 * #define IXGBE_RDLEN(_i)	(((_i) < 64) ? (0x01008 + ((_i) * 0x40)) :
    		 * 				(0x0D008 + (((_i) - 64) * 0x40)))
    		 * #define IXGBE_RDH(_i)	(((_i) < 64) ? (0x01010 + ((_i) * 0x40)) :
    		 * 				(0x0D010 + (((_i) - 64) * 0x40)))
    		 * #define IXGBE_RDT(_i)	(((_i) < 64) ? (0x01018 + ((_i) * 0x40)) :
    		 * 				(0x0D018 + (((_i) - 64) * 0x40))) */
    		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
    				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
    		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
    				(uint32_t)(bus_addr >> 32));
    		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
    				rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc)); 	/* ring length in bytes */
    		IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0); 				/* RDH = 0 */
    		IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0); 				/* RDT = 0 */
    		...
    	}
    	...
    }
    
    /*
     * Start the port's queues (excerpt; "..." marks elided code): the visible
     * loops call ixgbe_dev_tx_queue_start() for each TX queue first, then
     * ixgbe_dev_rx_queue_start() for each RX queue.
     */
    int __attribute__((cold))
    ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
    {
    	...
    	for (i = 0; i < dev->data->nb_tx_queues; i++) {
    		...
    		ret = ixgbe_dev_tx_queue_start(dev, i);
    		...
    	}
    	...
    	for (i = 0; i < dev->data->nb_rx_queues; i++) {
    		...
    		ret = ixgbe_dev_rx_queue_start(dev, i);
    		...
    	}
    	...
    }
    
    /*
     * Start one TX queue (excerpt; "..." marks elided code): sets the enable
     * bit in TXDCTL, zeroes the head and tail registers and marks the queue
     * as started in dev->data->tx_queue_state.
     */
    int __attribute__((cold))
    ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
    {
    	...
    	/* Enable transmission on this queue */
    	txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
    	txdctl |= IXGBE_TXDCTL_ENABLE;
    	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
    	...
    	/* TDH = 0 */
    	IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
    	/* TDT = 0 */
    	IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
    	/* Mark the TX queue as RTE_ETH_QUEUE_STATE_STARTED */
    	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
    	...
    }
    
    /*
     * Start one RX queue (excerpt; "..." marks elided code, including the
     * body of the allocation-failure branch): fills the ring with mbufs, sets
     * the enable bit in RXDCTL, writes the head and tail registers and marks
     * the queue as started in dev->data->rx_queue_state.
     */
    int __attribute__((cold))
    ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
    {
    	...
    	/* Allocate the mbufs for this RX queue */
    	if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
    	...
    	/* Enable reception on this queue */
    	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
    	rxdctl |= IXGBE_RXDCTL_ENABLE;
    	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
    	...
    	/* RDH = 0 */
    	IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
    	/* RDT = rxq->nb_rx_desc - 1 */
    	IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
    	/* Mark the RX queue as RTE_ETH_QUEUE_STATE_STARTED */
    	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
    	...
    }
    
    /*
     * Populate every slot of an RX queue with a freshly allocated mbuf.
     *
     * For each of the rxq->nb_rx_desc slots, one mbuf is taken from
     * rxq->mb_pool, its bus address is written into the matching hardware
     * descriptor, and the mbuf is remembered in the software ring.
     *
     * Returns 0 on success, or -ENOMEM as soon as one allocation fails;
     * mbufs placed in earlier slots are left attached to the software ring.
     */
    static int __attribute__((cold))
    ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
    {
    	struct ixgbe_rx_entry *sw_entries = rxq->sw_ring;
    	unsigned int slot = 0;
    
    	/* Initialize software ring entries */
    	while (slot < rxq->nb_rx_desc) {
    		struct rte_mbuf *buf = rte_mbuf_raw_alloc(rxq->mb_pool);
    		volatile union ixgbe_adv_rx_desc *desc;
    		uint64_t bus_addr;
    
    		if (!buf) {
    			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
    				     (unsigned) rxq->queue_id);
    			return -ENOMEM;
    		}
    
    		buf->data_off = RTE_PKTMBUF_HEADROOM;
    		buf->port = rxq->port_id;
    
    		/* Bus address of the mbuf data area, converted to little endian */
    		bus_addr =
    			rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(buf));
    
    		/* Hand the buffer address to the hardware descriptor */
    		desc = &rxq->rx_ring[slot];
    		desc->read.hdr_addr = 0;
    		desc->read.pkt_addr = bus_addr;
    
    		/* Attach the mbuf to the software ring entry for this slot */
    		sw_entries[slot].mbuf = buf;
    
    		slot++;
    	}
    
    	return 0;
    }
    

    参考文档

    https://blog.csdn.net/hz5034/article/details/88367518

    相关阅读:

  • 相关阅读:
    数据采集,微软控件分页问题的处理
    固定表头和列头
    字符串转时间类型
    js url加密解密
    Chosen—jquery选择框插件
    拉动滚动条加载数据
    webservse导出excel和word
    文本框与autocomplete结合使用
    jQuery的Select操作集合
    (转)常用算法大全-贪婪算法
  • 原文地址:https://www.cnblogs.com/hzcya1995/p/13309163.html
Copyright © 2020-2023  润新知