net_device 结构是数据链路层(二层)中非常重要的一个结构。它的成员很多,包含硬件信息、接口信息、其他辅助信息以及设备操作函数等。下面对其中重要字段的含义进行了标注;目前仍在阅读代码的过程中,部分字段的含义会在后续补充或更新。
1 struct net_device { 2 /* 设备名称,如eth0 */ 3 char name[IFNAMSIZ]; 4 /* 名称hash */ 5 struct hlist_node name_hlist; 6 char *ifalias; 7 /* 8 * I/O specific fields 9 * FIXME: Merge these and struct ifmap into one 10 */ 11 /* 12 描述设备所用的共享内存,用于设备与内核沟通 13 其初始化和访问只会在设备驱动程序内进行 14 */ 15 unsigned long mem_end; 16 unsigned long mem_start; 17 18 /* 设备自有内存映射到I/O内存的起始地址 */ 19 unsigned long base_addr; 20 21 /* 22 设备与内核对话的中断编号,此值可由多个设备共享 23 驱动程序使用request_irq函数分配此变量,使用free_irq予以释放 24 */ 25 int irq; 26 27 /* 侦测网络状态的改变次数 */ 28 atomic_t carrier_changes; 29 30 /* 31 * Some hardware also needs these fields (state,dev_list, 32 * napi_list,unreg_list,close_list) but they are not 33 * part of the usual set specified in Space.c. 34 */ 35 36 /* 37 网络队列子系统使用的一组标识 38 由__LINK_STATE_xxx标识 39 */ 40 unsigned long state; 41 42 struct list_head dev_list; 43 struct list_head napi_list; 44 struct list_head unreg_list; 45 struct list_head close_list; 46 47 /* 当前设备所有协议的链表 */ 48 struct list_head ptype_all; 49 /* 当前设备特定协议的链表 */ 50 struct list_head ptype_specific; 51 52 struct { 53 struct list_head upper; 54 struct list_head lower; 55 } adj_list; 56 57 /* 58 用于存在其他一些设备功能 59 可报告适配卡的功能,以便与CPU通信 60 使用NETIF_F_XXX标识功能特性 61 */ 62 netdev_features_t features; 63 netdev_features_t hw_features; 64 netdev_features_t wanted_features; 65 netdev_features_t vlan_features; 66 netdev_features_t hw_enc_features; 67 netdev_features_t mpls_features; 68 netdev_features_t gso_partial_features; 69 70 /* 网络设备索引号 */ 71 int ifindex; 72 73 /* 设备组,默认都属于0组 */ 74 int group; 75 76 struct net_device_stats stats; 77 78 atomic_long_t rx_dropped; 79 atomic_long_t tx_dropped; 80 atomic_long_t rx_nohandler; 81 82 #ifdef CONFIG_WIRELESS_EXT 83 const struct iw_handler_def *wireless_handlers; 84 struct iw_public_data *wireless_data; 85 #endif 86 /* 设备操作接口 */ 87 const struct net_device_ops *netdev_ops; 88 /* ethtool操作接口 */ 89 const struct ethtool_ops *ethtool_ops; 90 #ifdef CONFIG_NET_SWITCHDEV 91 const struct switchdev_ops *switchdev_ops; 92 #endif 
93 #ifdef CONFIG_NET_L3_MASTER_DEV 94 const struct l3mdev_ops *l3mdev_ops; 95 #endif 96 #if IS_ENABLED(CONFIG_IPV6) 97 const struct ndisc_ops *ndisc_ops; 98 #endif 99 100 #ifdef CONFIG_XFRM 101 const struct xfrmdev_ops *xfrmdev_ops; 102 #endif 103 104 /* 头部一些操作,如链路层缓存,校验等 */ 105 const struct header_ops *header_ops; 106 107 /* 标识接口特性,IFF_XXX,如IFF_UP */ 108 unsigned int flags; 109 110 /* 111 用于存储用户空间不可见的标识 112 由VLAN和Bridge虚拟设备使用 113 */ 114 unsigned int priv_flags; 115 116 /* 几乎不使用,为了兼容保留 */ 117 unsigned short gflags; 118 119 /* 结构对齐填充 */ 120 unsigned short padded; 121 122 /* 与interface group mib中的IfOperStatus相关 */ 123 unsigned char operstate; 124 unsigned char link_mode; 125 126 /* 127 接口使用的端口类型 128 */ 129 unsigned char if_port; 130 131 /* 132 设备使用的DMA通道 133 并非所有设备都可以用DMA,有些总线不支持DMA 134 */ 135 unsigned char dma; 136 137 /* 138 最大传输单元,标识设备能处理帧的最大尺寸 139 Ethernet-1500 140 */ 141 unsigned int mtu; 142 /* 最小mtu,Ethernet-68 */ 143 unsigned int min_mtu; 144 /* 最大mut,Ethernet-65535 */ 145 unsigned int max_mtu; 146 147 /* 设备所属类型 148 ARP模块中,用type判断接口的硬件地址类型 149 以太网接口为ARPHRD_ETHER 150 */ 151 unsigned short type; 152 /* 153 设备头部长度 154 Ethernet报头是ETH_HLEN=14字节 155 */ 156 unsigned short hard_header_len; 157 unsigned char min_header_len; 158 159 /* 必须的头部空间 */ 160 unsigned short needed_headroom; 161 unsigned short needed_tailroom; 162 163 /* Interface address info. 
*/ 164 /* 硬件地址,通常在初始化过程中从硬件读取 */ 165 unsigned char perm_addr[MAX_ADDR_LEN]; 166 unsigned char addr_assign_type; 167 /* 硬件地址长度 */ 168 unsigned char addr_len; 169 unsigned short neigh_priv_len; 170 unsigned short dev_id; 171 unsigned short dev_port; 172 spinlock_t addr_list_lock; 173 /* 设备名赋值类型,如NET_NAME_UNKNOWN */ 174 unsigned char name_assign_type; 175 bool uc_promisc; 176 struct netdev_hw_addr_list uc; 177 struct netdev_hw_addr_list mc; 178 struct netdev_hw_addr_list dev_addrs; 179 180 #ifdef CONFIG_SYSFS 181 struct kset *queues_kset; 182 #endif 183 /* 混杂模式开启数量 */ 184 unsigned int promiscuity; 185 186 /* 非零值时,设备监听所有多播地址 */ 187 unsigned int allmulti; 188 189 190 /* Protocol-specific pointers */ 191 /* 特定协议的指针 */ 192 #if IS_ENABLED(CONFIG_VLAN_8021Q) 193 struct vlan_info __rcu *vlan_info; 194 #endif 195 #if IS_ENABLED(CONFIG_NET_DSA) 196 struct dsa_switch_tree *dsa_ptr; 197 #endif 198 #if IS_ENABLED(CONFIG_TIPC) 199 struct tipc_bearer __rcu *tipc_ptr; 200 #endif 201 void *atalk_ptr; 202 /* ip指向in_device结构 */ 203 struct in_device __rcu *ip_ptr; 204 struct dn_dev __rcu *dn_ptr; 205 struct inet6_dev __rcu *ip6_ptr; 206 void *ax25_ptr; 207 struct wireless_dev *ieee80211_ptr; 208 struct wpan_dev *ieee802154_ptr; 209 #if IS_ENABLED(CONFIG_MPLS_ROUTING) 210 struct mpls_dev __rcu *mpls_ptr; 211 #endif 212 213 /* 214 * Cache lines mostly used on receive path (including eth_type_trans()) 215 */ 216 /* Interface address info used in eth_type_trans() */ 217 unsigned char *dev_addr; 218 219 #ifdef CONFIG_SYSFS 220 /* 接收队列 */ 221 struct netdev_rx_queue *_rx; 222 223 /* 接收队列数 */ 224 unsigned int num_rx_queues; 225 unsigned int real_num_rx_queues; 226 #endif 227 228 struct bpf_prog __rcu *xdp_prog; 229 unsigned long gro_flush_timeout; 230 231 /* 如网桥等的收包回调 */ 232 rx_handler_func_t __rcu *rx_handler; 233 /* 回调参数 */ 234 void __rcu *rx_handler_data; 235 236 #ifdef CONFIG_NET_CLS_ACT 237 struct tcf_proto __rcu *ingress_cl_list; 238 #endif 239 struct netdev_queue __rcu *ingress_queue; 
240 #ifdef CONFIG_NETFILTER_INGRESS 241 /* netfilter入口 */ 242 struct nf_hook_entry __rcu *nf_hooks_ingress; 243 #endif 244 245 /* 链路层广播地址 */ 246 unsigned char broadcast[MAX_ADDR_LEN]; 247 #ifdef CONFIG_RFS_ACCEL 248 struct cpu_rmap *rx_cpu_rmap; 249 #endif 250 /* 接口索引hash */ 251 struct hlist_node index_hlist; 252 253 /* 254 * Cache lines mostly used on transmit path 255 */ 256 /* 发送队列 */ 257 struct netdev_queue *_tx ____cacheline_aligned_in_smp; 258 /* 发送队列数 */ 259 unsigned int num_tx_queues; 260 unsigned int real_num_tx_queues; 261 /* 排队规则 */ 262 struct Qdisc *qdisc; 263 #ifdef CONFIG_NET_SCHED 264 DECLARE_HASHTABLE (qdisc_hash, 4); 265 #endif 266 /* 267 可在设备发送队列中排队的最大数据包数 268 */ 269 unsigned long tx_queue_len; 270 spinlock_t tx_global_lock; 271 272 /* 网络层确定传输超时, 273 调用驱动程序tx_timeout接口的最短时间 274 */ 275 int watchdog_timeo; 276 277 #ifdef CONFIG_XPS 278 struct xps_dev_maps __rcu *xps_maps; 279 #endif 280 #ifdef CONFIG_NET_CLS_ACT 281 struct tcf_proto __rcu *egress_cl_list; 282 #endif 283 284 /* These may be needed for future network-power-down code. 
*/ 285 /* watchdog定时器 */ 286 struct timer_list watchdog_timer; 287 288 /* 引用计数 */ 289 int __percpu *pcpu_refcnt; 290 291 /* 网络设备的注册和除名以两步进行, 292 该字段用于处理第二步 293 */ 294 struct list_head todo_list; 295 296 struct list_head link_watch_list; 297 298 /* 设备的注册状态 */ 299 enum { NETREG_UNINITIALIZED=0, 300 NETREG_REGISTERED, /* completed register_netdevice */ 301 NETREG_UNREGISTERING, /* called unregister_netdevice */ 302 NETREG_UNREGISTERED, /* completed unregister todo */ 303 NETREG_RELEASED, /* called free_netdev */ 304 NETREG_DUMMY, /* dummy device for NAPI poll */ 305 } reg_state:8; 306 307 /* 设备要被释放标记 */ 308 bool dismantle; 309 310 enum { 311 RTNL_LINK_INITIALIZED, 312 RTNL_LINK_INITIALIZING, 313 } rtnl_link_state:16; 314 315 bool needs_free_netdev; 316 void (*priv_destructor)(struct net_device *dev); 317 318 #ifdef CONFIG_NETPOLL 319 struct netpoll_info __rcu *npinfo; 320 #endif 321 322 possible_net_t nd_net; 323 324 /* mid-layer private */ 325 union { 326 void *ml_priv; 327 struct pcpu_lstats __percpu *lstats; 328 struct pcpu_sw_netstats __percpu *tstats; 329 struct pcpu_dstats __percpu *dstats; 330 struct pcpu_vstats __percpu *vstats; 331 }; 332 333 #if IS_ENABLED(CONFIG_GARP) 334 struct garp_port __rcu *garp_port; 335 #endif 336 #if IS_ENABLED(CONFIG_MRP) 337 struct mrp_port __rcu *mrp_port; 338 #endif 339 340 struct device dev; 341 const struct attribute_group *sysfs_groups[4]; 342 const struct attribute_group *sysfs_rx_queue_group; 343 344 const struct rtnl_link_ops *rtnl_link_ops; 345 346 /* for setting kernel sock attribute on TCP connection setup */ 347 #define GSO_MAX_SIZE 65536 348 unsigned int gso_max_size; 349 #define GSO_MAX_SEGS 65535 350 u16 gso_max_segs; 351 352 #ifdef CONFIG_DCB 353 const struct dcbnl_rtnl_ops *dcbnl_ops; 354 #endif 355 u8 num_tc; 356 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; 357 u8 prio_tc_map[TC_BITMASK + 1]; 358 359 #if IS_ENABLED(CONFIG_FCOE) 360 unsigned int fcoe_ddp_xid; 361 #endif 362 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) 
363 struct netprio_map __rcu *priomap; 364 #endif 365 struct phy_device *phydev; 366 struct lock_class_key *qdisc_tx_busylock; 367 struct lock_class_key *qdisc_running_key; 368 bool proto_down; 369 };
上述 net_device 结构中的 netdev_ops 成员指向设备操作函数结构,用于设备的初始化、销毁、开启、关闭以及修改某些变量值等操作。结构如下;其中各函数暂未注释,在阅读代码的过程中遇到具体实现时再具体分析。
1 struct net_device_ops { 2 int (*ndo_init)(struct net_device *dev); 3 void (*ndo_uninit)(struct net_device *dev); 4 int (*ndo_open)(struct net_device *dev); 5 int (*ndo_stop)(struct net_device *dev); 6 netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, 7 struct net_device *dev); 8 netdev_features_t (*ndo_features_check)(struct sk_buff *skb, 9 struct net_device *dev, 10 netdev_features_t features); 11 u16 (*ndo_select_queue)(struct net_device *dev, 12 struct sk_buff *skb, 13 void *accel_priv, 14 select_queue_fallback_t fallback); 15 void (*ndo_change_rx_flags)(struct net_device *dev, 16 int flags); 17 void (*ndo_set_rx_mode)(struct net_device *dev); 18 int (*ndo_set_mac_address)(struct net_device *dev, 19 void *addr); 20 int (*ndo_validate_addr)(struct net_device *dev); 21 int (*ndo_do_ioctl)(struct net_device *dev, 22 struct ifreq *ifr, int cmd); 23 int (*ndo_set_config)(struct net_device *dev, 24 struct ifmap *map); 25 int (*ndo_change_mtu)(struct net_device *dev, 26 int new_mtu); 27 int (*ndo_neigh_setup)(struct net_device *dev, 28 struct neigh_parms *); 29 void (*ndo_tx_timeout) (struct net_device *dev); 30 31 void (*ndo_get_stats64)(struct net_device *dev, 32 struct rtnl_link_stats64 *storage); 33 bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); 34 int (*ndo_get_offload_stats)(int attr_id, 35 const struct net_device *dev, 36 void *attr_data); 37 struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); 38 39 int (*ndo_vlan_rx_add_vid)(struct net_device *dev, 40 __be16 proto, u16 vid); 41 int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, 42 __be16 proto, u16 vid); 43 #ifdef CONFIG_NET_POLL_CONTROLLER 44 void (*ndo_poll_controller)(struct net_device *dev); 45 int (*ndo_netpoll_setup)(struct net_device *dev, 46 struct netpoll_info *info); 47 void (*ndo_netpoll_cleanup)(struct net_device *dev); 48 #endif 49 int (*ndo_set_vf_mac)(struct net_device *dev, 50 int queue, u8 *mac); 51 int (*ndo_set_vf_vlan)(struct net_device *dev, 52 
int queue, u16 vlan, 53 u8 qos, __be16 proto); 54 int (*ndo_set_vf_rate)(struct net_device *dev, 55 int vf, int min_tx_rate, 56 int max_tx_rate); 57 int (*ndo_set_vf_spoofchk)(struct net_device *dev, 58 int vf, bool setting); 59 int (*ndo_set_vf_trust)(struct net_device *dev, 60 int vf, bool setting); 61 int (*ndo_get_vf_config)(struct net_device *dev, 62 int vf, 63 struct ifla_vf_info *ivf); 64 int (*ndo_set_vf_link_state)(struct net_device *dev, 65 int vf, int link_state); 66 int (*ndo_get_vf_stats)(struct net_device *dev, 67 int vf, 68 struct ifla_vf_stats 69 *vf_stats); 70 int (*ndo_set_vf_port)(struct net_device *dev, 71 int vf, 72 struct nlattr *port[]); 73 int (*ndo_get_vf_port)(struct net_device *dev, 74 int vf, struct sk_buff *skb); 75 int (*ndo_set_vf_guid)(struct net_device *dev, 76 int vf, u64 guid, 77 int guid_type); 78 int (*ndo_set_vf_rss_query_en)( 79 struct net_device *dev, 80 int vf, bool setting); 81 int (*ndo_setup_tc)(struct net_device *dev, 82 u32 handle, 83 __be16 protocol, 84 struct tc_to_netdev *tc); 85 #if IS_ENABLED(CONFIG_FCOE) 86 int (*ndo_fcoe_enable)(struct net_device *dev); 87 int (*ndo_fcoe_disable)(struct net_device *dev); 88 int (*ndo_fcoe_ddp_setup)(struct net_device *dev, 89 u16 xid, 90 struct scatterlist *sgl, 91 unsigned int sgc); 92 int (*ndo_fcoe_ddp_done)(struct net_device *dev, 93 u16 xid); 94 int (*ndo_fcoe_ddp_target)(struct net_device *dev, 95 u16 xid, 96 struct scatterlist *sgl, 97 unsigned int sgc); 98 int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, 99 struct netdev_fcoe_hbainfo *hbainfo); 100 #endif 101 102 #if IS_ENABLED(CONFIG_LIBFCOE) 103 #define NETDEV_FCOE_WWNN 0 104 #define NETDEV_FCOE_WWPN 1 105 int (*ndo_fcoe_get_wwn)(struct net_device *dev, 106 u64 *wwn, int type); 107 #endif 108 109 #ifdef CONFIG_RFS_ACCEL 110 int (*ndo_rx_flow_steer)(struct net_device *dev, 111 const struct sk_buff *skb, 112 u16 rxq_index, 113 u32 flow_id); 114 #endif 115 int (*ndo_add_slave)(struct net_device *dev, 116 struct 
net_device *slave_dev); 117 int (*ndo_del_slave)(struct net_device *dev, 118 struct net_device *slave_dev); 119 netdev_features_t (*ndo_fix_features)(struct net_device *dev, 120 netdev_features_t features); 121 int (*ndo_set_features)(struct net_device *dev, 122 netdev_features_t features); 123 int (*ndo_neigh_construct)(struct net_device *dev, 124 struct neighbour *n); 125 void (*ndo_neigh_destroy)(struct net_device *dev, 126 struct neighbour *n); 127 128 int (*ndo_fdb_add)(struct ndmsg *ndm, 129 struct nlattr *tb[], 130 struct net_device *dev, 131 const unsigned char *addr, 132 u16 vid, 133 u16 flags); 134 int (*ndo_fdb_del)(struct ndmsg *ndm, 135 struct nlattr *tb[], 136 struct net_device *dev, 137 const unsigned char *addr, 138 u16 vid); 139 int (*ndo_fdb_dump)(struct sk_buff *skb, 140 struct netlink_callback *cb, 141 struct net_device *dev, 142 struct net_device *filter_dev, 143 int *idx); 144 145 int (*ndo_bridge_setlink)(struct net_device *dev, 146 struct nlmsghdr *nlh, 147 u16 flags); 148 int (*ndo_bridge_getlink)(struct sk_buff *skb, 149 u32 pid, u32 seq, 150 struct net_device *dev, 151 u32 filter_mask, 152 int nlflags); 153 int (*ndo_bridge_dellink)(struct net_device *dev, 154 struct nlmsghdr *nlh, 155 u16 flags); 156 int (*ndo_change_carrier)(struct net_device *dev, 157 bool new_carrier); 158 int (*ndo_get_phys_port_id)(struct net_device *dev, 159 struct netdev_phys_item_id *ppid); 160 int (*ndo_get_phys_port_name)(struct net_device *dev, 161 char *name, size_t len); 162 void (*ndo_udp_tunnel_add)(struct net_device *dev, 163 struct udp_tunnel_info *ti); 164 void (*ndo_udp_tunnel_del)(struct net_device *dev, 165 struct udp_tunnel_info *ti); 166 void* (*ndo_dfwd_add_station)(struct net_device *pdev, 167 struct net_device *dev); 168 void (*ndo_dfwd_del_station)(struct net_device *pdev, 169 void *priv); 170 171 netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb, 172 struct net_device *dev, 173 void *priv); 174 int (*ndo_get_lock_subclass)(struct 
net_device *dev); 175 int (*ndo_set_tx_maxrate)(struct net_device *dev, 176 int queue_index, 177 u32 maxrate); 178 int (*ndo_get_iflink)(const struct net_device *dev); 179 int (*ndo_change_proto_down)(struct net_device *dev, 180 bool proto_down); 181 int (*ndo_fill_metadata_dst)(struct net_device *dev, 182 struct sk_buff *skb); 183 void (*ndo_set_rx_headroom)(struct net_device *dev, 184 int needed_headroom); 185 int (*ndo_xdp)(struct net_device *dev, 186 struct netdev_xdp *xdp); 187 };
上述 net_device 结构中的 header_ops 成员用于链路层头部操作,邻居子系统在发送数据包时会用到该结构的成员函数。以以太网的实现为例:通过 cache 函数将以太网头缓存到邻居子系统的 hh 中,发送数据包前直接拷贝缓存的以太网头即可,无需重新组装。
1 struct header_ops { 2 int (*create) (struct sk_buff *skb, struct net_device *dev, 3 unsigned short type, const void *daddr, 4 const void *saddr, unsigned int len); 5 int (*parse)(const struct sk_buff *skb, unsigned char *haddr); 6 int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); 7 void (*cache_update)(struct hh_cache *hh, 8 const struct net_device *dev, 9 const unsigned char *haddr); 10 bool (*validate)(const char *ll_header, unsigned int len); 11 };