在注销网络设备时,会调用pci_driver->remove函数;以e100网卡驱动为例,实际调用的是e100_remove,该函数再调用unregister_netdev进行设备注销操作;
函数调用关系图如下,注销分为两步:
(1) 回滚注册操作,进行初步注销,过程中如果设备尚未关闭,则需先调用dev_close_many将设备关闭;
(2) 将设备加入到net_todo_list,然后通过netdev_run_todo进行设备最终的注销,其中包括调用netdev_wait_allrefs来等待所有对设备的引用结束;
/**
 * 设备注销第一步,回滚注册的调用关系
 * e100_remove
 *   |-->unregister_netdev
 *     |-->unregister_netdevice
 *       |-->unregister_netdevice_queue
 *         |-->rollback_registered
 *         |     |-->rollback_registered_many
 *         |           |-->dev_close_many-->__dev_close_many
 *         |---->net_set_todo
 *
 * 设备注销第二步,todo_list处理的调用关系
 * rtnl_unlock
 *   |-->netdev_run_todo
 *     |-->netdev_wait_allrefs
 */
unregister_netdev是注销设备时直接调用的函数,它在持有rtnl锁的情况下调用unregister_netdevice注销设备;而在调用rtnl_unlock解锁时,还会触发对net_todo_list的处理;
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	/* rtnl_unlock() runs netdev_run_todo(), which finishes the unregister. */
	rtnl_unlock();
}
1 static inline void unregister_netdevice(struct net_device *dev) 2 { 3 unregister_netdevice_queue(dev, NULL); 4 }
当head为NULL时,unregister_netdevice_queue函数执行两个操作:rollback_registered对注册过程进行回滚,net_set_todo将设备加入到net_todo_list;当head不为NULL时,仅将设备移到head链表上,留待之后统一注销;
1 /** 2 * unregister_netdevice_queue - remove device from the kernel 3 * @dev: device 4 * @head: list 5 * 6 * This function shuts down a device interface and removes it 7 * from the kernel tables. 8 * If head not NULL, device is queued to be unregistered later. 9 * 10 * Callers must hold the rtnl semaphore. You may want 11 * unregister_netdev() instead of this. 12 */ 13 14 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) 15 { 16 ASSERT_RTNL(); 17 18 if (head) { 19 list_move_tail(&dev->unreg_list, head); 20 } 21 /* 22 可见,下面注销分两个步骤: 23 1. rollback_registered 24 2. 加入todo_list,后续在rtnl_unlock中调用netdev_run_todo 25 */ 26 else { 27 /* 回滚注册 */ 28 rollback_registered(dev); 29 /* Finish processing unregister after unlock */ 30 /* 设备的todolist加入到todolist中 */ 31 net_set_todo(dev); 32 } 33 }
rollback_registered将设备加入到新链表中,然后调用回滚多个设备的函数;
1 static void rollback_registered(struct net_device *dev) 2 { 3 /* 生成single链表 */ 4 LIST_HEAD(single); 5 6 /* 加入链表 */ 7 list_add(&dev->unreg_list, &single); 8 9 /* 注销 */ 10 rollback_registered_many(&single); 11 12 /* 删除链表 */ 13 list_del(&single); 14 }
rollback_registered_many为核心的回滚注册函数,其会遍历传入设备链表,对每个设备进行注销操作,并且发送相关通知消息;
1 static void rollback_registered_many(struct list_head *head) 2 { 3 struct net_device *dev, *tmp; 4 LIST_HEAD(close_head); 5 6 BUG_ON(dev_boot_phase); 7 ASSERT_RTNL(); 8 9 list_for_each_entry_safe(dev, tmp, head, unreg_list) { 10 /* Some devices call without registering 11 * for initialization unwind. Remove those 12 * devices and proceed with the remaining. 13 */ 14 /* 设备为为初始化状态 */ 15 if (dev->reg_state == NETREG_UNINITIALIZED) { 16 pr_debug("unregister_netdevice: device %s/%p never was registered ", 17 dev->name, dev); 18 19 WARN_ON(1); 20 //删除节点,继续下一个设备 21 list_del(&dev->unreg_list); 22 continue; 23 } 24 25 //标记设备要被释放 26 dev->dismantle = true; 27 BUG_ON(dev->reg_state != NETREG_REGISTERED); 28 } 29 30 /* If device is running, close it first. */ 31 /* 如果设备正在运行,先将其关闭 */ 32 list_for_each_entry(dev, head, unreg_list) 33 list_add_tail(&dev->close_list, &close_head); 34 35 /* 关闭设备 */ 36 dev_close_many(&close_head, true); 37 38 /* 从各种链表中移除当前设备 */ 39 list_for_each_entry(dev, head, unreg_list) { 40 /* And unlink it from device chain. */ 41 unlist_netdevice(dev); 42 43 dev->reg_state = NETREG_UNREGISTERING; 44 } 45 46 /* 清理工作 */ 47 flush_all_backlogs(); 48 49 /* 同步rcu */ 50 synchronize_net(); 51 52 list_for_each_entry(dev, head, unreg_list) { 53 struct sk_buff *skb = NULL; 54 55 /* Shutdown queueing discipline. */ 56 /* 关闭排队规则 */ 57 dev_shutdown(dev); 58 59 60 /* Notify protocols, that we are about to destroy 61 * this device. They should clean all the things. 
62 */ 63 /* 通知设备注销 */ 64 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 65 66 /* 构建dellink消息 */ 67 if (!dev->rtnl_link_ops || 68 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 69 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 70 GFP_KERNEL); 71 72 /* 73 * Flush the unicast and multicast chains 74 */ 75 /* 清理单播和组播链表 */ 76 dev_uc_flush(dev); 77 dev_mc_flush(dev); 78 79 /* 设备的uninit操作 */ 80 if (dev->netdev_ops->ndo_uninit) 81 dev->netdev_ops->ndo_uninit(dev); 82 83 /* 发送netlink消息 */ 84 if (skb) 85 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); 86 87 /* Notifier chain MUST detach us all upper devices. */ 88 WARN_ON(netdev_has_any_upper_dev(dev)); 89 WARN_ON(netdev_has_any_lower_dev(dev)); 90 91 /* Remove entries from kobject tree */ 92 /* 从虚拟文件系统中移除设备 */ 93 netdev_unregister_kobject(dev); 94 #ifdef CONFIG_XPS 95 /* Remove XPS queueing entries */ 96 netif_reset_xps_queues_gt(dev, 0); 97 #endif 98 } 99 100 /* 同步rcu */ 101 synchronize_net(); 102 103 /* 减少设备引用 */ 104 list_for_each_entry(dev, head, unreg_list) 105 dev_put(dev); 106 }
注销过程中,如果发现有设备尚未关闭,则需要调用dev_close_many将设备进行关闭;
1 /* 关闭设备 */ 2 int dev_close_many(struct list_head *head, bool unlink) 3 { 4 struct net_device *dev, *tmp; 5 6 /* Remove the devices that don't need to be closed */ 7 /* 移除未运行的设备 */ 8 list_for_each_entry_safe(dev, tmp, head, close_list) 9 if (!(dev->flags & IFF_UP)) 10 list_del_init(&dev->close_list); 11 12 /* 关闭设备 */ 13 __dev_close_many(head); 14 15 list_for_each_entry_safe(dev, tmp, head, close_list) { 16 /* netlink消息 */ 17 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); 18 19 /* 通知设备关闭 */ 20 call_netdevice_notifiers(NETDEV_DOWN, dev); 21 22 /* 移除设备节点 */ 23 if (unlink) 24 list_del_init(&dev->close_list); 25 } 26 27 return 0; 28 }
1 static int __dev_close_many(struct list_head *head) 2 { 3 struct net_device *dev; 4 5 ASSERT_RTNL(); 6 might_sleep(); 7 8 list_for_each_entry(dev, head, close_list) { 9 /* Temporarily disable netpoll until the interface is down */ 10 /* 禁用netpoll */ 11 netpoll_poll_disable(dev); 12 13 /* 通知设备正在关闭 */ 14 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 15 16 /* 清除start标志位 */ 17 clear_bit(__LINK_STATE_START, &dev->state); 18 19 /* Synchronize to scheduled poll. We cannot touch poll list, it 20 * can be even on different cpu. So just clear netif_running(). 21 * 22 * dev->stop() will invoke napi_disable() on all of it's 23 * napi_struct instances on this device. 24 */ 25 smp_mb__after_atomic(); /* Commit netif_running(). */ 26 } 27 28 /* 未发送完的数据发送完 */ 29 dev_deactivate_many(head); 30 31 list_for_each_entry(dev, head, close_list) { 32 const struct net_device_ops *ops = dev->netdev_ops; 33 34 /* 35 * Call the device specific close. This cannot fail. 36 * Only if device is UP 37 * 38 * We allow it to be called even after a DETACH hot-plug 39 * event. 40 */ 41 /* 调用设备关闭操作 */ 42 if (ops->ndo_stop) 43 ops->ndo_stop(dev); 44 45 /* 标记设备关闭 */ 46 dev->flags &= ~IFF_UP; 47 /* 启用netpoll */ 48 netpoll_poll_enable(dev); 49 } 50 51 return 0; 52 }
对于net_todo_list的处理,则是在解锁的时候进行,它将调用netdev_run_todo来处理todo_list;
/*
 * rtnl_unlock - release the RTNL lock and process pending unregistrations.
 *
 * The lock itself is dropped inside netdev_run_todo(), which then handles
 * the devices queued on net_todo_list.
 */
void rtnl_unlock(void)
{
	/* This fellow will unlock it for us. */
	netdev_run_todo();
}
netdev_run_todo需要在等待所有对设备的引用结束之后,再对设备进行销毁;
1 /* The sequence is: 2 * 3 * rtnl_lock(); 4 * ... 5 * register_netdevice(x1); 6 * register_netdevice(x2); 7 * ... 8 * unregister_netdevice(y1); 9 * unregister_netdevice(y2); 10 * ... 11 * rtnl_unlock(); 12 * free_netdev(y1); 13 * free_netdev(y2); 14 * 15 * We are invoked by rtnl_unlock(). 16 * This allows us to deal with problems: 17 * 1) We can delete sysfs objects which invoke hotplug 18 * without deadlocking with linkwatch via keventd. 19 * 2) Since we run with the RTNL semaphore not held, we can sleep 20 * safely in order to wait for the netdev refcnt to drop to zero. 21 * 22 * We must not return until all unregister events added during 23 * the interval the lock was held have been completed. 24 */ 25 void netdev_run_todo(void) 26 { 27 struct list_head list; 28 29 /* Snapshot list, allow later requests */ 30 list_replace_init(&net_todo_list, &list); 31 32 __rtnl_unlock(); 33 34 35 /* Wait for rcu callbacks to finish before next phase */ 36 if (!list_empty(&list)) 37 rcu_barrier(); 38 39 /* 遍历链表 */ 40 while (!list_empty(&list)) { 41 struct net_device *dev 42 = list_first_entry(&list, struct net_device, todo_list); 43 44 /* 移除设备 */ 45 list_del(&dev->todo_list); 46 47 /* 通知设备进行最后的注销 */ 48 rtnl_lock(); 49 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 50 __rtnl_unlock(); 51 52 /* 检查设备状态 */ 53 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 54 pr_err("network todo '%s' but state %d ", 55 dev->name, dev->reg_state); 56 dump_stack(); 57 continue; 58 } 59 60 /* 设置设备状态为注销 */ 61 dev->reg_state = NETREG_UNREGISTERED; 62 63 /* 等待所有引用结束 */ 64 netdev_wait_allrefs(dev); 65 66 /* paranoia */ 67 BUG_ON(netdev_refcnt_read(dev)); 68 BUG_ON(!list_empty(&dev->ptype_all)); 69 BUG_ON(!list_empty(&dev->ptype_specific)); 70 WARN_ON(rcu_access_pointer(dev->ip_ptr)); 71 WARN_ON(rcu_access_pointer(dev->ip6_ptr)); 72 WARN_ON(dev->dn_ptr); 73 74 /* 调用设备销毁 */ 75 if (dev->priv_destructor) 76 dev->priv_destructor(dev); 77 78 /* 需要释放设备,则释放 */ 79 if (dev->needs_free_netdev) 80 
free_netdev(dev); 81 82 /* Report a network device has been unregistered */ 83 /* 报告设备被注销 */ 84 rtnl_lock(); 85 dev_net(dev)->dev_unreg_count--; 86 __rtnl_unlock(); 87 wake_up(&netdev_unregistering_wq); 88 89 /* Free network device */ 90 /* 减少设备对象引用 */ 91 kobject_put(&dev->dev.kobj); 92 } 93 }
netdev_wait_allrefs会一直等待,直到设备的引用计数降为0;等待期间每隔1s重新广播一次注销通知,每隔10s打印一次告警;
1 /** 2 * netdev_wait_allrefs - wait until all references are gone. 3 * @dev: target net_device 4 * 5 * This is called when unregistering network devices. 6 * 7 * Any protocol or device that holds a reference should register 8 * for netdevice notification, and cleanup and put back the 9 * reference if they receive an UNREGISTER event. 10 * We can get stuck here if buggy protocols don't correctly 11 * call dev_put. 12 */ 13 /* 等待所有的引用结束 */ 14 static void netdev_wait_allrefs(struct net_device *dev) 15 { 16 unsigned long rebroadcast_time, warning_time; 17 int refcnt; 18 19 /* 从linkwatch中删除设备 */ 20 linkwatch_forget_dev(dev); 21 22 rebroadcast_time = warning_time = jiffies; 23 24 /* 统计所有引用数 */ 25 refcnt = netdev_refcnt_read(dev); 26 27 /* 引用数不为0 */ 28 while (refcnt != 0) { 29 /* 每1s中进行一次通知 */ 30 /* 到达通知广播时间窗口 */ 31 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 32 rtnl_lock(); 33 34 /* Rebroadcast unregister notification */ 35 /* 通知设备注销 */ 36 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 37 38 __rtnl_unlock(); 39 rcu_barrier(); 40 rtnl_lock(); 41 42 /* 通知设备最终注销 */ 43 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 44 45 /* 连接改变事件处理 */ 46 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 47 &dev->state)) { 48 /* We must not have linkwatch events 49 * pending on unregister. If this 50 * happens, we simply run the queue 51 * unscheduled, resulting in a noop 52 * for this device. 53 */ 54 linkwatch_run_queue(); 55 } 56 57 __rtnl_unlock(); 58 59 /* 设定通知时间为当前时间 */ 60 rebroadcast_time = jiffies; 61 } 62 63 msleep(250); 64 65 /* 重新读取引用数 */ 66 refcnt = netdev_refcnt_read(dev); 67 68 /* 10s钟仍未结束,则打印告警 */ 69 if (time_after(jiffies, warning_time + 10 * HZ)) { 70 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d ", 71 dev->name, refcnt); 72 warning_time = jiffies; 73 } 74 } 75 }