软中断
首先明确一个概念:软中断(不是软件中断int n)。总的来说,内核在启动时为每一个CPU创建了一个特殊的内核线程(ksoftirqd),这个线程会不停地检查是否有软中断需要执行,如果需要执行则调用注册的接口函数。所以由ksoftirqd处理时,软中断是运行在内核线程上下文中的,而且可能并发执行在不同CPU上(后文会看到,软中断也可能在硬中断退出irq_exit()时直接被执行)。所谓的软中断,就是内核利用内核线程配合抽象的数据结构进行管理、在合适的时间调用注册的接口的一套软件管理机制。
先看管理软中断的数据结构,因为数据结构最能说明逻辑。内核对软中断抽象的数据结构主要有如下几个部分。
中断服务接口管理
在内核中声明在\kernel\softirq.c中如下
/*
 * Interrupt status array with NR_CPUS entries. It is only defined here when
 * __ARCH_IRQ_STAT is not defined.
 */
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

/* Table of softirq handler slots; it has NR_SOFTIRQS entries. */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/*
 * One softirq handler slot. The only member is the handler callback, which
 * takes a pointer to a struct softirq_action.
 */
struct softirq_action {
	void (*action)(struct softirq_action *);
};
其中的NR_SOFTIRQS由软中断类型的枚举对象提供如下定义:
/*
 * Softirq numbers, starting at 0 with HI_SOFTIRQ. NR_SOFTIRQS is the last
 * enumerator, so its value equals the number of softirq types listed
 * before it.
 */
enum {
	HI_SOFTIRQ=0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,
	NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ,
	BLOCK_IOPOLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ,
	RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS
};
综上可以知道,内核维护了一个struct softirq_action类型的软中断接口数组,而软中断的状态则是由前面的irq_cpustat_t类型的数组管理。由定义可以知道状态是和CPU关联的,表示某一个CPU上的软中断状态。下面看看irq_cpustat_t的定义,也非常简单,主要就是其中的__softirq_pending成员:这个成员的每一个bit表示一种软中断类型的状态信息,并且低bit对应的软中断类型优先级高。
/*
 * Interrupt status record; irq_stat holds NR_CPUS of these. The type is
 * cache-line aligned.
 */
typedef struct {
	unsigned int __softirq_pending;	/* marks whether softirqs are pending */
	long idle_timestamp;

	/* Statistics */
	/* Hard interrupt statistics. */
	unsigned int irq_timer_count;
	unsigned int irq_syscall_count;
	unsigned int irq_resched_count;
	unsigned int irq_hv_flush_count;
	unsigned int irq_call_count;
	unsigned int irq_hv_msg_count;
	unsigned int irq_dev_intr_count;
} ____cacheline_aligned irq_cpustat_t;
通过Tasklet接口的创建过程就可以知道,软中断的注册(open_softirq)过程就是修改前面定义的softirq_vec数组,这样就完成了软中断的注册;而驱动开发人员也很少直接使用软中断。
/*
 * Register a softirq handler.
 * @nr:     softirq number, i.e. one of the softirq enum values
 * @action: softirq service function for that number
 *
 * The return type is written out explicitly as void; a prototype without a
 * return type relies on implicit int, which is not valid C since C99.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *));
再看内核在启动时为每个CPU创建的线程操作:
static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; static struct smp_hotplug_thread softirq_threads = { .store = &ksoftirqd, .thread_should_run = ksoftirqd_should_run, .thread_fn = run_ksoftirqd, .thread_comm = "ksoftirqd/%u", }; static __init int spawn_ksoftirqd(void) { register_cpu_notifier(&cpu_nfb); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); return 0; } early_initcall(spawn_ksoftirqd);
重点是这个接口函数 smpboot_register_percpu_thread如下:
/** * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug * @plug_thread: Hotplug thread descriptor * * Creates and starts the threads on all online cpus. */ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) { unsigned int cpu; int ret = 0; get_online_cpus(); mutex_lock(&smpboot_threads_lock); for_each_online_cpu(cpu) { ret = __smpboot_create_thread(plug_thread, cpu); if (ret) { smpboot_destroy_threads(plug_thread); goto out; } smpboot_unpark_thread(plug_thread, cpu); } list_add(&plug_thread->list, &hotplug_threads); out: mutex_unlock(&smpboot_threads_lock); put_online_cpus(); return ret; }
传进来的参数是softirq_threads,先获取在线(即已激活)的CPU,然后遍历调用__smpboot_create_thread,参数同样是前面定义的softirq_threads,继续向下看:
tatic int __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); struct smpboot_thread_data *td; if (tsk) return 0; td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu)); if (!td) return -ENOMEM; td->cpu = cpu; td->ht = ht; tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu, ht->thread_comm); if (IS_ERR(tsk)) { kfree(td); return PTR_ERR(tsk); } get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; if (ht->create) { /* * Make sure that the task has actually scheduled out * into park position, before calling the create * callback. At least the migration thread callback * requires that the task is off the runqueue. */ if (!wait_task_inactive(tsk, TASK_PARKED)) WARN_ON(1); else ht->create(cpu); } return 0; }
可以看到这里通过kthread_create_on_cpu(smpboot_thread_fn, td, cpu, ht->thread_comm)接口在特定CPU上创建了一个内核线程,不再往深入继续看,这里只需要知道创建了一个绑定CPU的线程,线程函数是smpboot_thread_fn,这个比较重要,需要详细看一下。传入的data是一个struct smpboot_thread_data类型的数据,其中的ht成员保存了softirq_threads,如下。线程开始运行时先关闭抢占,检查是否需要停止当前线程,如果需要则立马停止当前线程,这里肯定不需要停止,除非是关机(我的理解)。然后就是检查是否要暂停,因为用户的软中断接口可能调用阻塞接口,会阻塞当前内核线程,所以需要暂停当前线程,最后的恢复也是由用户软中断服务函数完成(我的理解)。最后部分源码注释如下:
/*
 * Thread function of the per-cpu hotplug threads.
 * @data: the struct smpboot_thread_data of this thread; td->ht is the
 *        hotplug thread descriptor (softirq_threads for ksoftirqd)
 *
 * Loops forever: handles stop and park requests and status transitions,
 * then either calls schedule() or runs ht->thread_fn(), depending on
 * ht->thread_should_run(). Returns 0 once kthread_should_stop() is true.
 */
static int smpboot_thread_fn(void *data)
{
	struct smpboot_thread_data *td = data;
	struct smp_hotplug_thread *ht = td->ht;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		/* Disable kernel preemption */
		preempt_disable();
		/*
		 * Does this thread have to stop?
		 * NOTE(review): the original author guesses this only happens
		 * at shutdown -- unconfirmed.
		 */
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->cleanup)
				ht->cleanup(td->cpu, cpu_online(td->cpu));
			kfree(td);
			return 0;
		}

		if (kthread_should_park()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->park && td->status == HP_THREAD_ACTIVE) {
				BUG_ON(td->cpu != smp_processor_id());
				ht->park(td->cpu);
				td->status = HP_THREAD_PARKED;
			}
			kthread_parkme();
			/* We might have been woken for stop */
			continue;
		}

		BUG_ON(td->cpu != smp_processor_id());

		/* Check for state change setup */
		switch (td->status) {
		case HP_THREAD_NONE:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->setup)
				ht->setup(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			continue;

		case HP_THREAD_PARKED:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->unpark)
				ht->unpark(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			continue;
		}

		/*
		 * For ksoftirqd, thread_should_run is ksoftirqd_should_run, as
		 * set in the softirq_threads initializer. Its result decides
		 * whether the softirq thread has work to do on this cpu; if
		 * not, the thread gives up the cpu.
		 */
		if (!ht->thread_should_run(td->cpu)) {
			/*
			 * No softirq needs to run: give up the time slice.
			 */
			preempt_enable_no_resched();
			schedule();
		} else {
			/*
			 * There is work to do: call thread_fn, which for
			 * ksoftirqd invokes the registered softirq handlers.
			 */
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			/* Bound to run_ksoftirqd in the softirq_threads initializer */
			ht->thread_fn(td->cpu);
		}
	}
}
可以看到run_ksoftirqd如下:
/*
 * Body of the ksoftirqd thread for @cpu: with local interrupts disabled,
 * check for pending softirqs and, if there are any, process them via
 * __do_softirq(). Local interrupts are enabled again on every path.
 */
static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();

	/* Nothing pending: just restore interrupts and leave. */
	if (!local_softirq_pending()) {
		local_irq_enable();
		return;
	}

	/*
	 * We can safely run softirq on inline stack, as we are not deep
	 * in the task stack here.
	 */
	__do_softirq();
	local_irq_enable();
	cond_resched_rcu_qs();
}
关闭本CPU上的硬中断然后执行__do_softirq();这个是软件中断的重点接口如下,注释了一部分:
/*
 * Run the handlers of all softirqs pending on the local cpu.
 *
 * Pending bits are handled from the lowest bit upwards. If new softirqs were
 * raised while handling, the loop restarts as long as the time, need_resched()
 * and restart-count limits allow; otherwise wakeup_softirqd() is called and
 * the remaining work is left to it.
 */
asmlinkage __visible void __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC s current task context is borrowed for the
	 * softirq. A softirq handled such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap
	 */
	current->flags &= ~PF_MEMALLOC;

	/* Snapshot the bitmap of pending softirqs */
	pending = local_softirq_pending();
	account_irq_enter_time(current);

	/* Mark that we are entering softirq context */
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	/* Clear the bitmap of pending softirqs */
	set_softirq_pending(0);

	/* Enable hardware interrupts */
	local_irq_enable();

	/* Start at the first slot of the softirq handler table */
	h = softirq_vec;

	/*
	 * ffs() returns the position of the lowest set bit, counted from 1
	 * (0 means no bit is set), so lower bits are handled first.
	 */
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		/* Advance to the slot of this softirq */
		h += softirq_bit - 1;

		/* Index of that slot in softirq_vec */
		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		/* Call the handler registered for this softirq */
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		/*
		 * Drop the bit just handled by shifting the bitmap right, and
		 * move the slot pointer along with it.
		 */
		h++;
		pending >>= softirq_bit;
		/* Loop again for the remaining set bits */
	}

	/* Every bit that was set in this pass has been handled */
	rcu_bh_qs();
	local_irq_disable();

	/* Check whether new softirqs were raised while we were handling */
	pending = local_softirq_pending();
	if (pending) {
		/*
		 * New softirqs are pending. Restart only while all three
		 * conditions hold, so that a steady stream of softirqs cannot
		 * keep other tasks from running:
		 * 1. jiffies is still before end (start + MAX_SOFTIRQ_TIME);
		 * 2. no reschedule is needed, i.e. !need_resched();
		 * 3. fewer than MAX_SOFTIRQ_RESTART restarts have happened.
		 * NOTE(review): the original author cites 2 jiffies (10ms at
		 * 200Hz) and 10 restarts; both macros are defined outside this
		 * listing -- confirm.
		 */
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		/*
		 * A condition failed: wake the ksoftirqd thread to handle the
		 * rest. The limit matters because this function may run in
		 * interrupt context.
		 */
		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	/* Re-enable bottom halves; pairs with __local_bh_disable_ip() above */
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}
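注意软中断的处理过程对软中断连续执行的时间进行了限制,其实是有原因的:上述软中断处理部分的代码有可能在中断上下文中执行,即在irq_exit()中,具体的调用链是irq_exit()->invoke_softirq()->__do_softirq()(或do_softirq_own_stack());只有在force_irqthreads为真时,invoke_softirq()才改为调用wakeup_softirqd()唤醒ksoftirqd线程。如下(可参考硬中断的分析过程):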
/*
 * Leave hard interrupt context. After HARDIRQ_OFFSET has been subtracted
 * from the preempt count, pending softirqs are processed via
 * invoke_softirq() if we are no longer in interrupt context.
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	WARN_ON_ONCE(!irqs_disabled());
#endif

	account_irq_exit_time(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	/* Run softirqs only if none is being handled and some are pending. */
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
	rcu_irq_exit();
	trace_hardirq_exit(); /* must be last! */
}

/*
 * Process pending softirqs directly (on the current stack or on a separate
 * one, depending on CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK), or, when
 * force_irqthreads is set, call wakeup_softirqd() instead.
 */
static inline void invoke_softirq(void)
{
	if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack that can
		 * be potentially deep already. So call softirq in its own stack
		 * to prevent from any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}
asmlinkage __visible void __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; int max_restart = MAX_SOFTIRQ_RESTART; struct softirq_action *h; bool in_hardirq; __u32 pending; int softirq_bit; /* * Mask out PF_MEMALLOC s current task context is borrowed for the * softirq. A softirq handled such as network RX might set PF_MEMALLOC * again if the socket is related to swap */ current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending();------------------------------获取当前CPU的软中断寄存器__softirq_pending值到局部变量pending。 account_irq_enter_time(current); __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);----------------增加preempt_count中的softirq域计数,表明当前在软中断上下文中。 in_hardirq = lockdep_softirq_start(); restart: /* Reset the pending bitmask before enabling irqs */ set_softirq_pending(0);-----------------------------------------清除软中断寄存器__softirq_pending。 local_irq_enable();---------------------------------------------打开本地中断 h = softirq_vec;------------------------------------------------指向softirq_vec第一个元素,即软中断HI_SOFTIRQ对应的处理函数。 while ((softirq_bit = ffs(pending))) {--------------------------ffs()找到pending中第一个置位的比特位,返回值是第一个为1的位序号。这里的位是从低位开始,这也和优先级相吻合,低位优先得到执行。如果没有则返回0,退出循环。 unsigned int vec_nr; int prev_count; h += softirq_bit - 1;---------------------------------------根据sofrirq_bit找到对应的软中断描述符,即软中断处理函数。 vec_nr = h - softirq_vec;-----------------------------------软中断序号 prev_count = preempt_count(); kstat_incr_softirqs_this_cpu(vec_nr); trace_softirq_entry(vec_nr); h->action(h);-----------------------------------------------执行对应软中断函数 trace_softirq_exit(vec_nr); if (unlikely(prev_count != preempt_count())) { pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", vec_nr, softirq_to_name[vec_nr], h->action, prev_count, preempt_count()); preempt_count_set(prev_count); } h++;-------------------------------------------------------h递增,指向下一个软中断 pending >>= 
softirq_bit;-----------------------------------pending右移softirq_bit位 } rcu_bh_qs(); local_irq_disable();-------------------------------------------关闭本地中断 pending = local_softirq_pending();-----------------------------再次检查是否有软中断产生,在上一次检查至此这段时间有新软中断产生。 if (pending) { if (time_before(jiffies, end) && !need_resched() && --max_restart)-----------------------------------------再次触发软中断执行的三个条件:1.软中断处理时间不超过2jiffies,200Hz的系统对应10ms;2.当前没有有进程需要调度,即!need_resched();3.这种循环不超过10次。 goto restart; wakeup_softirqd();-----------------------------------------如果上面的条件不满足,则唤醒ksoftirq内核线程来处理软中断。 } lockdep_softirq_end(in_hardirq); account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET);----------------------------减少preempt_count的softirq域计数,和前面增加计数呼应。表示这段代码处于软中断上下文。 WARN_ON_ONCE(in_interrupt()); tsk_restore_flags(current, old_flags, PF_MEMALLOC); }
wakeup_softirqd()首先获取当前CPU的ksoftirqd线程的task_struct。
如果该线程不处于TASK_RUNNING状态,则去唤醒此线程。