kernel: 3.6
Hardware:
A typical SoC has several external sp804 timers; assume here that timer0 serves as the global clock event device and timer1 as the clocksource.
Each ARM core additionally has an SMP local timer.
Core data structures:
1. struct clock_event_device: the abstraction of a clock event device. Its set_next_event method programs the time of the next interrupt, and its event_handler member is the interrupt-handling callback, normally one of the core handlers tick_handle_periodic or hrtimer_interrupt. Both sp804 timer0 and the SMP local timers are wrapped in a clock_event_device and registered with the system (a trimmed struct sketch follows this list).
2. struct clocksource: the abstraction of a clock source; its read method returns the current counter value. In this system sp804 timer1 and the purely software jiffies counter are registered as clocksources.
3. struct timekeeper: the timekeeper global variable. It manages the active clocksource and, together with the saved xtime value, provides the "read the current time" service.
4. struct timespec xtime: a global variable holding the current wall time, updated in the tick interrupt.
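For orientation, here is a trimmed sketch of the two central types, keeping only the fields this note refers to (the real kernel 3.6 definitions in include/linux/clockchips.h and include/linux/clocksource.h contain many more members):

struct clock_event_device {
	const char		*name;
	unsigned int		features;	/* CLOCK_EVT_FEAT_PERIODIC / CLOCK_EVT_FEAT_ONESHOT */
	int			rating;		/* higher rating wins when the tick layer picks a device */
	int			irq;
	const struct cpumask	*cpumask;	/* which cpus this device can interrupt */
	int			(*set_next_event)(unsigned long evt,
						  struct clock_event_device *dev);
	void			(*set_mode)(enum clock_event_mode mode,
					    struct clock_event_device *dev);
	void			(*event_handler)(struct clock_event_device *dev);
	/* ... */
};

struct clocksource {
	const char	*name;
	int		rating;
	cycle_t		(*read)(struct clocksource *cs);	/* raw counter value */
	cycle_t		mask;
	u32		mult, shift;	/* cycles -> nanoseconds conversion */
	unsigned long	flags;		/* e.g. CLOCK_SOURCE_IS_CONTINUOUS */
	/* ... */
};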
Overall flow:
1. start_kernel -> init_timers()/hrtimers_init(): initialize the timer wheel and hrtimers.
2. start_kernel -> timekeeping_init(): initialize the timekeeper structure, with clock = the jiffies clocksource.
3. start_kernel -> time_init(): board code registers sp804 timer0 as a clock_event_device.
It initializes the sp804 timer, requests the interrupt and sets the per-cpu tick_cpu_device->evtdev to the sp804 timer; from this point on the tick interrupt is live.
4. start_kernel -> time_init(): board code registers sp804 timer1 as a clocksource.
5. start_kernel -> rest_init() ... kernel_init() -> smp_prepare_cpus(): registers the cpu0 local timer clock_event_device.
In the process the earlier sp804 timer is shut down, so from now on the local timer interrupt is live (timer0 interrupts stop),
and cpu0's tick_cpu_device->evtdev is switched to the local timer.
6. secondary_start_kernel() -> percpu_timer_setup(): registers each cpuX local timer clock_event_device, same as on cpu0.
7. do_initcalls() -> init_jiffies_clocksource() registers the jiffies clocksource.
This triggers clocksource selection, which sets the timekeeper's clock to the higher-rated sp804 timer1 clocksource,
so that the timer softirq can later switch the tick from periodic mode to oneshot mode.
8. In the TIMER_SOFTIRQ softirq on cpu0 and cpuX, each cpu's tick_cpu_device->evtdev is switched to oneshot mode
and its event_handler becomes hrtimer_interrupt (see the softirq sketch right after this list).
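Step 8 happens inside the timer softirq. Roughly, the 3.x code path looks like this (condensed from kernel/timer.c and kernel/hrtimer.c; details trimmed, so treat it as a sketch rather than the exact source):

/* TIMER_SOFTIRQ handler, condensed */
static void run_timer_softirq(struct softirq_action *h)
{
	hrtimer_run_pending();		/* chance to switch the tick to highres/oneshot */
	/* ... then expire the low-resolution timer wheel ... */
}

void hrtimer_run_pending(void)
{
	if (hrtimer_hres_active())
		return;

	/*
	 * Once a high-resolution clocksource (sp804 timer1 here) is available,
	 * switch this cpu's tick device to oneshot mode and install
	 * hrtimer_interrupt as its event_handler.
	 */
	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
		hrtimer_switch_to_hres();
}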
clock_event_device registration
static struct clock_event_device sp804_clockevent = {
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode	= sp804_set_mode,
	.set_next_event	= sp804_set_next_event,
	.rating		= 300,
	.cpumask	= cpu_all_mask,
};

static struct irqaction sp804_timer_irq = {
	.name		= "timer",
	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
	.handler	= sp804_timer_interrupt,
	.dev_id		= &sp804_clockevent,
};

void __init sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name)
{
	struct clock_event_device *evt = &sp804_clockevent;
	long rate = sp804_get_clock_rate(name);

	if (rate < 0)
		return;

	clkevt_base = base;
	clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ);
	evt->name = name;
	evt->irq = irq;

	setup_irq(irq, &sp804_timer_irq);
	clockevents_config_and_register(evt, rate, 0xf, 0xffffffff);
}
The kernel-provided sp804_clockevents_init essentially just registers the sp804_clockevent. The base parameter is the board-specific timer base address and irq is the interrupt number. The key steps are installing the interrupt handler via setup_irq(irq, &sp804_timer_irq) and registering the clock_event_device via clockevents_config_and_register.
The interrupt handler sp804_timer_interrupt does little more than call the event_handler of the associated sp804_clockevent; that event_handler is installed during the clockevents_register_device path (by the tick layer, as shown below).
static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = dev_id;

	/* clear the interrupt */
	writel(1, clkevt_base + TIMER_INTCLR);

	/* event_handler is installed during the clockevents_config_and_register / registration path */
	evt->event_handler(evt);

	return IRQ_HANDLED;
}

void clockevents_config_and_register(struct clock_event_device *dev, u32 freq,
				     unsigned long min_delta, unsigned long max_delta)
{
	dev->min_delta_ticks = min_delta;
	dev->max_delta_ticks = max_delta;
	clockevents_config(dev, freq);
	clockevents_register_device(dev);
}

void clockevents_register_device(struct clock_event_device *dev)
{
	unsigned long flags;

	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
	if (!dev->cpumask) {
		WARN_ON(num_possible_cpus() > 1);
		dev->cpumask = cpumask_of(smp_processor_id());
	}

	raw_spin_lock_irqsave(&clockevents_lock, flags);

	list_add(&dev->list, &clockevent_devices);
	clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
	clockevents_notify_released();

	raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev) notifies the clockevents notifier chain, and the registered tick_notifier handles the new device:
static struct notifier_block tick_notifier = {
	.notifier_call = tick_notify,
};

static int tick_notify(struct notifier_block *nb, unsigned long reason, void *dev)
{
	switch (reason) {
	case CLOCK_EVT_NOTIFY_ADD:
		return tick_check_new_device(dev);
	...
	}
}
tick_check_new_device is what initializes things whenever a new clock_event_device is added.
At this point this is in fact the first clock_event_device in the system, and cpu1 is still sitting in WFI.
The newdev is not a cpu-local device (its cpumask is cpu_all_mask), and each cpu's tick_cpu_device->evtdev is still NULL.
The key step is setting up newdev through tick_setup_device.
static int tick_check_new_device(struct clock_event_device *newdev)
{
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu, ret = NOTIFY_OK;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_device_lock, flags);

	cpu = smp_processor_id();
	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		goto out_bc;

	td = &per_cpu(tick_cpu_device, cpu);
	curdev = td->evtdev;

	/* cpu local device ? */
	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {

		/*
		 * If the cpu affinity of the device interrupt can not
		 * be set, ignore it.
		 */
		if (!irq_can_set_affinity(newdev->irq))
			goto out_bc;

		/*
		 * If we have a cpu local device already, do not replace it
		 * by a non cpu local device
		 */
		if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
			goto out_bc;
	}

	/*
	 * If we have an active device, then check the rating and the oneshot
	 * feature.
	 */
	if (curdev) {
		/*
		 * Prefer one shot capable devices !
		 */
		if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
		    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
			goto out_bc;
		/*
		 * Check the rating
		 */
		if (curdev->rating >= newdev->rating)
			goto out_bc;
	}

	/*
	 * Replace the eventually existing device by the new
	 * device. If the current device is the broadcast device, do
	 * not give it back to the clockevents layer !
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();

	raw_spin_unlock_irqrestore(&tick_device_lock, flags);
	return NOTIFY_STOP;

out_bc:
	/*
	 * Can the new device be used as a broadcast device ?
	 */
	if (tick_check_broadcast_device(newdev))
		ret = NOTIFY_STOP;

	raw_spin_unlock_irqrestore(&tick_device_lock, flags);

	return ret;
}
tick_setup_device makes our sp804 timer the per-cpu tick_cpu_device->evtdev.
Because tick_cpu_device->evtdev was previously NULL, it initializes the tick period, sets the time of the next tick,
and designates the current cpu (cpu0) as the cpu that runs do_timer for timekeeping work.
It then calls tick_setup_periodic to install the clock_event_device's event_handler;
the interrupt handler ultimately just calls this event_handler, and it is only at this point that it gets set.
static void tick_setup_device(struct tick_device *td,
			      struct clock_event_device *newdev, int cpu,
			      const struct cpumask *cpumask)
{
	ktime_t next_event;
	void (*handler)(struct clock_event_device *) = NULL;

	/*
	 * First device setup ?
	 */
	if (!td->evtdev) {
		/*
		 * If no cpu took the do_timer update, assign it to
		 * this cpu:
		 */
		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
			tick_do_timer_cpu = cpu;
			tick_next_period = ktime_get();
			tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
		}

		/*
		 * Startup in periodic mode first.
		 */
		td->mode = TICKDEV_MODE_PERIODIC;
	} else {
		handler = td->evtdev->event_handler;
		next_event = td->evtdev->next_event;
		td->evtdev->event_handler = clockevents_handle_noop;
	}

	td->evtdev = newdev;

	/*
	 * When the device is not per cpu, pin the interrupt to the
	 * current cpu:
	 */
	if (!cpumask_equal(newdev->cpumask, cpumask))
		irq_set_affinity(newdev->irq, cpumask);

	/*
	 * When global broadcasting is active, check if the current
	 * device is registered as a placeholder for broadcast mode.
	 * This allows us to handle this x86 misfeature in a generic
	 * way.
	 */
	if (tick_device_uses_broadcast(newdev, cpu))
		return;

	if (td->mode == TICKDEV_MODE_PERIODIC)
		tick_setup_periodic(newdev, 0);
	else
		tick_setup_oneshot(newdev, handler, next_event);
}
tick_set_periodic_handler sets event_handler to tick_handle_periodic,
and the clock_event_device mode is then set to CLOCK_EVT_MODE_PERIODIC.
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
	tick_set_periodic_handler(dev, broadcast);

	/* Broadcast setup ? */
	if (!tick_device_is_functional(dev))
		return;

	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
	    !tick_broadcast_oneshot_active()) {
		clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
	} else {
		unsigned long seq;
		ktime_t next;

		do {
			seq = read_seqbegin(&xtime_lock);
			next = tick_next_period;
		} while (read_seqretry(&xtime_lock, seq));

		clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);

		for (;;) {
			if (!clockevents_program_event(dev, next, false))
				return;
			next = ktime_add(next, tick_period);
		}
	}
}

void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}
tick_handle_periodic is the periodic-mode event_handler, one of the kernel's core tick methods;
the actual per-tick work is done in tick_periodic.
void tick_handle_periodic(struct clock_event_device *dev)
{
	int cpu = smp_processor_id();
	ktime_t next;

	tick_periodic(cpu);

	if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
		return;
	/*
	 * Setup the next period for devices, which do not have
	 * periodic mode:
	 */
	next = ktime_add(dev->next_event, tick_period);
	for (;;) {
		if (!clockevents_program_event(dev, next, false))
			return;
		/*
		 * Have to be careful here. If we're in oneshot mode,
		 * before we call tick_periodic() in a loop, we need
		 * to be sure we're using a real hardware clocksource.
		 * Otherwise we could get trapped in an infinite
		 * loop, as the tick_periodic() increments jiffies,
		 * which then will increment time, possibly causing
		 * the loop to trigger again and again.
		 */
		if (timekeeping_valid_for_hres())
			tick_periodic(cpu);
		next = ktime_add(next, tick_period);
	}
}
On cpu0 (the do_timer cpu) it calls do_timer to update time and related state.
On every cpu it calls update_process_times to update cpu accounting and to kick the timer-wheel softirq.
static void tick_periodic(int cpu)
{
	if (tick_do_timer_cpu == cpu) {
		write_seqlock(&xtime_lock);

		/* Keep track of the next tick event */
		tick_next_period = ktime_add(tick_next_period, tick_period);

		do_timer(1);
		write_sequnlock(&xtime_lock);
	}

	update_process_times(user_mode(get_irq_regs()));
	profile_tick(CPU_PROFILING);
}

void do_timer(unsigned long ticks)
{
	jiffies_64 += ticks;
	update_wall_time();
	calc_global_load(ticks);
}
If the current cpu is the one responsible for timekeeping, do_timer performs the following:
- increment the jiffies_64 counter;
- update the wall clock (update_wall_time);
- recalculate the global load average via calc_global_load (it only does real work about once every LOAD_FREQ, roughly every 5 seconds).
Every cpu then calls update_process_times, which does the following:
- update the current task's time accounting via account_process_tick;
- raise the TIMER_SOFTIRQ softirq (run_local_timers) so that the classic low-resolution timer wheel gets processed, as shown in the sketch after the code below;
- check for pending RCU callbacks;
- let the scheduler do its per-tick accounting and preemption work via scheduler_tick.
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id();

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_check_callbacks(cpu, user_tick);
	printk_tick();
#ifdef CONFIG_IRQ_WORK
	if (in_irq())
		irq_work_run();
#endif
	scheduler_tick();
	run_posix_cpu_timers(p);
}
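run_local_timers is the piece that raises the timer softirq mentioned in the list above; in this kernel generation it is tiny (reproduced from memory, slightly condensed):

void run_local_timers(void)
{
	hrtimer_run_queues();		/* expire hrtimers while still in low-res mode */
	raise_softirq(TIMER_SOFTIRQ);	/* defer the timer wheel to softirq context */
}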
At this point cpu0's timer interrupt is fully set up and running, and jiffies_64 keeps ticking up.
Only a dozen or so jiffies later (with HZ=100), the ARM local timers join the kernel as well.
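The local timer wins the comparison in tick_check_new_device because it is cpu-local and carries a higher rating than the sp804. A minimal, hypothetical registration sketch of such a per-cpu device is shown below (the names twd_clockevent, twd_set_mode, twd_set_next_event and local_timer_register are illustrative placeholders, not the exact 3.6 TWD driver code):

/* Hypothetical per-cpu local timer registration, for illustration only */
static DEFINE_PER_CPU(struct clock_event_device, twd_clockevent);

/* placeholder hardware callbacks; the real driver programs the timer registers here */
static void twd_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { }
static int twd_set_next_event(unsigned long delta, struct clock_event_device *evt) { return 0; }

static void local_timer_register(unsigned int cpu, unsigned int irq)
{
	struct clock_event_device *evt = &per_cpu(twd_clockevent, cpu);

	evt->name		= "local_timer";
	evt->features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
	evt->rating		= 350;			/* higher than the sp804's 300 */
	evt->cpumask		= cpumask_of(cpu);	/* cpu-local device */
	evt->irq		= irq;
	evt->set_mode		= twd_set_mode;
	evt->set_next_event	= twd_set_next_event;

	/*
	 * Same path as before: this ends up in tick_check_new_device(),
	 * which replaces the sp804 as this cpu's tick_cpu_device->evtdev.
	 */
	clockevents_register_device(evt);
}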
sched_clock_register
void __init sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
	...
	struct clock_read_data rd;

	rd.read_sched_clock = read;
	...
	update_clock_read_data(&rd);
}
sched_clock_register fills in a struct clock_read_data rd, assigning the caller's system-counter read callback to rd.read_sched_clock, and publishes it to the system.
sched_clock(), which the scheduler calls very frequently, then uses exactly this callback to read the system counter:
unsigned long long notrace sched_clock(void)
{
	...
	cyc = (rd->read_sched_clock() - rd->epoch_cyc) & rd->sched_clock_mask;
	...
}
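As a usage illustration, a platform timer driver hands its free-running counter to this framework roughly as follows (the driver name, register offset and base variable are hypothetical; on kernel 3.6 itself the equivalent entry point is setup_sched_clock, as used in the hi3536 code below):

/* Hypothetical example: expose a 32-bit free-running counter to sched_clock */
static void __iomem *my_counter_base;		/* assumed to be mapped by the driver */

static u64 notrace my_read_sched_clock(void)
{
	/* hypothetical VALUE register at offset 0x04 of the counter block */
	return readl_relaxed(my_counter_base + 0x04);
}

static void __init my_timer_init(void __iomem *base, unsigned long rate)
{
	my_counter_base = base;
	/* 32 valid bits, counting at 'rate' Hz */
	sched_clock_register(my_read_sched_clock, 32, rate);
}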
jiffies & clocksource
static void __init hi3536_clocksource_init(void __iomem *base, const char *name)
{
	long rate = sp804_get_clock_rate(name);		// timer clock rate, 62.5 MHz here
	struct clocksource *clksrc = &hi3536_clocksource.clksrc;

	if (rate < 0)
		return;

	clksrc->name	= name;				// name = timer0
	clksrc->rating	= 200;				// clocksource rating (quality)
	clksrc->read	= hi3536_clocksource_read;	// reads the raw counter; the core converts it to system time
	clksrc->mask	= CLOCKSOURCE_MASK(32);		// 32-bit counter
	clksrc->flags	= CLOCK_SOURCE_IS_CONTINUOUS;	// continuous, free-running source
	clksrc->resume	= hi3536_clocksource_resume;

	hi3536_clocksource.base = base;
	hi3536_clocksource_start(base);			// program the timer registers

	clocksource_register_hz(clksrc, rate);		// compute mult/shift and let the core pick the best clocksource

	// Hook into the generic sched_clock module, which provides the sched_clock()
	// interface: nanoseconds elapsed since system boot.
	setup_sched_clock(hi3536_sched_clock_read, 32, rate);
}
Registering the read function
clocksource_mmio_init(clkevt->value, name, rate, 200, 32, clocksource_mmio_readl_down);
int __init clocksource_mmio_init(void __iomem *base, const char *name,
				 unsigned long hz, int rating, unsigned bits,
				 u64 (*read)(struct clocksource *))
{
	struct clocksource_mmio *cs;

	if (bits > 64 || bits < 16)
		return -EINVAL;

	cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL);
	if (!cs)
		return -ENOMEM;

	cs->reg = base;
	cs->clksrc.name = name;
	cs->clksrc.rating = rating;
	cs->clksrc.read = read;
	cs->clksrc.mask = CLOCKSOURCE_MASK(bits);
	cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;

	return clocksource_register_hz(&cs->clksrc, hz);
}
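For completeness, the mmio read helpers passed as the read callback (clocksource_mmio_readl_down above) are thin wrappers around readl_relaxed. A sketch of the down-counting variant, reproduced from memory, so treat the details as approximate:

struct clocksource_mmio {
	void __iomem *reg;
	struct clocksource clksrc;
};

static inline struct clocksource_mmio *to_mmio_clksrc(struct clocksource *c)
{
	return container_of(c, struct clocksource_mmio, clksrc);
}

/* sp804-style timers count down, so invert the raw value to get an up-counter */
u64 clocksource_mmio_readl_down(struct clocksource *c)
{
	return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
}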