ARM time system, part 1

kernel:3.6

Hardware:
A typical SoC has several external SP804 timers; assume here that timer0 acts as the system-wide clock event device and timer1 as the clocksource.
Each core additionally has an ARM SMP local timer.

Core data structures:
1. struct clock_event_device – the abstraction of a clock event device. Its set_next_event method programs the time of the next interrupt,
and event_handler is the interrupt-handling callback, normally one of the core routines tick_handle_periodic or hrtimer_interrupt.
Both SP804 timer0 and the SMP local timers are wrapped as clock_event_device instances and registered with the system.
2. struct clocksource – the abstraction of a clock source; its read method returns the current counter value. SP804 timer1
and the purely software-based jiffies counter are registered with the system as clocksources.
3. struct timekeeper – the global timekeeper variable. It manages the current clocksource and, together with the saved xtime value,
provides the "read the current time" service.
4. struct timespec xtime – a global variable holding the current wall-clock time, updated on each tick interrupt.
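
For orientation, trimmed excerpts of the two central types follow (fields reduced to the ones discussed in this article; see include/linux/clockchips.h and include/linux/clocksource.h for the complete 3.6 definitions):

/* Trimmed for illustration -- not the complete kernel definitions. */
struct clock_event_device {
        void                    (*event_handler)(struct clock_event_device *);
        int                     (*set_next_event)(unsigned long evt,
                                                  struct clock_event_device *);
        void                    (*set_mode)(enum clock_event_mode mode,
                                            struct clock_event_device *);
        unsigned int            features;       /* CLOCK_EVT_FEAT_PERIODIC / ONESHOT ... */
        int                     rating;         /* higher rating wins */
        int                     irq;
        const struct cpumask    *cpumask;
        /* ... name, mult, shift, next_event, mode, list, ... */
};

struct clocksource {
        const char              *name;
        cycle_t                 (*read)(struct clocksource *cs);
        cycle_t                 mask;
        u32                     mult;
        u32                     shift;
        unsigned long           flags;          /* e.g. CLOCK_SOURCE_IS_CONTINUOUS */
        int                     rating;
        /* ... */
};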

Overall flow:
1. start_kernel: init_timers()/hrtimers_init() initialise the timer wheel and hrtimers.
2. start_kernel->timekeeping_init() initialises the timekeeper structure, with clock = the jiffies clocksource.
3. start_kernel->time_init(): board code registers the SP804 timer0 clock_event_device.
It initialises the SP804 timer, registers the interrupt and sets the per-cpu tick_cpu_device->evtdev to the SP804 timer; from then on the tick interrupt is live.
4. start_kernel->time_init(): board code registers the SP804 timer1 clocksource.
5. start_kernel->rest_init()…kernel_init()->smp_prepare_cpus() registers the cpu0 local-timer clock_event_device.
In the process the earlier SP804 timer is shut down, after which only the local-timer interrupt fires (timer0 interrupts stop).
The per-cpu tick_cpu_device->evtdev now points to the local timer.
6. secondary_start_kernel()->percpu_timer_setup() registers the cpuX local-timer clock_event_device, same as for cpu0.
7. do_initcalls()->init_jiffies_clocksource() registers the jiffies clocksource.
This triggers the timekeeper's clock to be switched to the higher-rated SP804 timer1 clocksource,
which in turn lets the timer softirq later switch the tick from periodic to oneshot mode.
8. The TIMER_SOFTIRQ on cpu0 and cpuX switches each cpu's tick_cpu_device->evtdev to oneshot mode,
and the event_handler becomes hrtimer_interrupt (see the sketch right after this list).
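
Step 8 happens in softirq context: run_timer_softirq() first calls hrtimer_run_pending(), which decides whether the tick can be switched to oneshot/high-resolution mode. Lightly condensed from kernel/hrtimer.c:

void hrtimer_run_pending(void)
{
        if (hrtimer_hres_active())
                return;
        /*
         * The tick interrupt only set a flag when the clocksource became
         * good enough; the actual switch has to happen here in softirq
         * context to avoid deadlocking on xtime_lock.
         */
        if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
                hrtimer_switch_to_hres();
}

hrtimer_switch_to_hres() then calls tick_init_highres(), which puts the tick device into oneshot mode and installs hrtimer_interrupt as its event_handler.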

clock_event_device registration

static struct clock_event_device sp804_clockevent = {
        .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
        .set_mode        = sp804_set_mode,
        .set_next_event        = sp804_set_next_event,
        .rating                = 300,
        .cpumask        = cpu_all_mask,
};
static struct irqaction sp804_timer_irq = {
        .name                = "timer",
        .flags                = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
        .handler        = sp804_timer_interrupt,
        .dev_id                = &sp804_clockevent,
};
void __init sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name)
{
        struct clock_event_device *evt = &sp804_clockevent;
        long rate = sp804_get_clock_rate(name);

        if (rate < 0)
                return;

        clkevt_base = base;
        clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ);
        evt->name = name;
        evt->irq = irq;

        setup_irq(irq, &sp804_timer_irq);
        clockevents_config_and_register(evt, rate, 0xf, 0xffffffff);
}

The kernel-provided sp804_clockevents_init() essentially just registers sp804_clockevent.
The base parameter is the board-specific timer base address, and irq is the interrupt number.
The two key steps are setup_irq(), which installs the sp804_timer_irq interrupt handler,
and clockevents_config_and_register(), which registers the clock_event_device.
sp804_timer_interrupt() simply calls the event_handler of the associated sp804_clockevent;
that event_handler is only assigned later, during the clockevents_register_device() path.
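
For context, board code would wire the two SP804 timers up roughly as follows. This is a sketch only: the BOARD_TIMER*_PHYS and IRQ_BOARD_TIMER0 names are hypothetical, not from this article.

/* Hypothetical board init code -- base addresses and IRQ macro are illustrative. */
static void __init board_timer_init(void)
{
        void __iomem *timer0_base = ioremap(BOARD_TIMER0_PHYS, SZ_4K);
        void __iomem *timer1_base = ioremap(BOARD_TIMER1_PHYS, SZ_4K);

        /* timer1 becomes the free-running clocksource ... */
        sp804_clocksource_init(timer1_base, "timer1");
        /* ... and timer0 becomes the tick clock_event_device */
        sp804_clockevents_init(timer0_base, IRQ_BOARD_TIMER0, "timer0");
}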

static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id)
{
        struct clock_event_device *evt = dev_id;
        /* clear the interrupt */
        writel(1, clkevt_base + TIMER_INTCLR);
        /* event_handler is assigned during the clockevents_config_and_register() registration path */
        evt->event_handler(evt);
        return IRQ_HANDLED;
}

void clockevents_config_and_register(struct clock_event_device *dev,
                                     u32 freq, unsigned long min_delta,
                                     unsigned long max_delta)
{
        dev->min_delta_ticks = min_delta;
        dev->max_delta_ticks = max_delta;
        clockevents_config(dev, freq);
        clockevents_register_device(dev);
}
void clockevents_register_device(struct clock_event_device *dev)
{
        unsigned long flags;

        BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
        if (!dev->cpumask) {
                WARN_ON(num_possible_cpus() > 1);
                dev->cpumask = cpumask_of(smp_processor_id());
        }

        raw_spin_lock_irqsave(&clockevents_lock, flags);

        list_add(&dev->list, &clockevent_devices);
        clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
        clockevents_notify_released();

        raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}

clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev) walks the clockevents notifier chain; the listener that matters here is tick_notifier.
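
clockevents_do_notify() itself is just a thin wrapper around a raw notifier chain; tick_notifier is hooked onto that chain by tick_init() via clockevents_register_notifier(). From kernel/time/clockevents.c, lightly trimmed:

static RAW_NOTIFIER_HEAD(clockevents_chain);

static int clockevents_do_notify(unsigned long reason, void *dev)
{
        return raw_notifier_call_chain(&clockevents_chain, reason, dev);
}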

static struct notifier_block tick_notifier = {
        .notifier_call = tick_notify,
};
static int tick_notify(struct notifier_block *nb, unsigned long reason,
                               void *dev)
{
        switch (reason) {

        case CLOCK_EVT_NOTIFY_ADD:
                return tick_check_new_device(dev);
        /* ... other notification reasons elided ... */
        }
        return NOTIFY_OK;
}

This is the path by which a newly added clock_event_device is installed.
It is in fact the first clock_event_device in our system; at this point cpu1 is still parked in WFI.
The new device is not a cpu-local device, and every cpu's tick_cpu_device->evtdev is still NULL.
The key step is tick_setup_device(), which installs the new device.

static int tick_check_new_device(struct clock_event_device *newdev)
{
        struct clock_event_device *curdev;
        struct tick_device *td;
        int cpu, ret = NOTIFY_OK;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_device_lock, flags);

        cpu = smp_processor_id();
        if (!cpumask_test_cpu(cpu, newdev->cpumask))
                goto out_bc;

        td = &per_cpu(tick_cpu_device, cpu);
        curdev = td->evtdev;

        /* cpu local device ? */
        if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {

                /*
                 * If the cpu affinity of the device interrupt can not
                 * be set, ignore it.
                 */
                if (!irq_can_set_affinity(newdev->irq))
                        goto out_bc;

                /*
                 * If we have a cpu local device already, do not replace it
                 * by a non cpu local device
                 */
                if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
                        goto out_bc;
        }

        /*
         * If we have an active device, then check the rating and the oneshot
         * feature.
         */
        if (curdev) {
                /*
                 * Prefer one shot capable devices !
                 */
                if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
                    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                        goto out_bc;
                /*
                 * Check the rating
                 */
                if (curdev->rating >= newdev->rating)
                        goto out_bc;
        }

        /*
         * Replace the eventually existing device by the new
         * device. If the current device is the broadcast device, do
         * not give it back to the clockevents layer !
         */
        if (tick_is_broadcast_device(curdev)) {
                clockevents_shutdown(curdev);
                curdev = NULL;
        }
        clockevents_exchange_device(curdev, newdev);
        tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
        if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_oneshot_notify();

        raw_spin_unlock_irqrestore(&tick_device_lock, flags);
        return NOTIFY_STOP;

out_bc:
        /*
         * Can the new device be used as a broadcast device ?
         */
        if (tick_check_broadcast_device(newdev))
                ret = NOTIFY_STOP;

        raw_spin_unlock_irqrestore(&tick_device_lock, flags);

        return ret;
}

tick_setup_device() installs our SP804 timer as the per-cpu tick_cpu_device->evtdev.
Because the per-cpu tick_cpu_device->evtdev was previously empty, it also initialises the tick period,
programs the time of the next tick, and makes the current cpu (cpu0) the one responsible for the do_timer() housekeeping.
It then calls tick_setup_periodic() to assign the clock_event_device's event_handler;
the interrupt handler ultimately just invokes this event_handler, and it is only at this point that it gets set.

static void tick_setup_device(struct tick_device *td,
                              struct clock_event_device *newdev, int cpu,
                              const struct cpumask *cpumask)
{
        ktime_t next_event;
        void (*handler)(struct clock_event_device *) = NULL;

        /*
         * First device setup ?
         */
        if (!td->evtdev) {
                /*
                 * If no cpu took the do_timer update, assign it to
                 * this cpu:
                 */
                if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
                        tick_do_timer_cpu = cpu;
                        tick_next_period = ktime_get();
                        tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
                }

                /*
                 * Startup in periodic mode first.
                 */
                td->mode = TICKDEV_MODE_PERIODIC;
        } else {
                handler = td->evtdev->event_handler;
                next_event = td->evtdev->next_event;
                td->evtdev->event_handler = clockevents_handle_noop;
        }

        td->evtdev = newdev;

        /*
         * When the device is not per cpu, pin the interrupt to the
         * current cpu:
         */
        if (!cpumask_equal(newdev->cpumask, cpumask))
                irq_set_affinity(newdev->irq, cpumask);

        /*
         * When global broadcasting is active, check if the current
         * device is registered as a placeholder for broadcast mode.
         * This allows us to handle this x86 misfeature in a generic
         * way.
         */
        if (tick_device_uses_broadcast(newdev, cpu))
                return;

        if (td->mode == TICKDEV_MODE_PERIODIC)
                tick_setup_periodic(newdev, 0);
        else
                tick_setup_oneshot(newdev, handler, next_event);
}

tick_set_periodic_handler() sets event_handler to tick_handle_periodic,
and the clock_event_device is then switched to CLOCK_EVT_MODE_PERIODIC.

void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
        tick_set_periodic_handler(dev, broadcast);

        /* Broadcast setup ? */
        if (!tick_device_is_functional(dev))
                return;

        if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
            !tick_broadcast_oneshot_active()) {
                clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
        } else {
                unsigned long seq;
                ktime_t next;

                do {
                        seq = read_seqbegin(&xtime_lock);
                        next = tick_next_period;
                } while (read_seqretry(&xtime_lock, seq));

                clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);

                for (;;) {
                        if (!clockevents_program_event(dev, next, false))
                                return;
                        next = ktime_add(next, tick_period);
                }
        }
}
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

event_handler is the periodic tick interrupt handler, one of the kernel's core routines.
The actual work is done in tick_periodic().

void tick_handle_periodic(struct clock_event_device *dev)
{
        int cpu = smp_processor_id();
        ktime_t next;

        tick_periodic(cpu);

        if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
                return;
        /*
         * Setup the next period for devices, which do not have
         * periodic mode:
         */
        next = ktime_add(dev->next_event, tick_period);
        for (;;) {
                if (!clockevents_program_event(dev, next, false))
                        return;
                /*
                 * Have to be careful here. If we're in oneshot mode,
                 * before we call tick_periodic() in a loop, we need
                 * to be sure we're using a real hardware clocksource.
                 * Otherwise we could get trapped in an infinite
                 * loop, as the tick_periodic() increments jiffies,
                 * which then will increment time, possibly causing
                 * the loop to trigger again and again.
                 */
                if (timekeeping_valid_for_hres())
                        tick_periodic(cpu);
                next = ktime_add(next, tick_period);
        }
}

If this cpu is the do_timer cpu (cpu0), do_timer() is called to update the time and related bookkeeping.
Every cpu, however, calls update_process_times() to update per-cpu accounting and to service the timer-wheel softirq.

static void tick_periodic(int cpu)
{
        if (tick_do_timer_cpu == cpu) {
                write_seqlock(&xtime_lock);

                /* Keep track of the next tick event */
                tick_next_period = ktime_add(tick_next_period, tick_period);

                do_timer(1);
                write_sequnlock(&xtime_lock);
        }

        update_process_times(user_mode(get_irq_regs()));
        profile_tick(CPU_PROFILING);
}


void do_timer(unsigned long ticks)
{
        jiffies_64 += ticks;
        update_wall_time();
        calc_global_load(ticks);
}

If the current cpu is the one responsible for timekeeping, do_timer() performs the following:

update the jiffies_64 variable;
update the wall clock via update_wall_time();
periodically recompute the global load average via calc_global_load();

update_process_times() is then called and takes care of:

updating the process time statistics (account_process_tick() updates the cpu accounting);
raising the TIMER_SOFTIRQ software interrupt so that the traditional low-resolution timer wheel gets serviced (see the sketch after this list);
checking for pending RCU callbacks;
driving per-tick scheduler accounting and the preemption check via scheduler_tick();
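
The TIMER_SOFTIRQ mentioned above is raised from run_local_timers(); in a 3.6-era kernel that function is essentially just:

void run_local_timers(void)
{
        hrtimer_run_queues();           /* expire hrtimers while still in low-res mode */
        raise_softirq(TIMER_SOFTIRQ);   /* defer timer-wheel processing to softirq context */
}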

void update_process_times(int user_tick)
{
        struct task_struct *p = current;
        int cpu = smp_processor_id();

        /* Note: this timer irq context must be accounted for as well. */
        account_process_tick(p, user_tick);
        run_local_timers();
        rcu_check_callbacks(cpu, user_tick);
        printk_tick();
#ifdef CONFIG_IRQ_WORK
        if (in_irq())
                irq_work_run();
#endif
        scheduler_tick();
        run_posix_cpu_timers(p);
}

At this point cpu0's timer interrupt is set up and running, and jiffies_64 is steadily increasing.
Only a dozen or so jiffies later (HZ=100), the ARM local timers join the kernel as well.

sched_clock_register

void __init
sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
    ...
    struct clock_read_data rd;
    rd.read_sched_clock = read;
    ...
    update_clock_read_data(&rd);
}

sched_clock_register() fills in a struct clock_read_data rd, assigns the supplied system-counter read callback to rd.read_sched_clock, and publishes it with update_clock_read_data().

sched_clock(), which the scheduler calls very frequently, is exactly where that callback is invoked to read the system counter:

unsigned long long notrace sched_clock(void)
{
    ...
        cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
              rd->sched_clock_mask;
    ...
}
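
The raw cycle delta is then turned into nanoseconds with the pre-computed mult/shift pair; the conversion helper in the generic sched_clock code is roughly:

static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
        return (cyc * mult) >> shift;
}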



jiffies & clocksource

static void __init hi3536_clocksource_init(void __iomem *base, const char *name)
{
        long rate = sp804_get_clock_rate(name);        /* timer clock rate, 62.5 MHz here */
        struct clocksource *clksrc = &hi3536_clocksource.clksrc;

        if (rate < 0)
                return;

        clksrc->name   = name;                          /* name = "timer0" */
        clksrc->rating = 200;                           /* clocksource rating */
        clksrc->read   = hi3536_clocksource_read;       /* reads the counter; the core converts it to system time */
        clksrc->mask   = CLOCKSOURCE_MASK(32);          /* 32-bit counter */
        clksrc->flags  = CLOCK_SOURCE_IS_CONTINUOUS;    /* free-running, continuous source */
        clksrc->resume = hi3536_clocksource_resume;
        hi3536_clocksource.base = base;

        hi3536_clocksource_start(base);                 /* program the timer registers */

        clocksource_register_hz(clksrc, rate);          /* compute mult/shift; the core then picks the best source */

        /* generic sched_clock module: provides sched_clock(), nanoseconds since boot */
        setup_sched_clock(hi3536_sched_clock_read, 32, rate);
}
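
hi3536_clocksource_read() is not shown above. Since the SP804 counter counts down, a plausible implementation simply inverts the VALUE register (sketch only; the TIMER_VALUE offset and the struct layout are assumptions, not taken from this article):

/* Sketch only: assumes an SP804-style down-counter with TIMER_VALUE at offset 0x04. */
static cycle_t hi3536_clocksource_read(struct clocksource *cs)
{
        /* invert the down-counter so the returned value increases monotonically */
        return ~readl_relaxed(hi3536_clocksource.base + TIMER_VALUE);
}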

 

Registering the read callback

clocksource_mmio_init(clkevt->value, name,
        rate, 200, 32, clocksource_mmio_readl_down);
int __init clocksource_mmio_init(void __iomem *base, const char *name,
        unsigned long hz, int rating, unsigned bits,
        u64 (*read)(struct clocksource *))
{
        struct clocksource_mmio *cs;

        if (bits > 64 || bits < 16)
                return -EINVAL;

        cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL);
        if (!cs)
                return -ENOMEM;

        cs->reg = base;
        cs->clksrc.name = name;
        cs->clksrc.rating = rating;
        cs->clksrc.read = read;
        cs->clksrc.mask = CLOCKSOURCE_MASK(bits);
        cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;

        return clocksource_register_hz(&cs->clksrc, hz);
}
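
The clocksource_mmio_readl_down helper passed in above handles exactly this kind of down-counting register; in the generic mmio clocksource driver it looks roughly like this:

u64 clocksource_mmio_readl_down(struct clocksource *c)
{
        /* invert the down-counter and mask to the counter width */
        return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
}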
 
