Re: tip.today - scheduler bam boom crash (cpu hotplug)

From: Paolo Bonzini
Date: Mon Feb 27 2017 - 10:08:10 EST




On 27/02/2017 13:30, Wanpeng Li wrote:
> This results in sched clock always unstable for kvm guest since there
> is no invariant tsc cpuid bit exposed for kvm guest currently. The
> blockage happened for several reasons:
>
> 1) Migration: to host with different TSC frequency.
> 2) Savevm: It is not safe to use the TSC for wall clock timer services.

Right, the purpose of kvmclock is basically to ensure a stable clock
without using TSC. However, I know zilch about kernel/sched/clock.c so
I cannot really understand your patch.

Paolo

> How about something like below(untested):
>
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index bae6ea6..a61c477 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -115,6 +115,7 @@ static inline void kvm_sched_clock_init(bool stable)
>
> kvm_sched_clock_offset = kvm_clock_read();
> pv_time_ops.sched_clock = kvm_sched_clock_read;
> + hypervisor_sched_clock_stable();
>
> printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
> kvm_sched_clock_offset);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 451e241..38c6edb 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -2499,6 +2499,10 @@ static inline void clear_sched_clock_stable(void)
> {
> }
>
> +static inline void hypervisor_sched_clock_stable(void)
> +{
> +}
> +
> static inline void sched_clock_idle_sleep_event(void)
> {
> }
> @@ -2526,6 +2530,7 @@ extern void sched_clock_init_late(void);
> */
> extern int sched_clock_stable(void);
> extern void clear_sched_clock_stable(void);
> +extern void hypervisor_sched_clock_stable(void);
>
> extern void sched_clock_tick(void);
> extern void sched_clock_idle_sleep_event(void);
> diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
> index ad64efe..a46639e 100644
> --- a/kernel/sched/clock.c
> +++ b/kernel/sched/clock.c
> @@ -77,9 +77,15 @@ EXPORT_SYMBOL_GPL(sched_clock);
>
> __read_mostly int sched_clock_running;
>
> +enum {
> + SCHED_CLOCK_INIT = 0,
> + HYPERVISOR_SCHED_CLOCK_STABLE,
> + SCHED_CLOCK_INIT_LATE
> +};
> +
> void sched_clock_init(void)
> {
> - sched_clock_running = 1;
> + sched_clock_running = SCHED_CLOCK_INIT;
> }
>
> #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
> @@ -170,13 +176,14 @@ void clear_sched_clock_stable(void)
>
> smp_mb(); /* matches sched_clock_init_late() */
>
> - if (sched_clock_running == 2)
> + if (sched_clock_running == SCHED_CLOCK_INIT_LATE)
> schedule_work(&sched_clock_work);
> }
>
> void sched_clock_init_late(void)
> {
> - sched_clock_running = 2;
> + if (sched_clock_running == SCHED_CLOCK_INIT)
> + sched_clock_running = SCHED_CLOCK_INIT_LATE;
> /*
> * Ensure that it is impossible to not do a static_key update.
> *
> @@ -186,8 +193,15 @@ void sched_clock_init_late(void)
> */
> smp_mb(); /* matches {set,clear}_sched_clock_stable() */
>
> - if (__sched_clock_stable_early)
> + if (__sched_clock_stable_early ||
> + sched_clock_running == HYPERVISOR_SCHED_CLOCK_STABLE) {
> __set_sched_clock_stable();
> + }
> +}
> +
> +void hypervisor_sched_clock_stable()
> +{
> + sched_clock_running = HYPERVISOR_SCHED_CLOCK_STABLE;
> }
>
> /*