Re: [PATCH v2] KVM: LAPIC: Periodically revaluate to get conservative lapic_timer_advance_ns

From: Wanpeng Li
Date: Wed Aug 21 2019 - 22:38:32 EST


ping,
On Thu, 15 Aug 2019 at 12:03, Wanpeng Li <kernellwp@xxxxxxxxx> wrote:
>
> From: Wanpeng Li <wanpengli@xxxxxxxxxxx>
>
> Even if for realtime CPUs, cache line bounces, frequency scaling, presence
> of higher-priority RT tasks, etc can still cause different response. These
> interferences should be considered and periodically revaluate whether
> or not the lapic_timer_advance_ns value is the best, do nothing if it is,
> otherwise recaluate again. Set lapic_timer_advance_ns to the minimal
> conservative value from all the estimated values.
>
> Testing on Skylake server, cat vcpu*/lapic_timer_advance_ns, before patch:
> 1628
> 4161
> 4321
> 3236
> ...
>
> Testing on Skylake server, cat vcpu*/lapic_timer_advance_ns, after patch:
> 1553
> 1499
> 1509
> 1489
> ...
>
> Testing on Haswell desktop, cat vcpu*/lapic_timer_advance_ns, before patch:
> 4617
> 3641
> 4102
> 4577
> ...
> Testing on Haswell desktop, cat vcpu*/lapic_timer_advance_ns, after patch:
> 2775
> 2892
> 2764
> 2775
> ...
>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> ---
> arch/x86/kvm/lapic.c | 34 ++++++++++++++++++++++++++++------
> arch/x86/kvm/lapic.h | 2 ++
> 2 files changed, 30 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index df5cd07..8487d9c 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -69,6 +69,7 @@
> #define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
> /* step-by-step approximation to mitigate fluctuation */
> #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
> +#define LAPIC_TIMER_ADVANCE_RECALC_PERIOD (600 * HZ)
>
> static inline int apic_test_vector(int vec, void *bitmap)
> {
> @@ -1480,10 +1481,21 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
> static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
> s64 advance_expire_delta)
> {
> - struct kvm_lapic *apic = vcpu->arch.apic;
> - u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
> + struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
> + u32 timer_advance_ns = ktimer->timer_advance_ns;
> u64 ns;
>
> + /* periodic revaluate */
> + if (unlikely(ktimer->timer_advance_adjust_done)) {
> + ktimer->recalc_timer_advance_ns = jiffies +
> + LAPIC_TIMER_ADVANCE_RECALC_PERIOD;
> + if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_DONE) {
> + timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
> + ktimer->timer_advance_adjust_done = false;
> + } else
> + return;
> + }
> +
> /* too early */
> if (advance_expire_delta < 0) {
> ns = -advance_expire_delta * 1000000ULL;
> @@ -1499,12 +1511,18 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
> }
>
> if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
> - apic->lapic_timer.timer_advance_adjust_done = true;
> + ktimer->timer_advance_adjust_done = true;
> if (unlikely(timer_advance_ns > 5000)) {
> timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
> - apic->lapic_timer.timer_advance_adjust_done = false;
> + ktimer->timer_advance_adjust_done = false;
> + }
> + ktimer->timer_advance_ns = timer_advance_ns;
> +
> + if (ktimer->timer_advance_adjust_done) {
> + if (ktimer->min_timer_advance_ns > timer_advance_ns)
> + ktimer->min_timer_advance_ns = timer_advance_ns;
> + ktimer->timer_advance_ns = ktimer->min_timer_advance_ns;
> }
> - apic->lapic_timer.timer_advance_ns = timer_advance_ns;
> }
>
> static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
> @@ -1523,7 +1541,8 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
> if (guest_tsc < tsc_deadline)
> __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
>
> - if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
> + if (unlikely(!apic->lapic_timer.timer_advance_adjust_done) ||
> + time_before(apic->lapic_timer.recalc_timer_advance_ns, jiffies))
> adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
> }
>
> @@ -2301,9 +2320,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
> if (timer_advance_ns == -1) {
> apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
> apic->lapic_timer.timer_advance_adjust_done = false;
> + apic->lapic_timer.recalc_timer_advance_ns = jiffies;
> + apic->lapic_timer.min_timer_advance_ns = UINT_MAX;
> } else {
> apic->lapic_timer.timer_advance_ns = timer_advance_ns;
> apic->lapic_timer.timer_advance_adjust_done = true;
> + apic->lapic_timer.recalc_timer_advance_ns = MAX_JIFFY_OFFSET;
> }
>
>
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 50053d2..56a05eb 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -31,6 +31,8 @@ struct kvm_timer {
> u32 timer_mode_mask;
> u64 tscdeadline;
> u64 expired_tscdeadline;
> + unsigned long recalc_timer_advance_ns;
> + u32 min_timer_advance_ns;
> u32 timer_advance_ns;
> s64 advance_expire_delta;
> atomic_t pending; /* accumulated triggered timers */
> --
> 2.7.4
>