Re: [PATCH] KVM: Pre-allocate 1 cpumask variable per cpu for both pv tlb and pv ipis

From: Wanpeng Li
Date: Tue Feb 04 2020 - 08:10:13 EST


Cc Thadeu,
On Tue, 4 Feb 2020 at 20:57, Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> wrote:
>
> Wanpeng Li <kernellwp@xxxxxxxxx> writes:
>
> > From: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> >
> > Nick Desaulniers reported:
> >
> > When building with:
> > $ make CC=clang arch/x86/ CFLAGS=-Wframe-larger-than=1000
> > The following warning is observed:
> > arch/x86/kernel/kvm.c:494:13: warning: stack frame size of 1064 bytes in
> > function 'kvm_send_ipi_mask_allbutself' [-Wframe-larger-than=]
> > static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int
> > vector)
> > ^
> > Debugging with:
> > https://github.com/ClangBuiltLinux/frame-larger-than
> > via:
> > $ python3 frame_larger_than.py arch/x86/kernel/kvm.o \
> > kvm_send_ipi_mask_allbutself
> > points to the stack allocated `struct cpumask newmask` in
> > `kvm_send_ipi_mask_allbutself`. The size of a `struct cpumask` is
> > potentially large, as it's CONFIG_NR_CPUS divided by BITS_PER_LONG for
> > the target architecture. CONFIG_NR_CPUS for X86_64 can be as high as
> > 8192, making a single instance of a `struct cpumask` 1024 B.
> >
> > This patch fixes it by pre-allocating 1 cpumask variable per cpu and using
> > it for both pv tlb and pv ipis.
> >
> > Reported-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> > Acked-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> > Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> > Cc: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> > Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> > ---
> > arch/x86/kernel/kvm.c | 33 +++++++++++++++++++++------------
> > 1 file changed, 21 insertions(+), 12 deletions(-)
> >
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 81045aab..b1e8efa 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -425,6 +425,8 @@ static void __init sev_map_percpu_data(void)
> > }
> > }
> >
> > +static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
> > +
> > #ifdef CONFIG_SMP
> > #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
> >
> > @@ -490,12 +492,12 @@ static void kvm_send_ipi_mask(const struct
> > cpumask *mask, int vector)
> > static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask,
> > int vector)
> > {
> > unsigned int this_cpu = smp_processor_id();
> > - struct cpumask new_mask;
> > + struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
> > const struct cpumask *local_mask;
> >
> > - cpumask_copy(&new_mask, mask);
> > - cpumask_clear_cpu(this_cpu, &new_mask);
> > - local_mask = &new_mask;
> > + cpumask_copy(new_mask, mask);
> > + cpumask_clear_cpu(this_cpu, new_mask);
> > + local_mask = new_mask;
> > __send_ipi_mask(local_mask, vector);
> > }
> >
> > @@ -575,7 +577,6 @@ static void __init kvm_apf_trap_init(void)
> > update_intr_gate(X86_TRAP_PF, async_page_fault);
> > }
> >
> > -static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
> >
> > static void kvm_flush_tlb_others(const struct cpumask *cpumask,
> > const struct flush_tlb_info *info)
> > @@ -583,7 +584,7 @@ static void kvm_flush_tlb_others(const struct
> > cpumask *cpumask,
> > u8 state;
> > int cpu;
> > struct kvm_steal_time *src;
> > - struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
> > + struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
> >
> > cpumask_copy(flushmask, cpumask);
> > /*
> > @@ -624,6 +625,7 @@ static void __init kvm_guest_init(void)
> > kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
> > pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
> > pv_ops.mmu.tlb_remove_table = tlb_remove_table;
> > + pr_info("KVM setup pv remote TLB flush\n");
> > }
> >
> > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> > @@ -732,23 +734,30 @@ static __init int activate_jump_labels(void)
> > }
> > arch_initcall(activate_jump_labels);
> >
> > -static __init int kvm_setup_pv_tlb_flush(void)
> > +static __init int kvm_alloc_cpumask(void)
> > {
> > int cpu;
> > + bool alloc = false;
> >
> > if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
> > !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
> > - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
> > + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
> > + alloc = true;
> > +
> > +#if defined(CONFIG_SMP)
> > + if (!alloc && kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
>
> '!alloc' check is superfluous.
>
> > + alloc = true;
> > +#endif
> > +
> > + if (alloc)
> > for_each_possible_cpu(cpu) {
> > - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
> > + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
> > GFP_KERNEL, cpu_to_node(cpu));
> > }
> > - pr_info("KVM setup pv remote TLB flush\n");
> > - }
> >
> > return 0;
> > }
> > -arch_initcall(kvm_setup_pv_tlb_flush);
> > +arch_initcall(kvm_alloc_cpumask);
>
> Honestly, I'd simplify the check in kvm_alloc_cpumask() as
>
> if (!kvm_para_available())
> return;
>
> and allocated masks for all other cases.

This would waste memory if pv tlb and pv ipis are not exposed, which are
the only users currently.

Wanpeng