RE: [PATCH 2/2] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself

From: Michael Kelley
Date: Fri Sep 10 2021 - 13:25:26 EST


From: Wei Liu <wei.liu@xxxxxxxxxx> Sent: Wednesday, September 8, 2021 12:43 PM
>
> It is not a good practice to allocate a cpumask on stack, given it may
> consume up to 1 kilobytes of stack space if the kernel is configured to
> have 8192 cpus.
>
> The internal helper functions __send_ipi_mask{,_ex} need to loop over
> the provided mask anyway, so it is not too difficult to skip `self'
> there. We can thus do away with the on-stack cpumask in
> hv_send_ipi_mask_allbutself.
>
> Adjust call sites of __send_ipi_mask as needed.
>
> Reported-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Suggested-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>
> Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Fixes: 68bb7bfb7985d ("X86/Hyper-V: Enable IPI enlightenments")
> Signed-off-by: Wei Liu <wei.liu@xxxxxxxxxx>
> ---
> arch/x86/hyperv/hv_apic.c | 32 ++++++++++++++++----------------
> 1 file changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index 90e682a92820..911cd41d931c 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -99,7 +99,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
> /*
> * IPI implementation on Hyper-V.
> */
> -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> + bool exclude_self)
> {
> struct hv_send_ipi_ex **arg;
> struct hv_send_ipi_ex *ipi_arg;
> @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
>
> if (!cpumask_equal(mask, cpu_present_mask)) {
> ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> + if (exclude_self)
> + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
> + else
> + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> }
> if (nr_bank < 0)
> goto ipi_mask_ex_done;
> @@ -138,9 +142,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> return hv_result_success(status);
> }
>
> -static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> +static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> + bool exclude_self)
> {
> - int cur_cpu, vcpu;
> + int cur_cpu, vcpu, this_cpu = smp_processor_id();
> struct hv_send_ipi ipi_arg;
> u64 status;
>
> @@ -172,6 +177,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> ipi_arg.cpu_mask = 0;
>
> for_each_cpu(cur_cpu, mask) {
> + if (exclude_self && cur_cpu == this_cpu)
> + continue;
> vcpu = hv_cpu_number_to_vp_number(cur_cpu);
> if (vcpu == VP_INVAL)
> return false;
> @@ -191,7 +198,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> return hv_result_success(status);
>
> do_ex_hypercall:
> - return __send_ipi_mask_ex(mask, vector);
> + return __send_ipi_mask_ex(mask, vector, exclude_self);
> }

This all looks correct to me, except for one difference compared with the
current code. In the current code, if the cpumask passed to
hv_send_ipi_mask_allbutself() indicates only a single CPU that is "self",
__send_ipi_mask() will detect that the cpumask is now empty, and
correctly return success without making the hypercall. But
the new code will make the hypercall with an empty input mask (both
in the SEND_IPI and SEND_IPI_EX cases). The Hyper-V TLFS is silent
on whether such a hypercall is a no-op that returns success or is an
error. We'll have a problem if it is an error. I think the safest thing
is to enhance the cpumask_empty() test at the beginning of
__send_ipi_mask() to also detect the case where only a single CPU
is specified, and it is "self". This could be done using cpumask_weight()
and checking for zero as the "empty" case. Then check for "1", and if
exclude_self is set, check if it is the "self" CPU.

The check for VP number >= 64 could also give a false positive since
"self" isn't filtered out yet, but that's OK as all that will happen is using
the _ex version where the non-ex version would have worked.

>
> static bool __send_ipi_one(int cpu, int vector)
> @@ -208,7 +215,7 @@ static bool __send_ipi_one(int cpu, int vector)
> return false;
>
> if (vp >= 64)
> - return __send_ipi_mask_ex(cpumask_of(cpu), vector);
> + return __send_ipi_mask_ex(cpumask_of(cpu), vector, false);
>
> status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
> return hv_result_success(status);
> @@ -222,20 +229,13 @@ static void hv_send_ipi(int cpu, int vector)
>
> static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
> {
> - if (!__send_ipi_mask(mask, vector))
> + if (!__send_ipi_mask(mask, vector, false))
> orig_apic.send_IPI_mask(mask, vector);
> }
>
> static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> {
> - unsigned int this_cpu = smp_processor_id();
> - struct cpumask new_mask;
> - const struct cpumask *local_mask;
> -
> - cpumask_copy(&new_mask, mask);
> - cpumask_clear_cpu(this_cpu, &new_mask);
> - local_mask = &new_mask;
> - if (!__send_ipi_mask(local_mask, vector))
> + if (!__send_ipi_mask(mask, vector, true))
> orig_apic.send_IPI_mask_allbutself(mask, vector);
> }
>
> @@ -246,7 +246,7 @@ static void hv_send_ipi_allbutself(int vector)
>
> static void hv_send_ipi_all(int vector)
> {
> - if (!__send_ipi_mask(cpu_online_mask, vector))
> + if (!__send_ipi_mask(cpu_online_mask, vector, false))
> orig_apic.send_IPI_all(vector);
> }
>
> --
> 2.30.2