RE: [PATCH v2 3/3] Drivers: hv: vmbus: Check for pending channel interrupts before taking a CPU offline

From: Michael Kelley
Date: Wed Apr 14 2021 - 14:43:28 EST


From: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx> Sent: Wednesday, April 14, 2021 8:01 AM
>
> Check that enough time has passed such that the modify channel message
> has been processed before taking a CPU offline.
>
> Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx>
> ---
> drivers/hv/hv.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 49 insertions(+)
>
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 3e6ff83adff42..dc9aa1130b22f 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -15,6 +15,7 @@
> #include <linux/hyperv.h>
> #include <linux/random.h>
> #include <linux/clockchips.h>
> +#include <linux/delay.h>
> #include <linux/interrupt.h>
> #include <clocksource/hyperv_timer.h>
> #include <asm/mshyperv.h>
> @@ -292,6 +293,41 @@ void hv_synic_disable_regs(unsigned int cpu)
> disable_percpu_irq(vmbus_irq);
> }
>
> +#define HV_MAX_TRIES 3
> +/*
> + * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
> + * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
> + * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
> + *
> + * If a bit is set, that means there is a pending channel interrupt. The expectation is
> + * that the normal interrupt handling mechanism will find and process the channel
> interrupt
> + * "very soon", and in the process clear the bit.
> + */
> +static bool hv_synic_event_pending(void)
> +{
> + struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
> + union hv_synic_event_flags *event =
> + (union hv_synic_event_flags *)hv_cpu->synic_event_page +
> VMBUS_MESSAGE_SINT;
> + unsigned long *recv_int_page = event->flags; /* assumes VMBus version >=
> VERSION_WIN8 */
> + bool pending;
> + u32 relid;
> + int tries = 0;
> +
> +retry:
> + pending = false;
> + for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
> + /* Special case - VMBus channel protocol messages */
> + if (relid == 0)
> + continue;
> + pending = true;
> + break;
> + }
> + if (pending && tries++ < HV_MAX_TRIES) {
> + usleep_range(10000, 20000);
> + goto retry;
> + }
> + return pending;
> +}
>
> int hv_synic_cleanup(unsigned int cpu)
> {
> @@ -336,6 +372,19 @@ int hv_synic_cleanup(unsigned int cpu)
> if (channel_found && vmbus_connection.conn_state == CONNECTED)
> return -EBUSY;
>
> + if (vmbus_proto_version >= VERSION_WIN10_V4_1) {
> + /*
> + * channel_found == false means that any channels that were previously
> + * assigned to the CPU have been reassigned elsewhere with a call of
> + * vmbus_send_modifychannel(). Scan the event flags page looking for
> + * bits that are set and waiting with a timeout for vmbus_chan_sched()
> + * to process such bits. If bits are still set after this operation
> + * and VMBus is connected, fail the CPU offlining operation.
> + */
> + if (hv_synic_event_pending() && vmbus_connection.conn_state == CONNECTED)
> + return -EBUSY;
> + }
> +

Perhaps the test for conn_state == CONNECTED could be factored out as follows. If we're
not CONNECTED (i.e., in the panic or kexec path) we should be able to also skip the search
for channels that are bound to the CPU since we will ignore the result anyway.

if (vmbus_connection.conn_state != CONNECTED)
goto always_cleanup;

if (cpu == VMBUS_CONNECT_CPU)
return -EBUSY;

[Code to search for channels that are bound to the CPU we're about to clean up]

if (channel_found)
return -EBUSY;

/*
* channel_found == false means that any channels that were previously
* assigned to the CPU have been reassigned elsewhere with a call of
* vmbus_send_modifychannel(). Scan the event flags page looking for
* bits that are set and waiting with a timeout for vmbus_chan_sched()
* to process such bits. If bits are still set after this operation
* and VMBus is connected, fail the CPU offlining operation.
*/
if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
return -EBUSY;

always_cleanup:

> hv_stimer_legacy_cleanup(cpu);
>
> hv_synic_disable_regs(cpu);
> --
> 2.25.1