[PATCH 05/11] smp: replace smp_call_function_single_async() with smp_xcall_private()

From: Donghai Qiao
Date: Thu Apr 14 2022 - 22:47:38 EST


Replace smp_call_function_single_async() with smp_xcall_private()
and extend smp_xcall_private() to support synchronous single-CPU
calls with preallocated csd structures.

Ideally, the new interface smp_xcall() should be able to do what
smp_call_function_single_async() does. However, because the csd is
provided and maintained by the callers, there is a risk of corrupting
the call_single_queue[cpu] linked list if the clients manipulate
their csd inappropriately. On the other hand, there should be no
noticeable performance advantage in providing a preallocated csd for
cross-call kernel consumers. Thus, in the long run, the consumers
should be changed to not use this type of preallocated csd.

Signed-off-by: Donghai Qiao <dqiao@xxxxxxxxxx>
---
include/linux/smp.h | 3 +-
kernel/smp.c | 163 +++++++++++++++++++++-----------------------
2 files changed, 81 insertions(+), 85 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 12d6efef34f7..8a234e707f10 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -206,7 +206,8 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask);

-int smp_call_function_single_async(int cpu, struct __call_single_data *csd);
+#define smp_call_function_single_async(cpu, csd) \
+ smp_xcall_private(cpu, csd, XCALL_TYPE_ASYNC)

/*
* Cpus stopping functions in panic. All have default weak definitions.
diff --git a/kernel/smp.c b/kernel/smp.c
index 42ecaf960963..aef913b54f81 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -429,41 +429,6 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
send_call_function_single_ipi(cpu);
}

-/*
- * Insert a previously allocated call_single_data_t element
- * for execution on the given CPU. data must already have
- * ->func, ->info, and ->flags set.
- */
-static int generic_exec_single(int cpu, struct __call_single_data *csd)
-{
- if (cpu == smp_processor_id()) {
- smp_call_func_t func = csd->func;
- void *info = csd->info;
- unsigned long flags;
-
- /*
- * We can unlock early even for the synchronous on-stack case,
- * since we're doing this from the same CPU..
- */
- csd_lock_record(csd);
- csd_unlock(csd);
- local_irq_save(flags);
- func(info);
- csd_lock_record(NULL);
- local_irq_restore(flags);
- return 0;
- }
-
- if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
- csd_unlock(csd);
- return -ENXIO;
- }
-
- __smp_call_single_queue(cpu, &csd->node.llist);
-
- return 0;
-}
-
/**
* generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
*
@@ -661,52 +626,6 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
}
EXPORT_SYMBOL(smp_call_function_single);

-/**
- * smp_call_function_single_async() - Run an asynchronous function on a
- * specific CPU.
- * @cpu: The CPU to run on.
- * @csd: Pre-allocated and setup data structure
- *
- * Like smp_call_function_single(), but the call is asynchonous and
- * can thus be done from contexts with disabled interrupts.
- *
- * The caller passes his own pre-allocated data structure
- * (ie: embedded in an object) and is responsible for synchronizing it
- * such that the IPIs performed on the @csd are strictly serialized.
- *
- * If the function is called with one csd which has not yet been
- * processed by previous call to smp_call_function_single_async(), the
- * function will return immediately with -EBUSY showing that the csd
- * object is still in progress.
- *
- * NOTE: Be careful, there is unfortunately no current debugging facility to
- * validate the correctness of this serialization.
- *
- * Return: %0 on success or negative errno value on error
- */
-int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
-{
- int err = 0;
-
- preempt_disable();
-
- if (csd->node.u_flags & CSD_FLAG_LOCK) {
- err = -EBUSY;
- goto out;
- }
-
- csd->node.u_flags = CSD_FLAG_LOCK;
- smp_wmb();
-
- err = generic_exec_single(cpu, csd);
-
-out:
- preempt_enable();
-
- return err;
-}
-EXPORT_SYMBOL_GPL(smp_call_function_single_async);
-
/*
* smp_call_function_any - Run a function on any of the given cpus
* @mask: The mask of cpus it can run on.
@@ -1251,16 +1170,92 @@ EXPORT_SYMBOL(smp_xcall_mask_cond);
* Because the call is asynchonous with a preallocated csd structure, thus
* it can be called from contexts with disabled interrupts.
*
- * Parameters
+ * Ideally this functionality should be part of smp_xcall_mask_cond().
+ * However, because the csd is provided and maintained by the caller,
+ * merging it into smp_xcall_mask_cond() would add extra complications
+ * there. Until there is a better way to facilitate all kinds of xcall,
+ * handle this case with a separate function.
+ *
+ * CSD_FLAG_LOCK is set in csd->node.u_flags only if the xcall type is
+ * CSD_TYPE_SYNC or CSD_TYPE_ASYNC.
*
+ * Parameters:
* cpu: Must be a positive value less than nr_cpu_id.
* csd: The private csd provided by the caller.
- *
* Others: see smp_xcall().
+ *
+ * Return: %0 on success or negative errno value on error.
+ *
+ * The following comments are from smp_call_function_single_async():
+ *
+ * The call is asynchronous and can thus be done from contexts with
+ * disabled interrupts. If the function is called with a csd which
+ * has not yet been processed by a previous call, the function
+ * returns immediately with -EBUSY to indicate that the csd object is
+ * still in progress.
+ *
+ * NOTE: Be careful, there is unfortunately no current debugging
+ * facility to validate the correctness of this serialization.
*/
int smp_xcall_private(int cpu, call_single_data_t *csd, unsigned int flags)
{
- return 0;
+ int err = 0;
+
+ if ((unsigned int)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+ pr_warn("cpu ID must be a positive number < nr_cpu_ids and must be currently online\n");
+ return -EINVAL;
+ }
+
+ if (csd == NULL) {
+ pr_warn("csd must not be NULL\n");
+ return -EINVAL;
+ }
+
+ preempt_disable();
+ if (csd->node.u_flags & CSD_FLAG_LOCK) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * CSD_FLAG_LOCK is set for CSD_TYPE_SYNC or CSD_TYPE_ASYNC only.
+ */
+ if ((flags & ~(CSD_TYPE_SYNC | CSD_TYPE_ASYNC)) == 0)
+ csd->node.u_flags = CSD_FLAG_LOCK | flags;
+ else
+ csd->node.u_flags = flags;
+
+ if (cpu == smp_processor_id()) {
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+ unsigned long flags;
+
+ /*
+ * We can unlock early even for the synchronous on-stack case,
+ * since we're doing this from the same CPU..
+ */
+ csd_lock_record(csd);
+ csd_unlock(csd);
+ local_irq_save(flags);
+ func(info);
+ csd_lock_record(NULL);
+ local_irq_restore(flags);
+ goto out;
+ }
+
+ /*
+ * Ensure the flags are visible before the csd
+ * goes to the queue.
+ */
+ smp_wmb();
+
+ __smp_call_single_queue(cpu, &csd->node.llist);
+
+ if (flags & CSD_TYPE_SYNC)
+ csd_lock_wait(csd);
+out:
+ preempt_enable();
+ return err;
}
EXPORT_SYMBOL(smp_xcall_private);

--
2.27.0