[PATCH 04/11] smp: replace smp_call_function_single() with smp_xcall()

From: Donghai Qiao
Date: Thu Apr 14 2022 - 22:47:28 EST


Eliminate the per-CPU global csd_data and temporarily hook
smp_call_function_single() up to smp_xcall().

There is no obvious reason or evidence that distinguishing a cross
call to a single recipient from a cross call to multiple recipients
yields a noticeable performance gain. If anything can be optimized
here, it would be at the interrupt level, which is already addressed
by arch_send_call_function_single_ipi() and
arch_send_call_function_ipi_mask(). In fact, both are already taken
into account by smp_call_function_many_cond().

So, it is appropriate to make this change as part of the cross
call interface.

Signed-off-by: Donghai Qiao <dqiao@xxxxxxxxxx>
---
kernel/smp.c | 74 ++++++++++++++++++----------------------------------
1 file changed, 25 insertions(+), 49 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 3f9bc5ae7180..42ecaf960963 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -399,8 +399,6 @@ static __always_inline void csd_unlock(struct __call_single_data *csd)
smp_store_release(&csd->node.u_flags, 0);
}

-static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
-
void __smp_call_single_queue(int cpu, struct llist_node *node)
{
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
@@ -634,6 +632,9 @@ void flush_smp_call_function_from_idle(void)
}

/*
+ * This is a temporary hook-up. This function will be eliminated
+ * with the last patch in this series.
+ *
* smp_call_function_single - Run a function on a specific CPU
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
@@ -642,59 +643,21 @@ void flush_smp_call_function_from_idle(void)
* Returns 0 on success, else a negative status code.
*/
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
- int wait)
+ int wait)
{
- call_single_data_t *csd;
- call_single_data_t csd_stack = {
- .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
- };
- int this_cpu;
- int err;
-
- /*
- * prevent preemption and reschedule on another processor,
- * as well as CPU removal
- */
- this_cpu = get_cpu();
-
- /*
- * Can deadlock when called with interrupts disabled.
- * We allow cpu's that are not yet online though, as no one else can
- * send smp call function interrupt to this cpu and as such deadlocks
- * can't happen.
- */
- WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
- && !oops_in_progress);
-
- /*
- * When @wait we can deadlock when we interrupt between llist_add() and
- * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
- * csd_lock() on because the interrupt context uses the same csd
- * storage.
- */
- WARN_ON_ONCE(!in_task());
-
- csd = &csd_stack;
- if (!wait) {
- csd = this_cpu_ptr(&csd_data);
- csd_lock(csd);
- }
-
- csd->func = func;
- csd->info = info;
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
- csd->node.src = smp_processor_id();
- csd->node.dst = cpu;
-#endif
+ unsigned int flags = 0;

- err = generic_exec_single(cpu, csd);
+ if ((unsigned int)cpu >= nr_cpu_ids || !cpu_online(cpu))
+ return -ENXIO;

if (wait)
- csd_lock_wait(csd);
+ flags = XCALL_TYPE_SYNC;
+ else
+ flags = XCALL_TYPE_ASYNC;

- put_cpu();
+ smp_xcall(cpu, func, info, flags);

- return err;
+ return 0;
}
EXPORT_SYMBOL(smp_call_function_single);

@@ -1159,6 +1122,19 @@ void __smp_call_mask_cond(const struct cpumask *mask,
smp_cond_func_t cond_func,
unsigned int flags)
{
+ bool wait = false;
+
+ if (flags == XCALL_TYPE_SYNC)
+ wait = true;
+
+ preempt_disable();
+
+ /*
+ * This is a temporary hook. The function smp_call_function_many_cond()
+ * will be inlined here with the last patch in this series.
+ */
+ smp_call_function_many_cond(mask, func, info, wait, cond_func);
+ preempt_enable();
}

/*
--
2.27.0