RE: [PATCH] rcu: Fix bind rcu related kthreads to housekeeping CPUs

From: Zhang, Qiang1
Date: Wed Feb 08 2023 - 21:50:44 EST



>Did the testing and checked the CPU affinities of RCU kthreads by running "ps -eo psr,comm| grep rcu" [1]:
>
>Without Zqiang's patch:
> ...
> 0 rcu_preempt
> 0 rcuog/0
> 0 rcuop/0
> 0 rcuop/1
> 0 rcuog/2
> 0 rcuop/2
> 2 rcuop/3
>
>With Zqiang's patch:
> ...
> 3 rcu_preempt // on housekeeping CPU 3
> 2 rcuog/0 // on housekeeping CPU 2
> 3 rcuop/0 // on housekeeping CPU 3
> 3 rcuop/1 // on housekeeping CPU 3
> 0 rcuog/2 // on non-housekeeping CPU 0. [2]
> 0 rcuop/2 // on non-housekeeping CPU 0. [2]
> 2 rcuop/3
>
>[1] The 1st column of the output is for CPU IDs and the 2nd column is for thread names.
>[2] Added debug messages into the two threads, and found that the two threads didn't run after all CPUs were online.
> So if they run again after all CPUs are online, they will also be moved to housekeeping CPUs by Zqiang's patch.
>
> From: Zqiang <qiang1.zhang@xxxxxxxxx>
> Sent: Wednesday, February 8, 2023 11:34 AM
> To: paulmck@xxxxxxxxxx; frederic@xxxxxxxxxx; joel@xxxxxxxxxxxxxxxxx
> Cc: rcu@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx
> Subject: [PATCH] rcu: Fix bind rcu related kthreads to housekeeping CPUs
>
> For kernels built with CONFIG_NO_HZ_FULL=y and CONFIG_RCU_NOCB_CPU=y,
> run the following tests:
>
> runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4"
> bootparams="console=ttyS0 isolcpus=0,1 nohz_full=0,1 rcu_nocbs=0,1"
>
> root@qemux86-64:~# ps -ef | grep "rcu_preempt" | grep -v grep | awk '{print
> $2}'
> 15
> root@qemux86-64:~# ps -ef | grep "rcuop/0" | grep -v grep | awk '{print $2}'
> 17
> root@qemux86-64:~# taskset -p 15
> pid 15's current affinity mask: 1
> root@qemux86-64:~# taskset -p 17
> pid 17's current affinity mask: 1
>
> The affinity of these rcu related kthreads is not set to housekeeping cpumask,
> even if called rcu_bind_gp_kthread() when the rcu related kthread starts.
>
> set_cpus_allowed_ptr()
> ->__set_cpus_allowed_ptr()
> ->__set_cpus_allowed_ptr_locked
> {
> bool kthread = p->flags & PF_KTHREAD;
> ....
> if (kthread || is_migration_disabled(p))
> cpu_valid_mask = cpu_online_mask;
> ....
> dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, ctx-
> >new_mask);
> if (dest_cpu >= nr_cpu_ids) {
> ret = -EINVAL;
> goto out;
> }
> ....
> }
>
> Due to these rcu related kthreads be created before bringup other CPUS, so
> when they running and set hosekeeping cpus affinity, found that only CPU0 is
> online at this time and CPU0 is set to no_hz_full CPU, the ctx->new_mask not
> contain CPU0 and this will cause dest_cpu in the above code snippet to be an
> illegal value and return directly, ultimately, these rcu related kthreads failed to
> bind housekeeping CPUS.
>
>s/so//
>s/hosekeeping/housekeeping/
>...
>
> This commit therefore rebind these rcu related kthreads to housekeeping CPUs
>
>s/rebind/rebinds/
>
>Could you tweak all the commit messages to fix the typos and the grammar errors? 😊
>Other than that,


Thanks for testing, I ignore the cpu hotplug scenario and exp kworker cpu affinity setting,
I will add Tested-by tags in next version.

Thanks
Zqiang


>
> Tested-by: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
>
> after the kernel boot sequence ends, at this point all CPUs are online.
>
> Signed-off-by: Zqiang <qiang1.zhang@xxxxxxxxx>
> ---
> kernel/rcu/tasks.h | 7 +++++--
> kernel/rcu/tree.c | 7 ++++++-
> kernel/rcu/tree.h | 1 -
> kernel/rcu/tree_nocb.h | 13 ++++++++++++-
> kernel/rcu/tree_plugin.h | 9 ---------
> 5 files changed, 23 insertions(+), 14 deletions(-)
>
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index
> baf7ec178155..8b3530cca291 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -544,9 +544,8 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool
> midboot) static int __noreturn rcu_tasks_kthread(void *arg) {
> struct rcu_tasks *rtp = arg;
> + bool rcu_setaffinity_setup = false;
>
> - /* Run on housekeeping CPUs by default. Sysadm can move if desired.
> */
> - housekeeping_affine(current, HK_TYPE_RCU);
> WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
>
> /*
> @@ -556,6 +555,10 @@ static int __noreturn rcu_tasks_kthread(void *arg)
> * This loop is terminated by the system going down. ;-)
> */
> for (;;) {
> + if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> + set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> + rcu_setaffinity_setup = true;
> + }
> // Wait for one grace period and invoke any callbacks
> // that are ready.
> rcu_tasks_one_gp(rtp, false);
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index
> ee27a03d7576..0ac47a773e13 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1781,8 +1781,13 @@ static noinline void rcu_gp_cleanup(void)
> */
> static int __noreturn rcu_gp_kthread(void *unused) {
> - rcu_bind_gp_kthread();
> + bool rcu_setaffinity_setup = false;
> +
> for (;;) {
> + if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> + set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> + rcu_setaffinity_setup = true;
> + }
>
> /* Handle grace-period start. */
> for (;;) {
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index
> 192536916f9a..391e3fae4ff5 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -495,7 +495,6 @@ do {
> \
> #define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags) #endif /* #else
> #ifdef CONFIG_RCU_NOCB_CPU */
>
> -static void rcu_bind_gp_kthread(void);
> static bool rcu_nohz_full_cpu(void);
>
> /* Forward declarations for tree_stall.h */ diff --git a/kernel/rcu/tree_nocb.h
> b/kernel/rcu/tree_nocb.h index f2280616f9d5..254d0f631d57 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -894,8 +894,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> static int rcu_nocb_gp_kthread(void *arg) {
> struct rcu_data *rdp = arg;
> + bool rcu_setaffinity_setup = false;
>
> for (;;) {
> + if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> + set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> + rcu_setaffinity_setup = true;
> + }
> +
> WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
> nocb_gp_wait(rdp);
> cond_resched_tasks_rcu_qs();
> @@ -1002,10 +1008,15 @@ static void nocb_cb_wait(struct rcu_data *rdp)
> static int rcu_nocb_cb_kthread(void *arg) {
> struct rcu_data *rdp = arg;
> -
> + bool rcu_setaffinity_setup = false;
> // Each pass through this loop does one callback batch, and,
> // if there are no more ready callbacks, waits for them.
> for (;;) {
> + if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> + set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> + rcu_setaffinity_setup = true;
> + }
> +
> nocb_cb_wait(rdp);
> cond_resched_tasks_rcu_qs();
> }
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index
> 7b0fe741a088..fdde71ebb83e 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -1294,12 +1294,3 @@ static bool rcu_nohz_full_cpu(void)
> return false;
> }
>
> -/*
> - * Bind the RCU grace-period kthreads to the housekeeping CPU.
> - */
> -static void rcu_bind_gp_kthread(void)
> -{
> - if (!tick_nohz_full_enabled())
> - return;
> - housekeeping_affine(current, HK_TYPE_RCU);
> -}
> --
> 2.25.1