Re: [PATCH v5 10/15] sched: Introduce force_compatible_cpus_allowed_ptr() to limit CPU affinity

From: Suren Baghdasaryan
Date: Sun Dec 27 2020 - 23:33:36 EST


Just a couple of minor nits.

On Tue, Dec 8, 2020 at 5:29 AM Will Deacon <will@xxxxxxxxxx> wrote:
>
> Asymmetric systems may not offer the same level of userspace ISA support
> across all CPUs, meaning that some applications cannot be executed by
> some CPUs. As a concrete example, upcoming arm64 big.LITTLE designs do
> not feature support for 32-bit applications on both clusters.
>
> Although userspace can carefully manage the affinity masks for such
> tasks, one place where it is particularly problematic is execve()
> because the CPU on which the execve() is occurring may be incompatible
> with the new application image. In such a situation, it is desirable to
> restrict the affinity mask of the task and ensure that the new image is
> entered on a compatible CPU. From userspace's point of view, this looks
> the same as if the incompatible CPUs have been hotplugged off in the
> task's affinity mask.
>
> In preparation for restricting the affinity mask for compat tasks on
> arm64 systems without uniform support for 32-bit applications, introduce
> force_compatible_cpus_allowed_ptr(), which restricts the affinity mask
> for a task to contain only compatible CPUs.
>
> Reviewed-by: Quentin Perret <qperret@xxxxxxxxxx>
> Signed-off-by: Will Deacon <will@xxxxxxxxxx>
> ---
> include/linux/sched.h | 1 +
> kernel/sched/core.c | 100 +++++++++++++++++++++++++++++++++++-------
> 2 files changed, 86 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 76cd21fa5501..e42dd0fb85c5 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1653,6 +1653,7 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
> #ifdef CONFIG_SMP
> extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
> extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
> +extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
> #else
> static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
> {
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 92ac3e53f50a..1cfc94be18a9 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1863,25 +1863,19 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
> }
>
> /*
> - * Change a given task's CPU affinity. Migrate the thread to a
> - * proper CPU and schedule it away if the CPU it's executing on
> - * is removed from the allowed bitmask.
> - *
> - * NOTE: the caller must have a valid reference to the task, the
> - * task must not exit() & deallocate itself prematurely. The
> - * call is not atomic; no spinlocks may be held.
> + * Called with both p->pi_lock and rq->lock held; drops both before returning.

Maybe annotate with __releases()?
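Something like this, perhaps (just a sketch based on the lock/unlock
comment above; I haven't checked whether sparse is happy with it here):

static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
					 const struct cpumask *new_mask,
					 bool check,
					 struct rq *rq,
					 struct rq_flags *rf)
	__releases(rq->lock)
	__releases(p->pi_lock)
{
	...
}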

> */
> -static int __set_cpus_allowed_ptr(struct task_struct *p,
> - const struct cpumask *new_mask, bool check)
> +static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
> + const struct cpumask *new_mask,
> + bool check,
> + struct rq *rq,
> + struct rq_flags *rf)
> {
> const struct cpumask *cpu_valid_mask = cpu_active_mask;
> const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
> unsigned int dest_cpu;
> - struct rq_flags rf;
> - struct rq *rq;
> int ret = 0;
>
> - rq = task_rq_lock(p, &rf);
> update_rq_clock(rq);
>
> if (p->flags & PF_KTHREAD) {
> @@ -1936,7 +1930,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
> if (task_running(rq, p) || p->state == TASK_WAKING) {
> struct migration_arg arg = { p, dest_cpu };
> /* Need help from migration thread: drop lock and wait. */
> - task_rq_unlock(rq, p, &rf);
> + task_rq_unlock(rq, p, rf);
> stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
> return 0;
> } else if (task_on_rq_queued(p)) {
> @@ -1944,20 +1938,96 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
> * OK, since we're going to drop the lock immediately
> * afterwards anyway.
> */
> - rq = move_queued_task(rq, &rf, p, dest_cpu);
> + rq = move_queued_task(rq, rf, p, dest_cpu);
> }
> out:
> - task_rq_unlock(rq, p, &rf);
> + task_rq_unlock(rq, p, rf);
>
> return ret;
> }
>
> +/*
> + * Change a given task's CPU affinity. Migrate the thread to a
> + * proper CPU and schedule it away if the CPU it's executing on
> + * is removed from the allowed bitmask.
> + *
> + * NOTE: the caller must have a valid reference to the task, the
> + * task must not exit() & deallocate itself prematurely. The
> + * call is not atomic; no spinlocks may be held.
> + */
> +static int __set_cpus_allowed_ptr(struct task_struct *p,
> + const struct cpumask *new_mask, bool check)
> +{
> + struct rq_flags rf;
> + struct rq *rq;
> +
> + rq = task_rq_lock(p, &rf);
> + return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
> +}
> +
> int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
> {
> return __set_cpus_allowed_ptr(p, new_mask, false);
> }
> EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
>
> +/*
> + * Change a given task's CPU affinity to the intersection of its current
> + * affinity mask and @subset_mask, writing the resulting mask to @new_mask.
> + * If the resulting mask is empty, leave the affinity unchanged and return
> + * -EINVAL.
> + */
> +static int restrict_cpus_allowed_ptr(struct task_struct *p,
> + struct cpumask *new_mask,
> + const struct cpumask *subset_mask)
> +{
> + struct rq_flags rf;
> + struct rq *rq;
> +
> + rq = task_rq_lock(p, &rf);
> + if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
> + task_rq_unlock(rq, p, &rf);
> + return -EINVAL;
> + }
> +
> + return __set_cpus_allowed_ptr_locked(p, new_mask, false, rq, &rf);
> +}
> +
> +/*
> + * Restrict a given task's CPU affinity so that it is a subset of
> + * task_cpu_possible_mask(). If the resulting mask is empty, we warn and
> + * walk up the cpuset hierarchy until we find a suitable mask.

Technically there is also the case where the new_mask allocation fails,
in which case task_cpu_possible_mask() is forced instead. Maybe add a
clarifying comment at the "goto out_set_mask"?
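
E.g. something along these lines (the comment wording is just a
suggestion):

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		goto out_set_mask;	/* fall back to task_cpu_possible_mask() */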

> + */
> +void force_compatible_cpus_allowed_ptr(struct task_struct *p)
> +{
> + cpumask_var_t new_mask;
> + const struct cpumask *override_mask = task_cpu_possible_mask(p);
> +
> + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
> + goto out_set_mask;
> +
> + if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask))
> + goto out_free_mask;
> +
> + /*
> + * We failed to find a valid subset of the affinity mask for the
> + * task, so override it based on its cpuset hierarchy.
> + */
> + cpuset_cpus_allowed(p, new_mask);
> + override_mask = new_mask;
> +
> +out_set_mask:
> + if (printk_ratelimit()) {
> + printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
> + task_pid_nr(p), p->comm,
> + cpumask_pr_args(override_mask));
> + }
> +
> + set_cpus_allowed_ptr(p, override_mask);
> +out_free_mask:
> + free_cpumask_var(new_mask);
> +}
> +
> void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
> {
> #ifdef CONFIG_SCHED_DEBUG
> --
> 2.29.2.576.ga3fc446d84-goog
>