[RFC PATCH 07/18] cgroup/cpuset: Allow overwriting HK_TYPE_DOMAIN housekeeping cpumask
From: Waiman Long
Date: Fri Aug 08 2025 - 11:13:59 EST
As we did not previously modify the housekeeping cpumasks when creating
a cpuset partition, we had to disallow non-isolated partitions from
using any of the HK_TYPE_DOMAIN isolated CPUs. Now that we are going to
modify the housekeeping cpumasks at run time, we will allow overwriting
of the HK_TYPE_DOMAIN cpumask when an isolated partition is first
created or when the creation of a non-isolated partition conflicts with
the boot time HK_TYPE_DOMAIN isolated CPUs. The now unnecessary checking
code is removed. The doc file will be updated in a later patch.
On the other hand, there is still a latency spike problem when the CPU
hotplug code is used to facilitate the proper functioning of the
dynamically modified nohz_full HK_TYPE_KERNEL_NOISE cpumask. So the
cpuset code will keep track of the boot-time enabled nohz_full cpumask
so that CPU hotplug can be avoided if all the newly isolated/non-isolated
CPUs are already in that cpumask. This code will be removed in the
future once the latency spike problem is solved.
Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
kernel/cgroup/cpuset.c | 45 ++++++++----------------------------------
1 file changed, 8 insertions(+), 37 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2190efd33efb..87e9ee7922cd 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -59,7 +59,6 @@ static const char * const perr_strings[] = {
[PERR_NOCPUS] = "Parent unable to distribute cpu downstream",
[PERR_HOTPLUG] = "No cpu available due to hotplug",
[PERR_CPUSEMPTY] = "cpuset.cpus and cpuset.cpus.exclusive are empty",
- [PERR_HKEEPING] = "partition config conflicts with housekeeping setup",
[PERR_ACCESS] = "Enable partition not permitted",
[PERR_REMOTE] = "Have remote partition underneath",
};
@@ -81,9 +80,10 @@ static cpumask_var_t subpartitions_cpus;
static cpumask_var_t isolated_cpus;
/*
- * Housekeeping (HK_TYPE_DOMAIN) CPUs at boot
+ * Housekeeping (nohz_full) CPUs at boot
*/
-static cpumask_var_t boot_hk_cpus;
+static cpumask_var_t boot_nohz_full_hk_cpus;
+static bool have_boot_nohz_full;
static bool have_boot_isolcpus;
/* List of remote partition root children */
@@ -1609,26 +1609,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
remote_partition_disable(cs, tmp);
}
-/*
- * prstate_housekeeping_conflict - check for partition & housekeeping conflicts
- * @prstate: partition root state to be checked
- * @new_cpus: cpu mask
- * Return: true if there is conflict, false otherwise
- *
- * CPUs outside of boot_hk_cpus, if defined, can only be used in an
- * isolated partition.
- */
-static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
-{
- if (!have_boot_isolcpus)
- return false;
-
- if ((prstate != PRS_ISOLATED) && !cpumask_subset(new_cpus, boot_hk_cpus))
- return true;
-
- return false;
-}
-
/**
* update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
* @cs: The cpuset that requests change in partition root state
@@ -1737,9 +1717,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
if (cpumask_empty(xcpus))
return PERR_INVCPUS;
- if (prstate_housekeeping_conflict(new_prs, xcpus))
- return PERR_HKEEPING;
-
/*
* A parent can be left with no CPU as long as there is no
* task directly associated with the parent partition.
@@ -2356,9 +2333,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
cpumask_empty(trialcs->effective_xcpus)) {
invalidate = true;
cs->prs_err = PERR_INVCPUS;
- } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) {
- invalidate = true;
- cs->prs_err = PERR_HKEEPING;
} else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) {
invalidate = true;
cs->prs_err = PERR_NOCPUS;
@@ -2499,9 +2473,6 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (cpumask_empty(trialcs->effective_xcpus)) {
invalidate = true;
cs->prs_err = PERR_INVCPUS;
- } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) {
- invalidate = true;
- cs->prs_err = PERR_HKEEPING;
} else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) {
invalidate = true;
cs->prs_err = PERR_NOCPUS;
@@ -3787,11 +3758,11 @@ int __init cpuset_init(void)
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
- have_boot_isolcpus = housekeeping_enabled(HK_TYPE_DOMAIN);
- if (have_boot_isolcpus) {
- BUG_ON(!alloc_cpumask_var(&boot_hk_cpus, GFP_KERNEL));
- cpumask_copy(boot_hk_cpus, housekeeping_cpumask(HK_TYPE_DOMAIN));
- cpumask_andnot(isolated_cpus, cpu_possible_mask, boot_hk_cpus);
+ have_boot_nohz_full = housekeeping_enabled(HK_TYPE_KERNEL_NOISE);
+ have_boot_isolcpus = housekeeping_enabled(HK_TYPE_DOMAIN);
+ if (have_boot_nohz_full) {
+ BUG_ON(!alloc_cpumask_var(&boot_nohz_full_hk_cpus, GFP_KERNEL));
+ cpumask_copy(boot_nohz_full_hk_cpus, housekeeping_cpumask(HK_TYPE_KERNEL_NOISE));
}
return 0;
--
2.50.0