Re: [PATCH 4/4] workqueue: Allow modifying low level unbound workqueue cpumask

From: Lai Jiangshan
Date: Fri Mar 13 2015 - 03:47:32 EST


On 03/12/2015 01:00 PM, Lai Jiangshan wrote:
> Allow to modify the low-level unbound workqueues cpumask through
> sysfs. This is performed by traversing the entire workqueue list
> and calling wq_unbound_install_ctx_prepare() on the unbound workqueues
> with the low level mask passed in. Only after all the preparations are done,
> we commit them all together.
>
> Ordered workqueues are ignored by the low level unbound workqueue cpumask;
> they will be handled in the near future.
>
> The per-nodes' pwqs are mandatorily controlled by the low level cpumask, while
> the default pwq ignores the low level cpumask when (and ONLY when) the cpumask set
> by the user doesn't overlap with the low level cpumask. In this case, we can't
> apply the empty cpumask to the default pwq, so we use the user-set cpumask
> directly.
>
> Cc: Christoph Lameter <cl@xxxxxxxxx>
> Cc: Kevin Hilman <khilman@xxxxxxxxxx>
> Cc: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
> Cc: Mike Galbraith <bitbucket@xxxxxxxxx>
> Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
> Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> Original-patch-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>

This patch missed a part in wq_update_unbound_numa(); the fix is below:

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index facaaae..4027ec9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3722,6 +3722,9 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* wq's, the default pwq should be used.
*/
if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
+ cpumask_and(cpumask, cpumask, wq_unbound_cpumask);
+ if (cpumask_empty(cpumask))
+ goto use_dfl_pwq;
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
goto out_unlock;
} else {

> ---
> kernel/workqueue.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 88 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 61b5bfa..facaaae 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -299,7 +299,7 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
> static LIST_HEAD(workqueues); /* PR: list of all workqueues */
> static bool workqueue_freezing; /* PL: have wqs started freezing? */
>
> -static cpumask_var_t wq_unbound_cpumask;
> +static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
>
> /* the per-cpu worker pools */
> static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
> @@ -3491,6 +3491,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
> struct wq_unbound_install_ctx {
> struct workqueue_struct *wq; /* target to be installed */
> struct workqueue_attrs *attrs; /* attrs for installing */
> + struct list_head list; /* queued for batching commit */
> struct pool_workqueue *dfl_pwq;
> struct pool_workqueue *pwq_tbl[];
> };
> @@ -3513,10 +3514,11 @@ static void wq_unbound_install_ctx_free(struct wq_unbound_install_ctx *ctx)
>
> static struct wq_unbound_install_ctx *
> wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> - const struct workqueue_attrs *attrs)
> + const struct workqueue_attrs *attrs,
> + cpumask_var_t unbound_cpumask)
> {
> struct wq_unbound_install_ctx *ctx;
> - struct workqueue_attrs *new_attrs, *tmp_attrs;
> + struct workqueue_attrs *new_attrs, *pwq_attrs, *tmp_attrs;
> int node;
>
> lockdep_assert_held(&wq_pool_mutex);
> @@ -3525,13 +3527,16 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> GFP_KERNEL);
>
> new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
> + pwq_attrs = alloc_workqueue_attrs(GFP_KERNEL);
> tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
> if (!ctx || !new_attrs || !tmp_attrs)
> goto out_free;
>
> /* make a copy of @attrs and sanitize it */
> copy_workqueue_attrs(new_attrs, attrs);
> - cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
> + copy_workqueue_attrs(pwq_attrs, attrs);
> + cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
> + cpumask_and(pwq_attrs->cpumask, pwq_attrs->cpumask, unbound_cpumask);
>
> /*
> * We may create multiple pwqs with differing cpumasks. Make a
> @@ -3544,13 +3549,21 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> * If something goes wrong during CPU up/down, we'll fall back to
> * the default pwq covering whole @attrs->cpumask. Always create
> * it even if we don't use it immediately.
> + *
> + * If the cpumask set by the user doesn't overlap with the global
> + * wq_unbound_cpumask, we ignore the wq_unbound_cpumask for this wq
> + * which means all its nodes' pwqs are its default pwq and its default
> + * pwq's workers' cpumask is totally equals to the user setting.
> */
> - ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
> + if (cpumask_empty(pwq_attrs->cpumask))
> + ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
> + else
> + ctx->dfl_pwq = alloc_unbound_pwq(wq, pwq_attrs);
> if (!ctx->dfl_pwq)
> goto out_free;
>
> for_each_node(node) {
> - if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
> + if (wq_calc_node_cpumask(pwq_attrs, node, -1, tmp_attrs->cpumask)) {
> ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
> if (!ctx->pwq_tbl[node])
> goto out_free;
> @@ -3564,6 +3577,7 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> ctx->attrs = new_attrs;
>
> out_free:
> + free_workqueue_attrs(pwq_attrs);
> free_workqueue_attrs(tmp_attrs);
>
> if (!ctx || !ctx->wq) {
> @@ -3634,7 +3648,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> get_online_cpus();
>
> mutex_lock(&wq_pool_mutex);
> - ctx = wq_unbound_install_ctx_prepare(wq, attrs);
> + ctx = wq_unbound_install_ctx_prepare(wq, attrs, wq_unbound_cpumask);
> mutex_unlock(&wq_pool_mutex);
>
> put_online_cpus();
> @@ -3961,19 +3975,85 @@ static struct bus_type wq_subsys = {
> .dev_groups = wq_sysfs_groups,
> };
>
> +static int unbounds_cpumask_apply(cpumask_var_t cpumask)
> +{
> + LIST_HEAD(ctxs);
> + int ret = 0;
> + struct workqueue_struct *wq;
> + struct wq_unbound_install_ctx *ctx, *n;
> +
> + lockdep_assert_held(&wq_pool_mutex);
> +
> + list_for_each_entry(wq, &workqueues, list) {
> + if (!(wq->flags & WQ_UNBOUND))
> + continue;
> + /* creating multiple pwqs breaks ordering guarantee */
> + if (wq->flags & __WQ_ORDERED)
> + continue;
> +
> + ctx = wq_unbound_install_ctx_prepare(wq, wq->unbound_attrs,
> + cpumask);
> + if (!ctx) {
> + ret = -ENOMEM;
> + break;
> + }
> +
> + list_add_tail(&ctx->list, &ctxs);
> + }
> +
> + list_for_each_entry_safe(ctx, n, &ctxs, list) {
> + if (ret >= 0)
> + wq_unbound_install_ctx_commit(ctx);
> + wq_unbound_install_ctx_free(ctx);
> + }
> +
> + return ret;
> +}
> +
> +static ssize_t unbounds_cpumask_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + cpumask_var_t cpumask;
> + int ret = -EINVAL;
> +
> + if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
> + return -ENOMEM;
> +
> + ret = cpumask_parse(buf, cpumask);
> + if (ret)
> + goto out;
> +
> + get_online_cpus();
> + cpumask_and(cpumask, cpumask, cpu_possible_mask);
> + if (cpumask_intersects(cpumask, cpu_online_mask)) {
> + mutex_lock(&wq_pool_mutex);
> + ret = unbounds_cpumask_apply(cpumask);
> + if (ret >= 0)
> + cpumask_copy(wq_unbound_cpumask, cpumask);
> + mutex_unlock(&wq_pool_mutex);
> + }
> + put_online_cpus();
> +out:
> + free_cpumask_var(cpumask);
> + return ret ? ret : count;
> +}
> +
> static ssize_t unbounds_cpumask_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> int written;
>
> + mutex_lock(&wq_pool_mutex);
> written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
> cpumask_pr_args(wq_unbound_cpumask));
> + mutex_unlock(&wq_pool_mutex);
>
> return written;
> }
>
> static struct device_attribute wq_sysfs_cpumask_attr =
> - __ATTR(cpumask, 0444, unbounds_cpumask_show, NULL);
> + __ATTR(cpumask, 0644, unbounds_cpumask_show, unbounds_cpumask_store);
>
> static int __init wq_sysfs_init(void)
> {
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/