[RFC][PATCH] create workqueue threads only when needed

From: Frederic Weisbecker
Date: Mon Jan 26 2009 - 19:17:27 EST


While looking at the statistics from the workqueue tracer, I was surprised by the
number of useless workqueues I had:

CPU  INSERTED  EXECUTED  NAME
 |      |         |        |

 *        0         0    kpsmoused
 *        0         0    ata_aux
 *        0         0    cqueue
 *        0         0    kacpi_notify
 *        0         0    kacpid
 *      998       998    khelper
 *        0         0    cpuset

 1        0         0    hda0/1
 1       42        42    reiserfs/1
 1        0         0    scsi_tgtd/1
 1        0         0    aio/1
 1        0         0    ata/1
 1      193       193    kblockd/1
 1        0         0    kintegrityd/1
 1        4         4    work_on_cpu/1
 1     1244      1244    events/1

 0        0         0    hda0/0
 0       63        63    reiserfs/0
 0        0         0    scsi_tgtd/0
 0        0         0    aio/0
 0        0         0    ata/0
 0      188       188    kblockd/0
 0        0         0    kintegrityd/0
 0       16        16    work_on_cpu/0
 0     1360      1360    events/0


All of the workqueues with 0 inserted works do nothing, for several reasons:

_ drivers built into my kernel that my system doesn't actually need, but which
create their workqueue(s) when they init
_ services which need their own workqueue, for various reasons, but which receive
jobs very rarely (often never)
_ ...?

And the output of git grep create_singlethread_workqueue is even more surprising.

So I've started a patch which creates the workqueues without any thread by default,
except for the kevents one.
A workqueue gets its thread created and started only when it receives its first
work. This is done by submitting a thread-creation work to the keventd workqueue,
which is always there and is the only one whose threads are started on creation.
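
From a caller's point of view nothing changes; only the moment the kthread
appears moves. A minimal driver-side sketch of the new behavior (my_wq,
my_work_fn and my_init are made-up names, just for illustration):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;

static void my_work_fn(struct work_struct *work)
{
	/* the actual job goes here */
}
static DECLARE_WORK(my_work, my_work_fn);

static int __init my_init(void)
{
	/* With this patch, no "my_wq" kthread is spawned here anymore... */
	my_wq = create_singlethread_workqueue("my_wq");
	if (!my_wq)
		return -ENOMEM;

	/* ...keventd creates and starts it upon this first insertion. */
	queue_work(my_wq, &my_work);
	return 0;
}
module_init(my_init);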

The result after this patch:

# CPU  INSERTED  EXECUTED  NAME
#  |      |         |        |

   *      999      1000    khelper

   1        5         6    reiserfs/1
   1        0         2    work_on_cpu/1
   1       86        87    kblockd/1
   1       14        16    work_on_cpu/1
   1      149       149    events/1

   0       15        16    reiserfs/0
   0       85        86    kblockd/0
   0      146       146    events/0


That drops 16 useless kernel threads on my system.
(Yes, the inserted values are not in sync with the executed ones because
the tracer loses the first events. I just rewrote some parts to make it work
with this patch.)
I guess I will also update this tracer to display the "shadow workqueues" which
don't have a thread yet.

I haven't had any problems with this patch so far, but I think it needs more
testing, e.g. with cpu hotplug, and some renaming of its functions and structures...
And I would like to receive some comments and impressions before continuing. So this
is just an RFC :-)

Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
---
 include/linux/workqueue.h |   44 +++++++++++----------
 kernel/workqueue.c        |   95 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 106 insertions(+), 33 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 3cd51e5..c4283c5 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -162,33 +162,35 @@ struct execute_work {
 extern struct workqueue_struct *
 __create_workqueue_key(const char *name, int singlethread,
 		       int freezeable, int rt, struct lock_class_key *key,
-		       const char *lock_name);
+		       const char *lock_name, bool when_needed);
 
 #ifdef CONFIG_LOCKDEP
-#define __create_workqueue(name, singlethread, freezeable, rt)	\
-({								\
-	static struct lock_class_key __key;			\
-	const char *__lock_name;				\
-								\
-	if (__builtin_constant_p(name))				\
-		__lock_name = (name);				\
-	else							\
-		__lock_name = #name;				\
-								\
-	__create_workqueue_key((name), (singlethread),		\
-			       (freezeable), (rt), &__key,	\
-			       __lock_name);			\
+#define __create_workqueue(name, singlethread, freezeable, rt, when_needed) \
+({									\
+	static struct lock_class_key __key;				\
+	const char *__lock_name;					\
+									\
+	if (__builtin_constant_p(name))					\
+		__lock_name = (name);					\
+	else								\
+		__lock_name = #name;					\
+									\
+	__create_workqueue_key((name), (singlethread),			\
+			       (freezeable), (rt), &__key,		\
+			       __lock_name, when_needed);		\
 })
 #else
-#define __create_workqueue(name, singlethread, freezeable, rt)	\
-	__create_workqueue_key((name), (singlethread), (freezeable), (rt), \
-			       NULL, NULL)
+#define __create_workqueue(name, singlethread, freezeable, rt, when_needed) \
+	__create_workqueue_key((name), (singlethread), (freezeable), (rt), \
+			       NULL, NULL, when_needed)
 #endif
 
-#define create_workqueue(name) __create_workqueue((name), 0, 0, 0)
-#define create_rt_workqueue(name) __create_workqueue((name), 0, 0, 1)
-#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
-#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
+#define create_workqueue(name) __create_workqueue((name), 0, 0, 0, 1)
+#define create_rt_workqueue(name) __create_workqueue((name), 0, 0, 1, 0)
+#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0, 0)
+#define create_singlethread_workqueue(name) \
+	__create_workqueue((name), 1, 0, 0, 1)
+#define create_kevents(name) __create_workqueue((name), 0, 0, 0, 0)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e53ee18..430cb5a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -69,6 +69,12 @@ struct workqueue_struct {
 #endif
 };
 
+
+struct late_workqueue_creation_data {
+	struct cpu_workqueue_struct *cwq;
+	struct work_struct work;
+};
+
 /* Serializes the accesses to the list of workqueues. */
 static DEFINE_SPINLOCK(workqueue_lock);
 static LIST_HEAD(workqueues);
@@ -126,12 +132,34 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
+static void create_wq_thread_late_work(struct work_struct *work);
+
+/* Called when a first work is inserted on a workqueue */
+static void create_wq_thread_late(struct cpu_workqueue_struct *cwq)
+{
+	struct late_workqueue_creation_data *l;
+
+	/*
+	 * The work can be inserted whatever is the context.
+	 * But such atomic allocation will be rare and freed soon.
+	 */
+	l = kmalloc(sizeof(*l), GFP_ATOMIC);
+	if (!l) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+	INIT_WORK(&l->work, create_wq_thread_late_work);
+	l->cwq = cwq;
+	schedule_work(&l->work);
+}
+
+
 DEFINE_TRACE(workqueue_insertion);
 
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
-	trace_workqueue_insertion(cwq->thread, work);
+	trace_workqueue_insertion(cwq->thread, work, cwq->wq->singlethread);
 
 	set_wq_data(work, cwq);
 	/*
@@ -148,6 +176,9 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
 {
 	unsigned long flags;
 
+	if (!cwq->thread)
+		create_wq_thread_late(cwq);
+
 	spin_lock_irqsave(&cwq->lock, flags);
 	insert_work(cwq, work, &cwq->worklist);
 	spin_unlock_irqrestore(&cwq->lock, flags);
@@ -291,7 +322,9 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		 */
 		struct lockdep_map lockdep_map = work->lockdep_map;
 #endif
-		trace_workqueue_execution(cwq->thread, work);
+		struct workqueue_struct *wq = cwq->wq;
+
+		trace_workqueue_execution(cwq->thread, work, wq->singlethread);
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
 		spin_unlock_irq(&cwq->lock);
@@ -387,6 +420,8 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 	} else {
 		struct wq_barrier barr;
 
+		if (!cwq->thread)
+			create_wq_thread_late(cwq);
 		active = 0;
 		spin_lock_irq(&cwq->lock);
 		if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
@@ -796,7 +831,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
 	cwq->thread = p;
 
-	trace_workqueue_creation(cwq->thread, cpu);
+	trace_workqueue_creation(cwq->thread, wq->singlethread ?
+				 SINGLETHREAD_CPU : cpu);
 
 	return 0;
 }
@@ -812,12 +848,34 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 	}
 }
 
+static void create_wq_thread_late_work(struct work_struct *work)
+{
+	struct late_workqueue_creation_data *l;
+	struct cpu_workqueue_struct *cwq;
+	int cpu = smp_processor_id();
+	int err = 0;
+
+	l = container_of(work, struct late_workqueue_creation_data, work);
+	cwq = l->cwq;
+
+	if (is_wq_single_threaded(cwq->wq)) {
+		err = create_workqueue_thread(cwq, singlethread_cpu);
+		start_workqueue_thread(cwq, -1);
+	} else {
+		err = create_workqueue_thread(cwq, cpu);
+		start_workqueue_thread(cwq, cpu);
+	}
+	WARN_ON_ONCE(err);
+	kfree(l);
+}
+
 struct workqueue_struct *__create_workqueue_key(const char *name,
 						int singlethread,
 						int freezeable,
 						int rt,
 						struct lock_class_key *key,
-						const char *lock_name)
+						const char *lock_name,
+						bool when_needed)
 {
 	struct workqueue_struct *wq;
 	struct cpu_workqueue_struct *cwq;
@@ -842,8 +900,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 
 	if (singlethread) {
 		cwq = init_cpu_workqueue(wq, singlethread_cpu);
-		err = create_workqueue_thread(cwq, singlethread_cpu);
-		start_workqueue_thread(cwq, -1);
+		if (!when_needed) {
+			err = create_workqueue_thread(cwq, singlethread_cpu);
+			start_workqueue_thread(cwq, -1);
+		}
 	} else {
 		cpu_maps_update_begin();
 		/*
@@ -865,8 +925,11 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 			cwq = init_cpu_workqueue(wq, cpu);
 			if (err || !cpu_online(cpu))
 				continue;
-			err = create_workqueue_thread(cwq, cpu);
-			start_workqueue_thread(cwq, cpu);
+
+			if (!when_needed) {
+				err = create_workqueue_thread(cwq, cpu);
+				start_workqueue_thread(cwq, cpu);
+			}
 		}
 		cpu_maps_update_done();
 	}
@@ -904,9 +967,12 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 	 * checks list_empty(), and a "normal" queue_work() can't use
 	 * a dead CPU.
 	 */
-	trace_workqueue_destruction(cwq->thread);
-	kthread_stop(cwq->thread);
-	cwq->thread = NULL;
+
+	if (cwq->thread) {
+		trace_workqueue_destruction(cwq->thread, cwq->wq->singlethread);
+		kthread_stop(cwq->thread);
+		cwq->thread = NULL;
+	}
 }
 
 /**
@@ -955,6 +1021,9 @@ undo:
 
 		switch (action) {
 		case CPU_UP_PREPARE:
+			/* Will be created during the first work insertion */
+			if (wq != keventd_wq)
+				break;
 			if (!create_workqueue_thread(cwq, cpu))
 				break;
 			printk(KERN_ERR "workqueue [%s] for %i failed\n",
@@ -964,6 +1033,8 @@ undo:
 			goto undo;
 
 		case CPU_ONLINE:
+			if (wq != keventd_wq)
+				break;
 			start_workqueue_thread(cwq, cpu);
 			break;
 
@@ -1033,7 +1104,7 @@ void __init init_workqueues(void)
 	singlethread_cpu = cpumask_first(cpu_possible_mask);
 	cpu_singlethread_map = cpumask_of(singlethread_cpu);
 	hotcpu_notifier(workqueue_cpu_callback, 0);
-	keventd_wq = create_workqueue("events");
+	keventd_wq = create_kevents("events");
 	BUG_ON(!keventd_wq);
 #ifdef CONFIG_SMP
 	work_on_cpu_wq = create_workqueue("work_on_cpu");


