[patch] reduce IPI noise due to /dev/cdrom open/close

From: Jes Sorensen
Date: Mon Jul 03 2006 - 11:37:43 EST


Hi,

Certain applications cause a lot of IPI noise because they constantly
open and close /dev/cdrom. hald is a particularly annoying offender,
but since every distribution insists on shipping it, it's one of those
things that are hard to get rid of :(

Anyway, this patch reduces the IPI noise by keeping a cpumask of CPUs
which have items in the bh lru and flushing only on those CPUs. On
systems with larger CPU counts it's quite normal that only a few CPUs
are actively doing block I/O, so broadcasting IPIs everywhere to flush
this is unnecessary.

I also switched the code to use schedule_on_each_cpu() as suggested by
Andrew, and made that API more flexible by introducing
schedule_on_each_cpu_mask().
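
For anyone wanting to call the new helper from other code, here is a
minimal sketch of how it is meant to be used (drain_local() and its
per-CPU counter are made up for the example; the copy-then-clear of
the mask mirrors what invalidate_bh_lrus() does below):

#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>

/* hypothetical per-CPU state that only some CPUs ever touch */
static DEFINE_PER_CPU(int, pending_items);
static cpumask_t cpus_with_items;

/* runs in keventd (process) context on every CPU in the mask */
static void drain_local(void *info)
{
	__get_cpu_var(pending_items) = 0;
}

static void drain_all(void)
{
	/* hand down a copy of the mask, then clear the shared one */
	cpumask_t mask = cpus_with_items;

	cpus_clear(cpus_with_items);
	schedule_on_each_cpu_mask(drain_local, NULL, mask);
}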

Cheers,
Jes

Introduce a more flexible schedule_on_each_cpu_mask() API, allowing the
caller to specify a cpumask of CPUs to schedule on, and implement
schedule_on_each_cpu() on top of it.

Use a cpumask to keep track of which CPUs have items in the per-CPU
buffer_head LRU. Let invalidate_bh_lrus() use the new cpumask API to
limit the number of cross-CPU calls when a block device is opened or
closed.

This significantly reduces IPI noise on systems with many CPUs that
are running hald.

Signed-off-by: Jes Sorensen <jes@xxxxxxx>


---
fs/buffer.c | 23 +++++++++++++++++++----
include/linux/workqueue.h | 3 +++
kernel/workqueue.c | 31 +++++++++++++++++++++++++++----
3 files changed, 49 insertions(+), 8 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -1323,6 +1323,7 @@ struct bh_lru {
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
+static cpumask_t lru_in_use;

#ifdef CONFIG_SMP
#define bh_lru_lock() local_irq_disable()
@@ -1352,9 +1353,14 @@ static void bh_lru_install(struct buffer
lru = &__get_cpu_var(bh_lrus);
if (lru->bhs[0] != bh) {
struct buffer_head *bhs[BH_LRU_SIZE];
- int in;
- int out = 0;
+ int in, out, cpu;

+ cpu = raw_smp_processor_id();
+ /* Test first to avoid cache lines bouncing around */
+ if (!cpu_isset(cpu, lru_in_use))
+ cpu_set(cpu, lru_in_use);
+
+ out = 0;
get_bh(bh);
bhs[out++] = bh;
for (in = 0; in < BH_LRU_SIZE; in++) {
@@ -1500,19 +1506,28 @@ EXPORT_SYMBOL(__bread);
*/
static void invalidate_bh_lru(void *arg)
{
- struct bh_lru *b = &get_cpu_var(bh_lrus);
+ struct bh_lru *b;
int i;

+ local_irq_disable();
+ b = &get_cpu_var(bh_lrus);
for (i = 0; i < BH_LRU_SIZE; i++) {
brelse(b->bhs[i]);
b->bhs[i] = NULL;
}
put_cpu_var(bh_lrus);
+ local_irq_enable();
}

static void invalidate_bh_lrus(void)
{
- on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
+ /*
+ * Need to hand down a copy of the mask or we wouldn't be run
+ * anywhere due to the original mask being cleared
+ */
+ cpumask_t mask = lru_in_use;
+ cpus_clear(lru_in_use);
+ schedule_on_each_cpu_mask(invalidate_bh_lru, NULL, mask);
}

void set_bh_page(struct buffer_head *bh,
Index: linux-2.6/include/linux/workqueue.h
===================================================================
--- linux-2.6.orig/include/linux/workqueue.h
+++ linux-2.6/include/linux/workqueue.h
@@ -8,6 +8,7 @@
#include <linux/timer.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
+#include <linux/cpumask.h>

struct workqueue_struct;

@@ -70,6 +71,8 @@ extern int FASTCALL(schedule_delayed_wor

extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay);
extern int schedule_on_each_cpu(void (*func)(void *info), void *info);
+extern int schedule_on_each_cpu_mask(void (*func)(void *info),
+ void *info, cpumask_t mask);
extern void flush_scheduled_work(void);
extern int current_is_keventd(void);
extern int keventd_up(void);
Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -429,9 +429,11 @@ int schedule_delayed_work_on(int cpu,
}

/**
- * schedule_on_each_cpu - call a function on each online CPU from keventd
+ * schedule_on_each_cpu_mask - call a function on each online CPU in the
+ * mask from keventd
* @func: the function to call
* @info: a pointer to pass to func()
+ * @mask: a cpumask_t of CPUs to schedule on
*
* Returns zero on success.
* Returns -ve errno on failure.
@@ -440,7 +442,8 @@ int schedule_delayed_work_on(int cpu,
*
* schedule_on_each_cpu() is very slow.
*/
-int schedule_on_each_cpu(void (*func)(void *info), void *info)
+int
+schedule_on_each_cpu_mask(void (*func)(void *info), void *info, cpumask_t mask)
{
int cpu;
struct work_struct *works;
@@ -451,14 +454,34 @@ int schedule_on_each_cpu(void (*func)(vo

for_each_online_cpu(cpu) {
INIT_WORK(per_cpu_ptr(works, cpu), func, info);
- __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
- per_cpu_ptr(works, cpu));
+ if (cpu_isset(cpu, mask))
+ __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
+ per_cpu_ptr(works, cpu));
}
flush_workqueue(keventd_wq);
free_percpu(works);
return 0;
}

+/**
+ * schedule_on_each_cpu - call a function on each online CPU from keventd
+ *
+ * @func: the function to call
+ * @info: a pointer to pass to func()
+ *
+ * Returns zero on success.
+ * Returns -ve errno on failure.
+ *
+ * Appears to be racy against CPU hotplug.
+ *
+ * schedule_on_each_cpu() is very slow.
+ */
+int
+schedule_on_each_cpu(void (*func)(void *info), void *info)
+{
+ return schedule_on_each_cpu_mask(func, info, CPU_MASK_ALL);
+}
+
void flush_scheduled_work(void)
{
flush_workqueue(keventd_wq);