Re: [0.5/2] scheduler caller profiling

From: William Lee Irwin III
Date: Sun May 02 2004 - 21:31:13 EST


On Sun, May 02, 2004 at 07:23:46PM -0700, William Lee Irwin III wrote:
> This patch was used to collect the data on the offending callers into
> the scheduler. It creates a profile buffer completely analogous to its

This patch creates a new scheduling entrypoint, wake_up_filtered(), and
uses it in page waitqueue hashing to discriminate between the waiters
on various pages. One of the sources of the thundering herds was
identified as the page waitqueue hashing by a priori methods and
empirically confirmed using the scheduler caller profiling patch.


-- wli

Index: wli-2.6.6-rc3-mm1/include/linux/wait.h
===================================================================
--- wli-2.6.6-rc3-mm1.orig/include/linux/wait.h 2004-04-03 19:37:07.000000000 -0800
+++ wli-2.6.6-rc3-mm1/include/linux/wait.h 2004-04-30 19:50:33.000000000 -0700
@@ -28,6 +28,11 @@
struct list_head task_list;
};

+struct filtered_wait_queue { /* wait-queue entry tagged with a key so wakeups can be delivered selectively */
+ void *key; /* opaque tag; wake_up_filtered() matches it by pointer equality */
+ wait_queue_t wait; /* embedded entry whose task_list is what gets linked on the queue */
+};
+
struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
@@ -104,6 +109,7 @@
list_del(&old->task_list);
}

+extern void FASTCALL(wake_up_filtered(wait_queue_head_t *q, void *key)); /* wake only the waiters on q whose key equals key */
extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
@@ -257,6 +263,16 @@
wait->func = autoremove_wake_function; \
INIT_LIST_HEAD(&wait->task_list); \
} while (0)
+
+#define DEFINE_FILTERED_WAIT(name, p) /* declare a struct filtered_wait_queue called 'name', keyed on p */ \
+ struct filtered_wait_queue name = { \
+ .key = p, /* tag later compared against the key given to wake_up_filtered() */ \
+ .wait = { \
+ .task = current, /* the waiter is the task expanding the macro */ \
+ .func = autoremove_wake_function, \
+ .task_list = LIST_HEAD_INIT(name.wait.task_list),\
+ }, \
+ }

#endif /* __KERNEL__ */

Index: wli-2.6.6-rc3-mm1/kernel/sched.c
===================================================================
--- wli-2.6.6-rc3-mm1.orig/kernel/sched.c 2004-04-30 16:13:32.000000000 -0700
+++ wli-2.6.6-rc3-mm1/kernel/sched.c 2004-04-30 19:50:33.000000000 -0700
@@ -2524,6 +2524,19 @@
}
}

+void fastcall wake_up_filtered(wait_queue_head_t *q, void *key) /* wake the waiters on q whose key equals key; others are left queued */
+{
+ unsigned long flags; /* saved interrupt state for the irqsave/irqrestore pair below */
+ unsigned int mode = TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE; /* mask handed to each waiter's wake callback */
+ struct filtered_wait_queue *wait, *save; /* save holds the next entry for the _safe iterator */
+ spin_lock_irqsave(&q->lock, flags); /* the whole walk runs under q->lock */
+ list_for_each_entry_safe(wait, save, &q->task_list, wait.task_list) { /* NOTE(review): treats every entry as embedded in a filtered_wait_queue; confirm no plain wait_queue_t is ever queued on q */
+ if (wait->key == key) /* pointer-equality filter */
+ wait->wait.func(&wait->wait, mode, 0); /* presumably may unlink the entry (autoremove), hence the _safe walk; verify */
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
/**
* __wake_up - wake up threads blocked on a waitqueue.
* @q: the waitqueue
Index: wli-2.6.6-rc3-mm1/mm/filemap.c
===================================================================
--- wli-2.6.6-rc3-mm1.orig/mm/filemap.c 2004-04-30 15:06:49.000000000 -0700
+++ wli-2.6.6-rc3-mm1/mm/filemap.c 2004-04-30 19:50:33.000000000 -0700
@@ -307,16 +307,16 @@
void fastcall wait_on_page_bit(struct page *page, int bit_nr)
{
wait_queue_head_t *waitqueue = page_waitqueue(page);
- DEFINE_WAIT(wait);
+ DEFINE_FILTERED_WAIT(wait, page); /* key the waiter on this page so wake_up_filtered(waitqueue, page) can single it out */

do {
- prepare_to_wait(waitqueue, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(waitqueue, &wait.wait, TASK_UNINTERRUPTIBLE); /* pass the embedded wait_queue_t; the API is unchanged */
if (test_bit(bit_nr, &page->flags)) {
sync_page(page);
io_schedule();
}
} while (test_bit(bit_nr, &page->flags));
- finish_wait(waitqueue, &wait);
+ finish_wait(waitqueue, &wait.wait); /* same embedded entry that was queued above */
}

EXPORT_SYMBOL(wait_on_page_bit);
@@ -344,7 +344,7 @@
BUG();
smp_mb__after_clear_bit();
if (waitqueue_active(waitqueue))
- wake_up_all(waitqueue);
+ wake_up_filtered(waitqueue, page);
}

EXPORT_SYMBOL(unlock_page);
@@ -363,7 +363,7 @@
smp_mb__after_clear_bit();
}
if (waitqueue_active(waitqueue))
- wake_up_all(waitqueue);
+ wake_up_filtered(waitqueue, page);
}

EXPORT_SYMBOL(end_page_writeback);
@@ -379,16 +379,16 @@
void fastcall __lock_page(struct page *page)
{
wait_queue_head_t *wqh = page_waitqueue(page);
- DEFINE_WAIT(wait);
+ DEFINE_FILTERED_WAIT(wait, page); /* key the waiter on this page so wake_up_filtered(wqh, page) can single it out */

while (TestSetPageLocked(page)) {
- prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); /* pass the embedded wait_queue_t; the API is unchanged */
if (PageLocked(page)) {
sync_page(page);
io_schedule();
}
}
- finish_wait(wqh, &wait);
+ finish_wait(wqh, &wait.wait); /* same embedded entry that was queued above */
}

EXPORT_SYMBOL(__lock_page);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/