[PATCH] per-backing dev unplugging #2

From: Jens Axboe
Date: Thu Mar 11 2004 - 03:38:59 EST


Hi,

Final version, unless something stupid pops up. Changes:

- Adapt to 2.6.4-mm1
- Cleaned up the dm bits, much nicer with the lockless unplugging
(thanks Joe)
- md and loop unplugging, stacked devices should unplug their targets.
Otherwise they'll end up waiting for the unplug timer, which sucks.
- XFS fixed up, I hope. XFS folks still encouraged to look at this,
looks better this time around though (and works, I tested).
- blk_run_* inlined in blkdev.h

Against 2.6.4-mm1 (note that you need the other attached patch to boot it).

diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/block/ll_rw_blk.c linux-2.6.4-mm1/drivers/block/ll_rw_blk.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/block/ll_rw_blk.c 2004-03-11 09:10:00.000000000 +0100
+++ linux-2.6.4-mm1/drivers/block/ll_rw_blk.c 2004-03-11 09:07:12.000000000 +0100
@@ -42,12 +42,6 @@
*/
static kmem_cache_t *request_cachep;

-/*
- * plug management
- */
-static LIST_HEAD(blk_plug_list);
-static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-
static wait_queue_head_t congestion_wqh[2];

/*
@@ -248,8 +242,6 @@
*/
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

- INIT_LIST_HEAD(&q->plug_list);
-
blk_queue_activity_fn(q, NULL, NULL);
}

@@ -1101,13 +1093,11 @@
* don't plug a stopped queue, it must be paired with blk_start_queue()
* which will restart the queueing
*/
- if (!blk_queue_plugged(q)
- && !test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) {
- spin_lock(&blk_plug_lock);
- list_add_tail(&q->plug_list, &blk_plug_list);
+ if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
+ return;
+
+ if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
- spin_unlock(&blk_plug_lock);
- }
}

EXPORT_SYMBOL(blk_plug_device);
@@ -1119,15 +1109,12 @@
int blk_remove_plug(request_queue_t *q)
{
WARN_ON(!irqs_disabled());
- if (blk_queue_plugged(q)) {
- spin_lock(&blk_plug_lock);
- list_del_init(&q->plug_list);
- del_timer(&q->unplug_timer);
- spin_unlock(&blk_plug_lock);
- return 1;
- }

- return 0;
+ if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+ return 0; /* queue was not plugged, nothing to undo */
+
+ del_timer(&q->unplug_timer); /* the flag was set and is now cleared: cancel the pending unplug timer */
+ return 1;
}

EXPORT_SYMBOL(blk_remove_plug);
@@ -1158,14 +1145,11 @@
* Linux uses plugging to build bigger requests queues before letting
* the device have at them. If a queue is plugged, the I/O scheduler
* is still adding and merging requests on the queue. Once the queue
- * gets unplugged (either by manually calling this function, or by
- * calling blk_run_queues()), the request_fn defined for the
- * queue is invoked and transfers started.
+ * gets unplugged, the request_fn defined for the queue is invoked and
+ * transfers started.
**/
-void generic_unplug_device(void *data)
+void generic_unplug_device(request_queue_t *q)
{
- request_queue_t *q = data;
-
spin_lock_irq(q->queue_lock);
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
@@ -1173,9 +1157,23 @@

EXPORT_SYMBOL(generic_unplug_device);

+static inline void blk_backing_dev_unplug(struct backing_dev_info *bdi)
+{
+ request_queue_t *q = bdi->unplug_io_data; /* set to the owning queue wherever unplug_io_fn is pointed at this function */
+
+ /*
+ * devices don't necessarily have an ->unplug_fn defined
+ */
+ if (q->unplug_fn)
+ q->unplug_fn(q);
+}
+
+EXPORT_SYMBOL(blk_backing_dev_unplug); /* NOTE(review): exporting a static inline function looks unintended -- confirm */
+
static void blk_unplug_work(void *data)
{
request_queue_t *q = data;
+
q->unplug_fn(q);
}

@@ -1253,42 +1251,6 @@
EXPORT_SYMBOL(blk_run_queue);

/**
- * blk_run_queues - fire all plugged queues
- *
- * Description:
- * Start I/O on all plugged queues known to the block layer. Queues that
- * are currently stopped are ignored. This is equivalent to the older
- * tq_disk task queue run.
- **/
-#define blk_plug_entry(entry) list_entry((entry), request_queue_t, plug_list)
-void blk_run_queues(void)
-{
- LIST_HEAD(local_plug_list);
-
- spin_lock_irq(&blk_plug_lock);
-
- /*
- * this will happen fairly often
- */
- if (list_empty(&blk_plug_list))
- goto out;
-
- list_splice_init(&blk_plug_list, &local_plug_list);
-
- while (!list_empty(&local_plug_list)) {
- request_queue_t *q = blk_plug_entry(local_plug_list.next);
-
- spin_unlock_irq(&blk_plug_lock);
- q->unplug_fn(q);
- spin_lock_irq(&blk_plug_lock);
- }
-out:
- spin_unlock_irq(&blk_plug_lock);
-}
-
-EXPORT_SYMBOL(blk_run_queues);
-
-/**
* blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
* @q: the request queue to be released
*
@@ -1393,6 +1355,10 @@
memset(q, 0, sizeof(*q));
init_timer(&q->unplug_timer);
atomic_set(&q->refcnt, 1);
+
+ q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
+ q->backing_dev_info.unplug_io_data = q;
+
return q;
}

@@ -2046,7 +2012,6 @@
DEFINE_WAIT(wait);
wait_queue_head_t *wqh = &congestion_wqh[rw];

- blk_run_queues();
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = io_schedule_timeout(timeout);
finish_wait(wqh, &wait);
@@ -2302,7 +2267,7 @@
if (blk_queue_plugged(q)) {
int nr_queued = q->rq.count[READ] + q->rq.count[WRITE];

- if (nr_queued == q->unplug_thresh)
+ if (nr_queued == q->unplug_thresh || bio_sync(bio))
__generic_unplug_device(q);
}
spin_unlock_irq(q->queue_lock);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/block/loop.c linux-2.6.4-mm1/drivers/block/loop.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/block/loop.c 2004-03-11 09:10:00.000000000 +0100
+++ linux-2.6.4-mm1/drivers/block/loop.c 2004-03-11 09:07:12.000000000 +0100
@@ -434,6 +434,17 @@
goto out;
}

+/*
+ * unplug handler for a loop queue: kick off io on the backing file's address space
+ */
+static void loop_unplug(request_queue_t *q)
+{
+ struct loop_device *lo = q->queuedata; /* queuedata is pointed at the loop device when the queue is set up */
+
+ clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); /* drop the loop queue's own plugged state first */
+ blk_run_address_space(lo->lo_backing_file->f_mapping); /* then unplug whatever backs the file's mapping */
+}
+
struct switch_request {
struct file *file;
struct completion wait;
@@ -614,7 +625,6 @@
{
struct file *file;
struct inode *inode;
- struct block_device *lo_device = NULL;
struct address_space *mapping;
unsigned lo_blocksize;
int lo_flags = 0;
@@ -671,7 +681,7 @@
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);

lo->lo_blocksize = lo_blocksize;
- lo->lo_device = lo_device;
+ lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
lo->transfer = NULL;
@@ -689,6 +699,7 @@
*/
blk_queue_make_request(lo->lo_queue, loop_make_request);
lo->lo_queue->queuedata = lo;
+ lo->lo_queue->unplug_fn = loop_unplug;

set_capacity(disks[lo->lo_number], size);

diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.c linux-2.6.4-mm1/drivers/md/dm.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.c 2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm.c 2004-03-11 09:07:12.000000000 +0100
@@ -575,6 +575,14 @@
return 0;
}

+static void dm_unplug_all(request_queue_t *q)
+{
+ struct mapped_device *md = q->queuedata; /* this function is installed as md->queue->unplug_fn */
+
+ clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); /* clear the dm queue's own plugged state */
+ dm_table_unplug_all(md->map); /* then unplug every device listed in md->map */
+}
+
static int dm_any_congested(void *congested_data, int bdi_bits)
{
int r;
@@ -672,6 +680,7 @@
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
+ md->queue->unplug_fn = dm_unplug_all;

md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
mempool_free_slab, _io_cache);
@@ -900,7 +909,7 @@
* Then we wait for the already mapped ios to
* complete.
*/
- blk_run_queues();
+ dm_table_unplug_all(md->map);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);

@@ -947,7 +956,7 @@
up_write(&md->lock);
dm_table_put(map);

- blk_run_queues();
+ dm_table_unplug_all(md->map);

return 0;
}
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-crypt.c linux-2.6.4-mm1/drivers/md/dm-crypt.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-crypt.c 2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm-crypt.c 2004-03-11 09:07:12.000000000 +0100
@@ -668,7 +668,7 @@

/* out of memory -> run queues */
if (remaining)
- blk_run_queues();
+ blk_congestion_wait(bio_data_dir(clone), HZ/100);
}

/* drop reference, clones could have returned before we reach this */
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.h linux-2.6.4-mm1/drivers/md/dm.h
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.h 2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm.h 2004-03-11 09:07:12.000000000 +0100
@@ -116,6 +116,7 @@
void dm_table_suspend_targets(struct dm_table *t);
void dm_table_resume_targets(struct dm_table *t);
int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+void dm_table_unplug_all(struct dm_table *t);

/*-----------------------------------------------------------------
* A registry of target types.
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-table.c linux-2.6.4-mm1/drivers/md/dm-table.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-table.c 2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm-table.c 2004-03-11 09:07:12.000000000 +0100
@@ -885,6 +885,21 @@
return r;
}

+void dm_table_unplug_all(struct dm_table *t)
+{
+ struct list_head *d, *devices = dm_table_get_devices(t);
+
+ for (d = devices->next; d != devices; d = d->next) { /* walk every device in table t's device list */
+ struct dm_dev *dd = list_entry(d, struct dm_dev, list);
+ request_queue_t *q = bdev_get_queue(dd->bdev);
+
+ if (q->unplug_fn) { /* skip queues that define no unplug hook */
+ set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); /* presumably so the hook sees a plugged queue and really runs -- TODO confirm */
+ q->unplug_fn(q);
+ }
+ }
+}
+
EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device);
EXPORT_SYMBOL(dm_put_device);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/md.c linux-2.6.4-mm1/drivers/md/md.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/md.c 2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/md.c 2004-03-11 09:07:12.000000000 +0100
@@ -160,6 +160,28 @@
return 0;
}

+static void md_unplug_all(request_queue_t *q)
+{
+ mddev_t *mddev = q->queuedata; /* this function is installed as mddev->queue->unplug_fn */
+ struct list_head *tmp;
+ mdk_rdev_t *rdev;
+
+ clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); /* clear the md queue's own plugged state */
+
+ /*
+ * this list iteration is done without any locking in md?!
+ */
+ ITERATE_RDEV(mddev, rdev, tmp) { /* visit each rdev of this mddev and unplug its queue */
+ request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+ if (r_queue->unplug_fn) { /* skip queues that define no unplug hook */
+ set_bit(QUEUE_FLAG_PLUGGED, &r_queue->queue_flags); /* presumably so the hook sees a plugged queue -- TODO confirm */
+ r_queue->unplug_fn(r_queue);
+ }
+ }
+
+}
+
static inline mddev_t *mddev_get(mddev_t *mddev)
{
atomic_inc(&mddev->active);
@@ -335,6 +357,8 @@
struct bio_vec vec;
struct completion event;

+ rw |= (1 << BIO_RW_SYNC);
+
bio_init(&bio);
bio.bi_io_vec = &vec;
vec.bv_page = page;
@@ -349,7 +373,6 @@
bio.bi_private = &event;
bio.bi_end_io = bi_complete;
submit_bio(rw, &bio);
- blk_run_queues();
wait_for_completion(&event);

return test_bit(BIO_UPTODATE, &bio.bi_flags);
@@ -1644,6 +1667,7 @@
*/
mddev->queue->queuedata = mddev;
mddev->queue->make_request_fn = mddev->pers->make_request;
+ mddev->queue->unplug_fn = md_unplug_all;

mddev->changed = 1;
return 0;
@@ -2718,7 +2742,7 @@
run = thread->run;
if (run) {
run(thread->mddev);
- blk_run_queues();
+ blk_run_queue(thread->mddev->queue);
}
if (signal_pending(current))
flush_signals(current);
@@ -3286,7 +3310,7 @@
test_bit(MD_RECOVERY_ERR, &mddev->recovery))
break;

- blk_run_queues();
+ blk_run_queue(mddev->queue);

repeat:
if (jiffies >= mark[last_mark] + SYNC_MARK_STEP ) {
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/raid1.c linux-2.6.4-mm1/drivers/md/raid1.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/raid1.c 2004-03-11 03:55:28.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/raid1.c 2004-03-11 09:07:12.000000000 +0100
@@ -451,6 +451,7 @@

static void device_barrier(conf_t *conf, sector_t sect)
{
+ blk_run_queue(conf->mddev->queue);
spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), conf->resync_lock);

@@ -478,6 +479,7 @@
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
+ blk_run_queue(conf->mddev->queue);
spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
conf->nr_pending++;
@@ -644,6 +646,7 @@

static void close_sync(conf_t *conf)
{
+ blk_run_queue(conf->mddev->queue);
spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
spin_unlock_irq(&conf->resync_lock);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/raid5.c linux-2.6.4-mm1/drivers/md/raid5.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/raid5.c 2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/raid5.c 2004-03-11 09:07:12.000000000 +0100
@@ -249,6 +249,7 @@
break;
if (!sh) {
conf->inactive_blocked = 1;
+ blk_run_queue(conf->mddev->queue);
wait_event_lock_irq(conf->wait_for_stripe,
!list_empty(&conf->inactive_list) &&
(atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
@@ -1292,9 +1293,8 @@
}
}
}
-static void raid5_unplug_device(void *data)
+static void raid5_unplug_device(request_queue_t *q)
{
- request_queue_t *q = data;
mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev_to_conf(mddev);
unsigned long flags;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/raid6main.c linux-2.6.4-mm1/drivers/md/raid6main.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/raid6main.c 2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/raid6main.c 2004-03-11 09:07:12.000000000 +0100
@@ -1454,9 +1454,8 @@
}
}
}
-static void raid6_unplug_device(void *data)
+static void raid6_unplug_device(request_queue_t *q)
{
- request_queue_t *q = data;
mddev_t *mddev = q->queuedata;
raid6_conf_t *conf = mddev_to_conf(mddev);
unsigned long flags;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c 2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c 2004-03-11 09:07:12.000000000 +0100
@@ -147,8 +147,7 @@
bio->bi_private = &event;
bio->bi_end_io = bi_read_complete;
if(bio_add_page(bio, page, PAGE_SIZE, 0) == PAGE_SIZE) {
- submit_bio(READ, bio);
- blk_run_queues();
+ submit_bio(READ_SYNC, bio);
wait_for_completion(&event);
err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
bio_put(bio);
@@ -179,8 +178,7 @@
init_completion(&event);
bio->bi_private = &event;
bio->bi_end_io = bi_write_complete;
- submit_bio(WRITE, bio);
- blk_run_queues();
+ submit_bio(WRITE_SYNC, bio);
wait_for_completion(&event);
DEBUG(3, "submit_bio completed, bi_vcnt = %d\n", bio->bi_vcnt);
err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/buffer.c linux-2.6.4-mm1/fs/buffer.c
--- /opt/kernel/linux-2.6.4-mm1/fs/buffer.c 2004-03-11 09:10:01.948448091 +0100
+++ linux-2.6.4-mm1/fs/buffer.c 2004-03-11 09:07:12.000000000 +0100
@@ -132,7 +132,7 @@
do {
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
if (buffer_locked(bh)) {
- blk_run_queues();
+ blk_run_address_space(bh->b_bdev->bd_inode->i_mapping);
io_schedule();
}
} while (buffer_locked(bh));
@@ -491,7 +491,6 @@
pg_data_t *pgdat;

wakeup_bdflush(1024);
- blk_run_queues();
yield();

for_each_pgdat(pgdat) {
@@ -2924,7 +2923,7 @@

int block_sync_page(struct page *page)
{
- blk_run_queues();
+ blk_run_address_space(page->mapping);
return 0;
}

diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/direct-io.c linux-2.6.4-mm1/fs/direct-io.c
--- /opt/kernel/linux-2.6.4-mm1/fs/direct-io.c 2004-03-11 09:10:01.987443867 +0100
+++ linux-2.6.4-mm1/fs/direct-io.c 2004-03-11 09:07:12.000000000 +0100
@@ -364,7 +364,7 @@
if (dio->bio_list == NULL) {
dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- blk_run_queues();
+ blk_run_address_space(dio->inode->i_mapping);
io_schedule();
spin_lock_irqsave(&dio->bio_lock, flags);
dio->waiter = NULL;
@@ -1035,7 +1035,7 @@
if (ret == 0)
ret = dio->result;
finished_one_bio(dio); /* This can free the dio */
- blk_run_queues();
+ blk_run_address_space(inode->i_mapping);
if (should_wait) {
unsigned long flags;
/*
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c
--- /opt/kernel/linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c 2004-03-11 03:55:27.000000000 +0100
+++ linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c 2004-03-11 09:07:12.000000000 +0100
@@ -1972,8 +1972,7 @@

bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
- submit_bio(READ, bio);
- blk_run_queues();
+ submit_bio(READ_SYNC, bio);

wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

@@ -2117,9 +2116,8 @@

/* check if journaling to disk has been disabled */
if (!log->no_integrity) {
- submit_bio(WRITE, bio);
+ submit_bio(WRITE_SYNC, bio);
INCREMENT(lmStat.submitted);
- blk_run_queues();
}
else {
bio->bi_size = 0;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/ntfs/compress.c linux-2.6.4-mm1/fs/ntfs/compress.c
--- /opt/kernel/linux-2.6.4-mm1/fs/ntfs/compress.c 2004-03-11 03:55:27.000000000 +0100
+++ linux-2.6.4-mm1/fs/ntfs/compress.c 2004-03-11 09:07:12.000000000 +0100
@@ -668,7 +668,7 @@
"uptodate! Unplugging the disk queue "
"and rescheduling.");
get_bh(tbh);
- blk_run_queues();
+ blk_run_address_space(mapping);
schedule();
put_bh(tbh);
if (unlikely(!buffer_uptodate(tbh)))
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/ufs/truncate.c linux-2.6.4-mm1/fs/ufs/truncate.c
--- /opt/kernel/linux-2.6.4-mm1/fs/ufs/truncate.c 2004-03-11 03:55:27.000000000 +0100
+++ linux-2.6.4-mm1/fs/ufs/truncate.c 2004-03-11 09:07:12.000000000 +0100
@@ -456,7 +456,7 @@
break;
if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
ufs_sync_inode (inode);
- blk_run_queues();
+ blk_run_address_space(inode->i_mapping);
yield();
}
offset = inode->i_size & uspi->s_fshift;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c
--- /opt/kernel/linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c 2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c 2004-03-11 09:07:12.706793571 +0100
@@ -1013,7 +1013,7 @@
{
PB_TRACE(pb, "lock", 0);
if (atomic_read(&pb->pb_io_remaining))
- blk_run_queues();
+ blk_run_address_space(pb->pb_target->pbr_mapping);
down(&pb->pb_sema);
PB_SET_OWNER(pb);
PB_TRACE(pb, "locked", 0);
@@ -1109,7 +1109,7 @@
if (atomic_read(&pb->pb_pin_count) == 0)
break;
if (atomic_read(&pb->pb_io_remaining))
- blk_run_queues();
+ blk_run_address_space(pb->pb_target->pbr_mapping);
schedule();
}
remove_wait_queue(&pb->pb_waiters, &wait);
@@ -1407,7 +1407,7 @@
if (pb->pb_flags & PBF_RUN_QUEUES) {
pb->pb_flags &= ~PBF_RUN_QUEUES;
if (atomic_read(&pb->pb_io_remaining) > 1)
- blk_run_queues();
+ blk_run_address_space(pb->pb_target->pbr_mapping);
}
}

@@ -1471,7 +1471,7 @@
{
PB_TRACE(pb, "iowait", 0);
if (atomic_read(&pb->pb_io_remaining))
- blk_run_queues();
+ blk_run_address_space(pb->pb_target->pbr_mapping);
down(&pb->pb_iodonesema);
PB_TRACE(pb, "iowaited", (long)pb->pb_error);
return pb->pb_error;
@@ -1617,7 +1617,6 @@
pagebuf_daemon(
void *data)
{
- int count;
page_buf_t *pb;
struct list_head *curr, *next, tmp;

@@ -1640,7 +1639,6 @@

spin_lock(&pbd_delwrite_lock);

- count = 0;
list_for_each_safe(curr, next, &pbd_delwrite_queue) {
pb = list_entry(curr, page_buf_t, pb_list);

@@ -1657,7 +1655,6 @@
pb->pb_flags &= ~PBF_DELWRI;
pb->pb_flags |= PBF_WRITE;
list_move(&pb->pb_list, &tmp);
- count++;
}
}

@@ -1667,12 +1664,11 @@
list_del_init(&pb->pb_list);

pagebuf_iostrategy(pb);
+ blk_run_address_space(pb->pb_target->pbr_mapping);
}

if (as_list_len > 0)
purge_addresses();
- if (count)
- blk_run_queues();

force_flush = 0;
} while (pagebuf_daemon_active);
@@ -1689,7 +1685,6 @@
page_buf_t *pb;
struct list_head *curr, *next, tmp;
int pincount = 0;
- int flush_cnt = 0;

pagebuf_runall_queues(pagebuf_dataio_workqueue);
pagebuf_runall_queues(pagebuf_logio_workqueue);
@@ -1733,14 +1728,8 @@

pagebuf_lock(pb);
pagebuf_iostrategy(pb);
- if (++flush_cnt > 32) {
- blk_run_queues();
- flush_cnt = 0;
- }
}

- blk_run_queues();
-
while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, page_buf_t, pb_list);

diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/backing-dev.h linux-2.6.4-mm1/include/linux/backing-dev.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/backing-dev.h 2004-03-11 09:10:02.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/backing-dev.h 2004-03-11 09:07:12.706793571 +0100
@@ -28,6 +28,8 @@
int memory_backed; /* Cannot clean pages with writepage */
congested_fn *congested_fn; /* Function pointer if device is md/dm */
void *congested_data; /* Pointer to aux data for congested func */
+ void (*unplug_io_fn)(struct backing_dev_info *);
+ void *unplug_io_data;
};

extern struct backing_dev_info default_backing_dev_info;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/bio.h linux-2.6.4-mm1/include/linux/bio.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/bio.h 2004-03-11 03:55:37.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/bio.h 2004-03-11 09:07:12.707793462 +0100
@@ -124,6 +124,7 @@
#define BIO_RW_AHEAD 1
#define BIO_RW_BARRIER 2
#define BIO_RW_FAILFAST 3
+#define BIO_RW_SYNC 4 /* bit number in bi_rw (not a mask); bio_sync() tests 1 << BIO_RW_SYNC */

/*
* various member access, note that bio_data should of course not be used
@@ -138,6 +139,7 @@
#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9)
#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio)))
#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
+#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))

/*
* will die
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/blkdev.h linux-2.6.4-mm1/include/linux/blkdev.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/blkdev.h 2004-03-11 09:10:02.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/blkdev.h 2004-03-11 09:07:12.708793354 +0100
@@ -243,7 +243,7 @@
typedef void (request_fn_proc) (request_queue_t *q);
typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
typedef int (prep_rq_fn) (request_queue_t *, struct request *);
-typedef void (unplug_fn) (void *q);
+typedef void (unplug_fn) (request_queue_t *);

struct bio_vec;
typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
@@ -315,8 +315,6 @@
unsigned long bounce_pfn;
int bounce_gfp;

- struct list_head plug_list;
-
/*
* various queue flags, see QUEUE_* below
*/
@@ -370,8 +368,9 @@
#define QUEUE_FLAG_WRITEFULL 4 /* read queue has been filled */
#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
+#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */

-#define blk_queue_plugged(q) !list_empty(&(q)->plug_list)
+#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)

@@ -515,7 +514,7 @@
extern void blk_start_queue(request_queue_t *q);
extern void blk_stop_queue(request_queue_t *q);
extern void __blk_stop_queue(request_queue_t *q);
-extern void blk_run_queue(request_queue_t *q);
+extern void blk_run_queue(request_queue_t *);
extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
extern int blk_rq_unmap_user(struct request *, void __user *, unsigned int, int);
@@ -526,6 +525,18 @@
return bdev->bd_disk->queue;
}

+static inline void blk_run_backing_dev(struct backing_dev_info *bdi)
+{
+ if (bdi && bdi->unplug_io_fn) /* a bdi that never installed an unplug hook must not be called through NULL */
+ bdi->unplug_io_fn(bdi); /* kick off pending io on this backing device */
+}
+
+static inline void blk_run_address_space(struct address_space *mapping)
+{
+ if (mapping) /* a NULL mapping is silently ignored */
+ blk_run_backing_dev(mapping->backing_dev_info); /* unplug the backing device recorded in this mapping */
+}
+
/*
* end_request() and friends. Must be called with the request queue spinlock
* acquired. All functions called within end_request() _must_be_ atomic.
@@ -572,7 +583,7 @@

extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
-extern void generic_unplug_device(void *);
+extern void generic_unplug_device(request_queue_t *);
extern long nr_blockdev_pages(void);

int blk_get_queue(request_queue_t *);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/fs.h linux-2.6.4-mm1/include/linux/fs.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/fs.h 2004-03-11 09:10:02.309408999 +0100
+++ linux-2.6.4-mm1/include/linux/fs.h 2004-03-11 09:07:12.000000000 +0100
@@ -82,6 +82,8 @@
#define WRITE 1
#define READA 2 /* read-ahead - don't block if no resources */
#define SPECIAL 4 /* For non-blockdevice requests in request queue */
+#define READ_SYNC (READ | (1 << BIO_RW_SYNC)) /* BIO_RW_SYNC is a bit number; OR-ing it in raw would collide with SPECIAL */
+#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) /* shifted so that bio_sync() actually sees the flag */

#define SEL_IN 1
#define SEL_OUT 2
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/raid/md_k.h linux-2.6.4-mm1/include/linux/raid/md_k.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/raid/md_k.h 2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/raid/md_k.h 2004-03-11 09:07:12.000000000 +0100
@@ -326,7 +326,6 @@
if (condition) \
break; \
spin_unlock_irq(&lock); \
- blk_run_queues(); \
schedule(); \
spin_lock_irq(&lock); \
} \
@@ -341,30 +340,5 @@
__wait_event_lock_irq(wq, condition, lock); \
} while (0)

-
-#define __wait_disk_event(wq, condition) \
-do { \
- wait_queue_t __wait; \
- init_waitqueue_entry(&__wait, current); \
- \
- add_wait_queue(&wq, &__wait); \
- for (;;) { \
- set_current_state(TASK_UNINTERRUPTIBLE); \
- if (condition) \
- break; \
- blk_run_queues(); \
- schedule(); \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&wq, &__wait); \
-} while (0)
-
-#define wait_disk_event(wq, condition) \
-do { \
- if (condition) \
- break; \
- __wait_disk_event(wq, condition); \
-} while (0)
-
#endif

diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/kernel/power/disk.c linux-2.6.4-mm1/kernel/power/disk.c
--- /opt/kernel/linux-2.6.4-mm1/kernel/power/disk.c 2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/kernel/power/disk.c 2004-03-11 09:07:12.000000000 +0100
@@ -84,7 +84,6 @@
while (shrink_all_memory(10000))
printk(".");
printk("|\n");
- blk_run_queues();
}


diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/kernel/power/pmdisk.c linux-2.6.4-mm1/kernel/power/pmdisk.c
--- /opt/kernel/linux-2.6.4-mm1/kernel/power/pmdisk.c 2004-03-11 03:55:28.000000000 +0100
+++ linux-2.6.4-mm1/kernel/power/pmdisk.c 2004-03-11 09:07:12.000000000 +0100
@@ -859,7 +859,6 @@

static void wait_io(void)
{
- blk_run_queues();
while(atomic_read(&io_done))
io_schedule();
}
@@ -895,6 +894,7 @@
goto Done;
}

+ rw |= BIO_RW_SYNC;
if (rw == WRITE)
bio_set_pages_dirty(bio);
start_io();
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/kernel/power/swsusp.c linux-2.6.4-mm1/kernel/power/swsusp.c
--- /opt/kernel/linux-2.6.4-mm1/kernel/power/swsusp.c 2004-03-11 03:55:24.000000000 +0100
+++ linux-2.6.4-mm1/kernel/power/swsusp.c 2004-03-11 09:07:12.000000000 +0100
@@ -707,11 +707,6 @@

free_some_memory();

- /* No need to invalidate any vfsmnt list --
- * they will be valid after resume, anyway.
- */
- blk_run_queues();
-
/* Save state of all device drivers, and stop them. */
if ((res = device_suspend(4))==0)
/* If stopping device drivers worked, we proceed basically into
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/mm/mempool.c linux-2.6.4-mm1/mm/mempool.c
--- /opt/kernel/linux-2.6.4-mm1/mm/mempool.c 2004-03-11 03:55:34.000000000 +0100
+++ linux-2.6.4-mm1/mm/mempool.c 2004-03-11 09:07:12.000000000 +0100
@@ -233,8 +233,6 @@
if (!(gfp_mask & __GFP_WAIT))
return NULL;

- blk_run_queues();
-
prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
if (!pool->curr_nr)
io_schedule();
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/mm/readahead.c linux-2.6.4-mm1/mm/readahead.c
--- /opt/kernel/linux-2.6.4-mm1/mm/readahead.c 2004-03-11 09:10:02.452393513 +0100
+++ linux-2.6.4-mm1/mm/readahead.c 2004-03-11 09:07:12.000000000 +0100
@@ -15,9 +15,14 @@
#include <linux/backing-dev.h>
#include <linux/pagevec.h>

+static void default_unplug_io_fn(struct backing_dev_info *bdi)
+{
+} /* intentionally empty: gives default_backing_dev_info a callable unplug_io_fn */
+
struct backing_dev_info default_backing_dev_info = {
.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
.state = 0,
+ .unplug_io_fn = default_unplug_io_fn,
};

EXPORT_SYMBOL_GPL(default_backing_dev_info);

--
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/