[patch 9/13] direct_io mopup

From: Andrew Morton (akpm@zip.com.au)
Date: Wed Jul 17 2002 - 00:30:05 EST


Some cleanup following the surprise merge of the direct-to-bio O_DIRECT code.

- Remove leftover bits and pieces of the kiobuf-based O_DIRECT implementation

- Replace the waitqueue in struct dio with just a task_struct pointer
  and use wake_up_process. (Ben).

- Only take mmap_sem around the individual calls to get_user_pages().
   (It pins the vmas, yes?)

- Remove some debug code.

- Fix JFS.

 fs/buffer.c | 49 -------------------------------
 fs/direct-io.c | 31 +++++--------------
 fs/fcntl.c | 17 ----------
 fs/file_table.c | 3 -
 fs/jfs/inode.c | 7 +---
 fs/open.c | 11 -------
 include/linux/fs.h | 5 ---
 mm/filemap.c | 83 -----------------------------------------------------
 8 files changed, 12 insertions(+), 194 deletions(-)

--- 2.5.26/fs/direct-io.c~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/fs/direct-io.c Tue Jul 16 21:46:47 2002
@@ -1,5 +1,5 @@
 /*
- * mm/direct-io.c
+ * fs/direct-io.c
  *
  * Copyright (C) 2002, Linus Torvalds.
  *
@@ -61,7 +61,7 @@ struct dio {
         atomic_t bio_count;
         spinlock_t bio_list_lock;
         struct bio *bio_list; /* singly linked via bi_private */
- wait_queue_head_t wait_q;
+ struct task_struct *waiter;
 };
 
 /*
@@ -81,6 +81,7 @@ static int dio_refill_pages(struct dio *
         int nr_pages;
 
         nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
+ down_read(&current->mm->mmap_sem);
         ret = get_user_pages(
                 current, /* Task for fault acounting */
                 current->mm, /* whose pages? */
@@ -90,6 +91,7 @@ static int dio_refill_pages(struct dio *
                 0, /* force (?) */
                 &dio->pages[0],
                 NULL); /* vmas */
+ up_read(&current->mm->mmap_sem);
 
         if (ret >= 0) {
                 dio->curr_user_address += ret * PAGE_SIZE;
@@ -139,7 +141,7 @@ static void dio_bio_end_io(struct bio *b
         bio->bi_private = dio->bio_list;
         dio->bio_list = bio;
         spin_unlock_irqrestore(&dio->bio_list_lock, flags);
- wake_up(&dio->wait_q);
+ wake_up_process(dio->waiter);
 }
 
 static int
@@ -193,13 +195,11 @@ static void dio_cleanup(struct dio *dio)
  */
 static struct bio *dio_await_one(struct dio *dio)
 {
- DECLARE_WAITQUEUE(wait, current);
         unsigned long flags;
         struct bio *bio;
 
         spin_lock_irqsave(&dio->bio_list_lock, flags);
         while (dio->bio_list == NULL) {
- add_wait_queue(&dio->wait_q, &wait);
                 set_current_state(TASK_UNINTERRUPTIBLE);
                 if (dio->bio_list == NULL) {
                         spin_unlock_irqrestore(&dio->bio_list_lock, flags);
@@ -208,7 +208,6 @@ static struct bio *dio_await_one(struct
                         spin_lock_irqsave(&dio->bio_list_lock, flags);
                 }
                 set_current_state(TASK_RUNNING);
- remove_wait_queue(&dio->wait_q, &wait);
         }
         bio = dio->bio_list;
         dio->bio_list = bio->bi_private;
@@ -224,23 +223,17 @@ static int dio_bio_complete(struct dio *
         const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         struct bio_vec *bvec = bio->bi_io_vec;
         int page_no;
- int ret = 0;
 
         for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
                 struct page *page = bvec[page_no].bv_page;
 
- if (!uptodate) {
- if (ret == 0)
- ret = -EIO;
- }
-
                 if (dio->rw == READ)
                         set_page_dirty(page);
                 page_cache_release(page);
         }
         atomic_dec(&dio->bio_count);
         bio_put(bio);
- return ret;
+ return uptodate ? 0 : -EIO;
 }
 
 /*
@@ -265,7 +258,7 @@ static int dio_await_completion(struct d
  * to keep the memory consumption sane we periodically reap any completed BIOs
  * during the BIO generation phase.
  *
- * This also helps to limis the peak amount of pinned userspace memory.
+ * This also helps to limit the peak amount of pinned userspace memory.
  */
 static int dio_bio_reap(struct dio *dio)
 {
@@ -388,15 +381,13 @@ out:
         return ret;
 }
 
-struct dio *g_dio;
-
 int
 generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset,
                         size_t count, get_block_t get_block)
 {
         const unsigned blocksize_mask = (1 << inode->i_blkbits) - 1;
         const unsigned long user_addr = (unsigned long)buf;
- int ret = 0;
+ int ret;
         int ret2;
         struct dio dio;
         size_t bytes;
@@ -407,8 +398,6 @@ generic_direct_IO(int rw, struct inode *
                 goto out;
         }
 
- g_dio = &dio;
-
         /* BIO submission state */
         dio.bio = NULL;
         dio.bvec = NULL;
@@ -444,11 +433,9 @@ generic_direct_IO(int rw, struct inode *
         atomic_set(&dio.bio_count, 0);
         spin_lock_init(&dio.bio_list_lock);
         dio.bio_list = NULL;
- init_waitqueue_head(&dio.wait_q);
+ dio.waiter = current;
 
- down_read(&current->mm->mmap_sem);
         ret = do_direct_IO(&dio);
- up_read(&current->mm->mmap_sem);
 
         if (dio.bio)
                 dio_bio_submit(&dio);
--- 2.5.26/fs/buffer.c~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/fs/buffer.c Tue Jul 16 21:46:47 2002
@@ -2309,55 +2309,6 @@ sector_t generic_block_bmap(struct addre
         return tmp.b_blocknr;
 }
 
-#if 0
-int generic_direct_IO(int rw, struct inode *inode,
- struct kiobuf *iobuf, unsigned long blocknr,
- int blocksize, get_block_t *get_block)
-{
- int i, nr_blocks, retval = 0;
- sector_t *blocks = iobuf->blocks;
- struct block_device *bdev = NULL;
-
- nr_blocks = iobuf->length / blocksize;
- /* build the blocklist */
- for (i = 0; i < nr_blocks; i++, blocknr++) {
- struct buffer_head bh;
-
- bh.b_state = 0;
- bh.b_size = blocksize;
-
- retval = get_block(inode, blocknr, &bh, rw & 1);
- if (retval)
- goto out;
-
- if (rw == READ) {
- if (buffer_new(&bh))
- BUG();
- if (!buffer_mapped(&bh)) {
- /* there was an hole in the filesystem */
- blocks[i] = -1UL;
- continue;
- }
- } else {
- if (buffer_new(&bh))
- unmap_underlying_metadata(bh.b_bdev,
- bh.b_blocknr);
- if (!buffer_mapped(&bh))
- BUG();
- }
- blocks[i] = bh.b_blocknr;
- bdev = bh.b_bdev;
- }
-
- /* This does not understand multi-device filesystems currently */
- if (bdev)
- retval = brw_kiovec(rw, 1, &iobuf, bdev, blocks, blocksize);
-
- out:
- return retval;
-}
-#endif
-
 /*
  * Start I/O on a physical range of kernel memory, defined by a vector
  * of kiobuf structs (much like a user-space iovec list).
--- 2.5.26/include/linux/fs.h~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/include/linux/fs.h Tue Jul 16 21:59:32 2002
@@ -274,7 +274,6 @@ struct iattr {
  */
 struct page;
 struct address_space;
-struct kiobuf;
 
 struct address_space_operations {
         int (*writepage)(struct page *);
@@ -493,10 +492,6 @@ struct file {
 
         /* needed for tty driver, and maybe others */
         void *private_data;
-
- /* preallocated helper kiobuf to speedup O_DIRECT */
- struct kiobuf *f_iobuf;
- long f_iobuf_lock;
 };
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
--- 2.5.26/fs/fcntl.c~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/fs/fcntl.c Tue Jul 16 21:46:47 2002
@@ -248,23 +248,6 @@ static int setfl(int fd, struct file * f
                 if (!inode->i_mapping || !inode->i_mapping->a_ops ||
                         !inode->i_mapping->a_ops->direct_IO)
                                 return -EINVAL;
-
- /*
- * alloc_kiovec() can sleep and we are only serialized by
- * the big kernel lock here, so abuse the i_sem to serialize
- * this case too. We of course wouldn't need to go deep down
- * to the inode layer, we could stay at the file layer, but
- * we don't want to pay for the memory of a semaphore in each
- * file structure too and we use the inode semaphore that we just
- * pay for anyways.
- */
- error = 0;
- down(&inode->i_sem);
- if (!filp->f_iobuf)
- error = alloc_kiovec(1, &filp->f_iobuf);
- up(&inode->i_sem);
- if (error < 0)
- return error;
         }
 
         /* required for strict SunOS emulation */
--- 2.5.26/fs/file_table.c~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/fs/file_table.c Tue Jul 16 21:59:32 2002
@@ -115,9 +115,6 @@ void __fput(struct file * file)
 
         locks_remove_flock(file);
 
- if (file->f_iobuf)
- free_kiovec(1, &file->f_iobuf);
-
         if (file->f_op && file->f_op->release)
                 file->f_op->release(inode, file);
         fops_put(file->f_op);
--- 2.5.26/fs/open.c~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/fs/open.c Tue Jul 16 21:46:47 2002
@@ -647,15 +647,6 @@ struct file *dentry_open(struct dentry *
         f->f_op = fops_get(inode->i_fop);
         file_move(f, &inode->i_sb->s_files);
 
- /* preallocate kiobuf for O_DIRECT */
- f->f_iobuf = NULL;
- f->f_iobuf_lock = 0;
- if (f->f_flags & O_DIRECT) {
- error = alloc_kiovec(1, &f->f_iobuf);
- if (error)
- goto cleanup_all;
- }
-
         if (f->f_op && f->f_op->open) {
                 error = f->f_op->open(inode,f);
                 if (error)
@@ -675,8 +666,6 @@ struct file *dentry_open(struct dentry *
         return f;
 
 cleanup_all:
- if (f->f_iobuf)
- free_kiovec(1, &f->f_iobuf);
         fops_put(f->f_op);
         if (f->f_mode & FMODE_WRITE)
                 put_write_access(inode);
--- 2.5.26/mm/filemap.c~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/mm/filemap.c Tue Jul 16 21:59:37 2002
@@ -1102,89 +1102,6 @@ no_cached_page:
         UPDATE_ATIME(inode);
 }
 
-#if 0
-static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
-{
- ssize_t retval;
- int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
- struct kiobuf * iobuf;
- struct address_space * mapping = filp->f_dentry->d_inode->i_mapping;
- struct inode * inode = mapping->host;
-
- new_iobuf = 0;
- iobuf = filp->f_iobuf;
- if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
- /*
- * A parallel read/write is using the preallocated iobuf
- * so just run slow and allocate a new one.
- */
- retval = alloc_kiovec(1, &iobuf);
- if (retval)
- goto out;
- new_iobuf = 1;
- }
-
- blocksize = 1 << inode->i_blkbits;
- blocksize_bits = inode->i_blkbits;
- blocksize_mask = blocksize - 1;
- chunk_size = KIO_MAX_ATOMIC_IO << 10;
-
- retval = -EINVAL;
- if ((offset & blocksize_mask) || (count & blocksize_mask))
- goto out_free;
-
- /*
- * Flush to disk exclusively the _data_, metadata must remain
- * completly asynchronous or performance will go to /dev/null.
- */
- retval = filemap_fdatawait(mapping);
- if (retval == 0)
- retval = filemap_fdatawrite(mapping);
- if (retval == 0)
- retval = filemap_fdatawait(mapping);
- if (retval < 0)
- goto out_free;
-
- progress = retval = 0;
- while (count > 0) {
- iosize = count;
- if (iosize > chunk_size)
- iosize = chunk_size;
-
- retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
- if (retval)
- break;
-
- retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
-
- if (rw == READ && retval > 0)
- mark_dirty_kiobuf(iobuf, retval);
-
- if (retval >= 0) {
- count -= retval;
- buf += retval;
- progress += retval;
- }
-
- unmap_kiobuf(iobuf);
-
- if (retval != iosize)
- break;
- }
-
- if (progress)
- retval = progress;
-
- out_free:
- if (!new_iobuf)
- clear_bit(0, &filp->f_iobuf_lock);
- else
- free_kiovec(1, &iobuf);
- out:
- return retval;
-}
-#endif
-
 int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
         char *kaddr;
--- 2.5.26/fs/jfs/inode.c~direct-io-wrapup Tue Jul 16 21:46:47 2002
+++ 2.5.26-akpm/fs/jfs/inode.c Tue Jul 16 21:46:47 2002
@@ -293,11 +293,10 @@ static int jfs_bmap(struct address_space
         return generic_block_bmap(mapping, block, jfs_get_block);
 }
 
-static int jfs_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
- unsigned long blocknr, int blocksize)
+static int jfs_direct_IO(int rw, struct inode *inode, char *buf,
+ loff_t offset, size_t count)
 {
- return generic_direct_IO(rw, inode, iobuf, blocknr,
- blocksize, jfs_get_block);
+ return generic_direct_IO(rw, inode, buf, offset, count, jfs_get_block);
 }
 
 struct address_space_operations jfs_aops = {

.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Jul 23 2002 - 22:00:22 EST