Re: [patch-2.3.50-pre2] bdget() needs to be exported

From: Mike Galbraith (mikeg@weiden.de)
Date: Wed Mar 08 2000 - 10:49:17 EST


On Wed, 8 Mar 2000, Tigran Aivazian wrote:

> On Wed, 8 Mar 2000, Mike Galbraith wrote:
> > OK, I think I have it fixed, and would _really_ appreciate it if you would
> > test/confirm. (would very much like to see this problem finally die..
> > and stay dead long enough for someone to actually use the device :)
>
> Hi Mike,
>
> I tested your patch and it works ok (no initrd, just plain ramdisk) but I
> have questions/comments:

Thanks for testing.

There's still a problem. If the ramdisk has been used but is idle when
kswapd decides to shrink the icache, it (rightfully I think) victimizes
the ramdisk's inode. That triggers BUG() in bdput() ala..

kernel BUG at block_dev.c:426!
kdb> bt
    EBP EIP Function(args)
0xc11d1f4c 0xc0131707 bdput+0x2b(0xc4ff23e0)
0xc11d1f5c 0xc013bb41 clear_inode+0xcd(0xc2400840)
0xc11d1f70 0xc013bb95 dispose_list+0x39(0xc11d1f8c, 0x5, 0x6)
0xc11d1f94 0xc013bd82 prune_icache+0xda()
0xc11d1fa0 0xc013bda8 shrink_icache_memory+0x1c(0x6, 0x4, 0xc028fc2c, 0x6, 0x4)
0xc11d1fcc 0xc0127265 do_try_to_free_pages+0x59(0x4, 0xc028fc2c, 0xf00)
0xc11d1fec 0xc012732e kswapd+0x7e(0x0, 0x78, 0xc02adfd0, 0xc01d5758, 0xc01d576c)
0xc1175f94 0xc0107598 kernel_thread+0x28)
0xc1174000 0x0 kernel_thread+0x3fef8a90)
0x1 0x100 kernel_thread

I revised the protections to igrab()/iput() the inode, and it seems ok.

> 1. I believe there is no need to check return of rbh = getblk() as
> getblk() never returns NULL. This is why things like bread()/breada()
> never check the return of getblk().

Looks right.. removed in the latest attempt. (below)

> 2. why do we mark_buffer_protected(rbh)? I thought the point was to make
> bdflush to skip them when flushing buffers but looking at bdflush()
> sources it skips !buffer_dirty() and buffer_locked() buffers and nothing
> (except obviously show_buffers()) seems to ask if(buffer_protected(bh))
> question.

Check out __refile_buffer();

> Hence my question - does mark_buffer_protected(bh) serve any purpose?

AFAIK, that's what keeps the buffers from being disposed of when they
become idle.

        -Mike

--- linux-2.3.50.virgin/drivers/block/rd.c Fri Mar 3 06:28:01 2000
+++ linux-2.3.49.test/drivers/block/rd.c Wed Mar 8 16:36:24 2000
@@ -99,6 +99,7 @@
 static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */
 static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */
 static devfs_handle_t devfs_handle = NULL;
+static struct inode *rd_inode[NUM_RAMDISKS]; /* Protected device inodes */
 
 /*
  * Parameters for the boot-loading of the RAM disk. These are set by
@@ -169,11 +170,18 @@
         return ramdisk_size(str);
 }
 
+static int __init ramdisk_blocksize(char *str)
+{
+ rd_blocksize = simple_strtol(str,NULL,0);
+ return 1;
+}
+
 __setup("ramdisk_start=", ramdisk_start_setup);
 __setup("load_ramdisk=", load_ramdisk);
 __setup("prompt_ramdisk=", prompt_ramdisk);
 __setup("ramdisk=", ramdisk_size);
 __setup("ramdisk_size=", ramdisk_size2);
+__setup("ramdisk_blocksize=", ramdisk_blocksize);
 
 #endif
 
@@ -216,67 +224,16 @@
                 goto repeat;
         }
 
- /*
- * This has become somewhat more complicated with the addition of
- * the page cache. The problem is that in some cases the furnished
- * buffer is "real", i.e., part of the existing ramdisk, while in
- * others it is "unreal", e.g., part of a page. In the first case
- * not much needs to be done, while in the second, some kind of
- * transfer is needed.
- *
- * The two cases are distinguished here by checking whether the
- * real buffer is already in the buffer cache, and whether it is
- * the same as the one supplied.
- *
- * There are three cases with read/write to consider:
- *
- * 1. Supplied buffer matched one in the buffer cache:
- * Read - Clear the buffer, as it wasn't already valid.
- * Write - Mark the buffer as "Protected".
- *
- * 2. Supplied buffer mismatched one in the buffer cache:
- * Read - Copy the data from the buffer cache entry.
- * Write - Copy the data to the buffer cache entry.
- *
- * 3 No buffer cache entry existed:
- * Read - Clear the supplied buffer, but do not create a real
- * one.
- * Write - Create a real buffer, copy the data to it, and mark
- * it as "Protected".
- *
- * NOTE: There seems to be some schizophrenia here - the logic
- * using "len" seems to assume arbitrary request lengths, while
- * the "protect" logic assumes a single buffer cache entry.
- * This seems to be left over from the ancient contiguous ramdisk
- * logic.
- */
-
         sbh = CURRENT->bh;
- rbh = get_hash_table(sbh->b_dev, sbh->b_blocknr, sbh->b_size);
- if (sbh == rbh) {
- if (CURRENT->cmd == READ)
- memset(CURRENT->buffer, 1, len);
- } else if (rbh) {
- if (CURRENT->cmd == READ)
+ rbh = getblk(sbh->b_dev, sbh->b_blocknr, sbh->b_size);
+ if (CURRENT->cmd == READ) {
+ if (sbh != rbh)
                         memcpy(CURRENT->buffer, rbh->b_data, rbh->b_size);
- else
+ } else
+ if (sbh != rbh)
                         memcpy(rbh->b_data, CURRENT->buffer, rbh->b_size);
- } else { /* !rbh */
- if (CURRENT->cmd == READ)
- memset(sbh->b_data, 2, len);
- else {
- rbh = getblk(sbh->b_dev, sbh->b_blocknr, sbh->b_size);
- if (rbh)
- memcpy(rbh->b_data, CURRENT->buffer,
- rbh->b_size);
- else
- BUG(); /* No buffer, what to do here? */
- }
- }
- if (rbh) {
- mark_buffer_protected(rbh);
- brelse(rbh);
- }
+ mark_buffer_protected(rbh);
+ brelse(rbh);
 
         end_request(1);
         goto repeat;
@@ -372,6 +329,14 @@
         if (DEVICE_NR(inode->i_rdev) >= NUM_RAMDISKS)
                 return -ENXIO;
 
+ /*
+ * Immunize device against invalidate_buffers() and prune_icache().
+ */
+ if (rd_inode[DEVICE_NR(inode->i_rdev)] == NULL) {
+ if((rd_inode[DEVICE_NR(inode->i_rdev)] = igrab(inode)) != NULL)
+ atomic_inc(&rd_inode[DEVICE_NR(inode->i_rdev)]->i_bdev->bd_openers);
+ }
+
         MOD_INC_USE_COUNT;
 
         return 0;
@@ -389,24 +354,31 @@
         ioctl: rd_ioctl,
 };
 
+#ifdef MODULE
 /* Before freeing the module, invalidate all of the protected buffers! */
 static void __exit rd_cleanup (void)
 {
         int i;
 
         for (i = 0 ; i < NUM_RAMDISKS; i++) {
- struct block_device *bdev;
- bdev = bdget(kdev_t_to_nr(MKDEV(MAJOR_NR,i)));
- atomic_dec(&bdev->bd_openers);
+ if (rd_inode[i]) {
+ /* withdraw invalidate_buffers() and prune_icache() immunity */
+ atomic_dec(&rd_inode[i]->i_bdev->bd_openers);
+ /* remove stale pointer to module address space */
+ rd_inode[i]->i_bdev->bd_op = NULL;
+ iput(rd_inode[i]);
+ }
                 destroy_buffers(MKDEV(MAJOR_NR, i));
         }
 
         devfs_unregister (devfs_handle);
         unregister_blkdev( MAJOR_NR, "ramdisk" );
         blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
+ hardsect_size[MAJOR_NR] = NULL;
         blksize_size[MAJOR_NR] = NULL;
         blk_size[MAJOR_NR] = NULL;
 }
+#endif
 
 /* This is the registration and initialization section of the RAM disk driver */
 int __init rd_init (void)
@@ -441,21 +413,16 @@
                                S_IFBLK | S_IRUSR | S_IWUSR, 0, 0,
                                &fd_fops, NULL);
 
- hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */
- blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */
-
- for (i = 0; i < NUM_RAMDISKS; i++) {
- struct block_device *bdev;
+ for (i = 0; i < NUM_RAMDISKS; i++)
                 register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &fd_fops, rd_size<<1);
- bdev = bdget(kdev_t_to_nr(MKDEV(MAJOR_NR,i)));
- atomic_inc(&bdev->bd_openers); /* avoid invalidate_buffers() */
- }
 
 #ifdef CONFIG_BLK_DEV_INITRD
         /* We ought to separate initrd operations here */
         register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &fd_fops, rd_size<<1);
 #endif
 
+ hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */
+ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */
         blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */
 
                 /* rd_size is given in kB */
@@ -468,8 +435,8 @@
 
 #ifdef MODULE
 module_init(rd_init);
-#endif
 module_exit(rd_cleanup);
+#endif
 
 /* loadable module support */
 MODULE_PARM (rd_size, "1i");

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Wed Mar 15 2000 - 21:00:13 EST