Re: [PATCH] writeback_inodes can race with unmount

From: Andrew Morton
Date: Tue Jun 08 2004 - 22:24:32 EST


Chris Mason <mason@xxxxxxxx> wrote:
>
> On Tue, 2004-06-08 at 17:56, Andrew Morton wrote:
> > Chris Mason <mason@xxxxxxxx> wrote:
>
> > > > Why don't we have the same race in the sync() path as well? Moving the
> > > > locking to sync_sb_inodes() itself would fix it also.
> > >
> > > In the sync() path we're already taking a read lock on s_umount sem.
> > > Moving the locking into sync_sb_inodes would be tricky, it is sometimes
> > > called with the write lock on s_umount_sem held and sometimes with a
> > > read lock.
> > >
> >
> > Plus we'd be dead if we had to do the above. If that read_trylock() fails
> > the sync() will forget to sync stuff.
> >
> > And, contra your description, we'll fail the trylock relatively frequently
> > - when some other process is writing back this superblock.
>
> The write lock is taken much less frequently. Should be just on
> mount/unmount no?

OK.

> So, it looks like we get this:
>
> CPU 0 CPU 1
> writeback_inodes generic_shutdown_super
> sync_sb_inodes
> iget(inode)
> spin_unlock(&inode_lock)
> sop->put_super(sb)
> iput(inode)
> generic_delete_inode()

We really shouldn't have got to ->put_super() when there are still live
inodes around. I'd say that the problem lies on the umount path. 2.4
might have the same problem, but it'll be much harder to hit.

Something like this? If so, we should probably lock the inode in
prune_icache() and remove iprune_sem.


--- 25/fs/inode.c~a 2004-06-08 20:06:41.905455400 -0700
+++ 25-akpm/fs/inode.c 2004-06-08 20:19:40.300121424 -0700
@@ -294,10 +294,11 @@ static void dispose_list(struct list_hea
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
+static int invalidate_list(struct list_head *head, struct super_block *sb,
+ struct list_head *dispose)
{
struct list_head *next;
- int busy = 0, count = 0;
+ int ret = 0, count = 0;

next = head->next;
for (;;) {
@@ -311,6 +312,17 @@ static int invalidate_list(struct list_h
if (inode->i_sb != sb)
continue;
invalidate_inode_buffers(inode);
+ if (inode->i_state & I_LOCK) {
+ __iget(inode);
+ inodes_stat.nr_unused -= count;
+ count = 0;
+ spin_unlock(&inode_lock);
+ wait_on_inode(inode);
+ iput(inode);
+ ret = 2;
+ spin_lock(&inode_lock);
+ break;
+ }
if (!atomic_read(&inode->i_count)) {
hlist_del_init(&inode->i_hash);
list_move(&inode->i_list, dispose);
@@ -318,11 +330,11 @@ static int invalidate_list(struct list_h
count++;
continue;
}
- busy = 1;
+ ret = 1;
}
/* only unused inodes may be cached with i_count zero */
inodes_stat.nr_unused -= count;
- return busy;
+ return ret;
}

/*
@@ -343,21 +355,23 @@ static int invalidate_list(struct list_h
*/
int invalidate_inodes(struct super_block * sb)
{
- int busy;
+ int state;
LIST_HEAD(throw_away);

down(&iprune_sem);
spin_lock(&inode_lock);
- busy = invalidate_list(&inode_in_use, sb, &throw_away);
- busy |= invalidate_list(&inode_unused, sb, &throw_away);
- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
- busy |= invalidate_list(&sb->s_io, sb, &throw_away);
+ do {
+ state = invalidate_list(&inode_in_use, sb, &throw_away);
+ state |= invalidate_list(&inode_unused, sb, &throw_away);
+ state |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+ state |= invalidate_list(&sb->s_io, sb, &throw_away);
+ } while (state & 2);
spin_unlock(&inode_lock);

dispose_list(&throw_away);
up(&iprune_sem);

- return busy;
+ return state;
}

EXPORT_SYMBOL(invalidate_inodes);
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/