Re: mm: fix page_mkclean_one (was: 2.6.19 file content corruptionon ext3)

From: Martin Johansson
Date: Thu Dec 21 2006 - 09:07:23 EST


> On Wed, 20 Dec 2006, Linus Torvalds wrote:
> Martin, Andrei, does this make any difference for your corruption cases?

Hi!
I've been watching this issue since I'm experiencing rtorrent corruption since 2.6.19.

Details: i386, UP, no preempt:
kungen:/proc# zgrep PREEMPT config.gz
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
kungen:/proc# uname -a
Linux kungen.fatbob.nu 2.6.19.1 #3 Thu Dec 21 13:18:06 CET 2006 i686 GNU/Linux

Corruption is still present with the patch below (patched against 2.6.19.1 and removed task_io_account_cancelled_write call)

/Martin

[Not subscribed to the list]

> ---
> diff --git a/fs/buffer.c b/fs/buffer.c
> index d1f1b54..263f88e 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -2834,7 +2834,7 @@ int try_to_free_buffers(struct page *page)
> int ret = 0;
> > BUG_ON(!PageLocked(page));
> - if (PageWriteback(page))
> + if (PageDirty(page) || PageWriteback(page))
> return 0;
> > if (mapping == NULL) { /* can this still happen? */
> @@ -2845,22 +2845,6 @@ int try_to_free_buffers(struct page *page)
> spin_lock(&mapping->private_lock);
> ret = drop_buffers(page, &buffers_to_free);
> spin_unlock(&mapping->private_lock);
> - if (ret) {
> - /*
> - * If the filesystem writes its buffers by hand (eg ext3)
> - * then we can have clean buffers against a dirty page. We
> - * clean the page here; otherwise later reattachment of buffers
> - * could encounter a non-uptodate page, which is unresolvable.
> - * This only applies in the rare case where try_to_free_buffers
> - * succeeds but the page is not freed.
> - *
> - * Also, during truncate, discard_buffer will have marked all
> - * the page's buffers clean. We discover that here and clean
> - * the page also.
> - */
> - if (test_clear_page_dirty(page))
> - task_io_account_cancelled_write(PAGE_CACHE_SIZE);
> - }
> out:
> if (buffers_to_free) {
> struct buffer_head *bh = buffers_to_free;
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index ed2c223..4f4cd13 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -176,7 +176,7 @@ static int hugetlbfs_commit_write(struct file *file,
> > static void truncate_huge_page(struct page *page)
> {
> - clear_page_dirty(page);
> + cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
> ClearPageUptodate(page);
> remove_from_page_cache(page);
> put_page(page);
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 4830a3b..350878a 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -253,15 +253,11 @@ static inline void SetPageUptodate(struct page *page)
> > struct page; /* forward declaration */
> > -int test_clear_page_dirty(struct page *page);
> +extern void cancel_dirty_page(struct page *page, unsigned int account_size);
> +
> int test_clear_page_writeback(struct page *page);
> int test_set_page_writeback(struct page *page);
> > -static inline void clear_page_dirty(struct page *page)
> -{
> - test_clear_page_dirty(page);
> -}
> -
> static inline void set_page_writeback(struct page *page)
> {
> test_set_page_writeback(page);
> diff --git a/mm/memory.c b/mm/memory.c
> index c00bac6..79cecab 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1842,6 +1842,33 @@ void unmap_mapping_range(struct address_space *mapping,
> }
> EXPORT_SYMBOL(unmap_mapping_range);
> > +static void check_last_page(struct address_space *mapping, loff_t size)
> +{
> + pgoff_t index;
> + unsigned int offset;
> + struct page *page;
> +
> + if (!mapping)
> + return;
> + offset = size & ~PAGE_MASK;
> + if (!offset)
> + return;
> + index = size >> PAGE_SHIFT;
> + page = find_lock_page(mapping, index);
> + if (page) {
> + unsigned int check = 0;
> + unsigned char *kaddr = kmap_atomic(page, KM_USER0);
> + do {
> + check += kaddr[offset++];
> + } while (offset < PAGE_SIZE);
> + kunmap_atomic(kaddr,KM_USER0);
> + unlock_page(page);
> + page_cache_release(page);
> + if (check)
> + printk("%s: BADNESS: truncate check %u\n", current->comm, check);
> + }
> +}
> +
> /**
> * vmtruncate - unmap mappings "freed" by truncate() syscall
> * @inode: inode of the file used
> @@ -1875,6 +1902,7 @@ do_expand:
> goto out_sig;
> if (offset > inode->i_sb->s_maxbytes)
> goto out_big;
> + check_last_page(mapping, inode->i_size);
> i_size_write(inode, offset);
> > out_truncate:
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 237107c..b3a198c 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -845,38 +845,6 @@ int set_page_dirty_lock(struct page *page)
> EXPORT_SYMBOL(set_page_dirty_lock);
> > /*
> - * Clear a page's dirty flag, while caring for dirty memory accounting.
> - * Returns true if the page was previously dirty.
> - */
> -int test_clear_page_dirty(struct page *page)
> -{
> - struct address_space *mapping = page_mapping(page);
> - unsigned long flags;
> -
> - if (!mapping)
> - return TestClearPageDirty(page);
> -
> - write_lock_irqsave(&mapping->tree_lock, flags);
> - if (TestClearPageDirty(page)) {
> - radix_tree_tag_clear(&mapping->page_tree,
> - page_index(page), PAGECACHE_TAG_DIRTY);
> - write_unlock_irqrestore(&mapping->tree_lock, flags);
> - /*
> - * We can continue to use `mapping' here because the
> - * page is locked, which pins the address_space
> - */
> - if (mapping_cap_account_dirty(mapping)) {
> - page_mkclean(page);
> - dec_zone_page_state(page, NR_FILE_DIRTY);
> - }
> - return 1;
> - }
> - write_unlock_irqrestore(&mapping->tree_lock, flags);
> - return 0;
> -}
> -EXPORT_SYMBOL(test_clear_page_dirty);
> -
> -/*
> * Clear a page's dirty flag, while caring for dirty memory accounting.
> * Returns true if the page was previously dirty.
> *
> diff --git a/mm/truncate.c b/mm/truncate.c
> index 9bfb8e8..bf9e296 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -51,6 +51,20 @@ static inline void truncate_partial_page(struct page *page, unsigned partial)
> do_invalidatepage(page, partial);
> }
> > +void cancel_dirty_page(struct page *page, unsigned int account_size)
> +{
> + /* If we're cancelling the page, it had better not be mapped any more */
> + if (page_mapped(page)) {
> + static unsigned int warncount;
> +
> + WARN_ON(++warncount < 5);
> + }
> + > + if (TestClearPageDirty(page) && account_size)
> + task_io_account_cancelled_write(account_size);
> +}
> +
> +
> /*
> * If truncate cannot remove the fs-private metadata from the page, the page
> * becomes anonymous. It will be left on the LRU and may even be mapped into
> @@ -70,8 +84,8 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
> if (PagePrivate(page))
> do_invalidatepage(page, 0);
> > - if (test_clear_page_dirty(page))
> - task_io_account_cancelled_write(PAGE_CACHE_SIZE);
> + cancel_dirty_page(page, PAGE_CACHE_SIZE);
> +
> ClearPageUptodate(page);
> ClearPageMappedToDisk(page);
> remove_from_page_cache(page);
> @@ -350,7 +364,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
> for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
> struct page *page = pvec.pages[i];
> pgoff_t page_index;
> - int was_dirty;
> > lock_page(page);
> if (page->mapping != mapping) {
> @@ -386,12 +399,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
> PAGE_CACHE_SIZE, 0);
> }
> }
> - was_dirty = test_clear_page_dirty(page);
> - if (!invalidate_complete_page2(mapping, page)) {
> - if (was_dirty)
> - set_page_dirty(page);
> + if (!invalidate_complete_page2(mapping, page))
> ret = -EIO;
> - }
> unlock_page(page);
> }
> pagevec_release(&pvec);
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/