Re: [6.2][regression] after commit 947a629988f191807d2d22ba63ae18259bb645c5 btrfs volume periodical forced switch to readonly after a lot of disk writes

From: Qu Wenruo
Date: Mon Dec 26 2022 - 19:18:23 EST




On 2022/12/27 07:52, Mikhail Gavrilov wrote:
On Tue, Dec 27, 2022 at 4:36 AM Qu Wenruo <quwenruo.btrfs@xxxxxxx> wrote:


Please be aware that, this newer version may cause extra kernel
warnings, if it found any uninitialized btrfs_tree_parent_check
structure at submit time.

But there are completely valid cases we can do that, thus it may cause
much larger dmesg output, and you should not rely on the kernel warning
to determine if the bug is triggered.


The kernel failed to built with v2 patch.

Here list of compilatore error messages:
fs/btrfs/disk-io.c: In function ‘btrfs_submit_metadata_bio’:
fs/btrfs/disk-io.c:5413:2: error: unterminated argument list invoking
macro "WARN_ON"
5413 | }
| ^
CC net/ipv4/icmp.o
fs/btrfs/disk-io.c:851:17: error: ‘WARN_ON’ undeclared (first use in
this function)
851 | WARN_ON(!memcmp(&check, &bbio->parent_check,
| ^~~~~~~
fs/btrfs/disk-io.c:851:17: note: each undeclared identifier is
reported only once for each function it appears in
fs/btrfs/disk-io.c:851:24: error: expected ‘;’ at end of input
851 | WARN_ON(!memcmp(&check, &bbio->parent_check,
| ^
| ;
......
fs/btrfs/disk-io.c:851:17: error: expected declaration or statement at
end of input
851 | WARN_ON(!memcmp(&check, &bbio->parent_check,
| ^~~~~~~
fs/btrfs/disk-io.c:851:17: error: expected declaration or statement at
end of input
fs/btrfs/disk-io.c:845:22: warning: unused variable ‘ret’ [-Wunused-variable]
845 | blk_status_t ret;
| ^~~
fs/btrfs/disk-io.c:844:40: warning: unused variable ‘check’ [-Wunused-variable]
844 | struct btrfs_tree_parent_check check = {0};
| ^~~~~
fs/btrfs/disk-io.c:842:31: warning: unused variable ‘fs_info’
[-Wunused-variable]
842 | struct btrfs_fs_info *fs_info = inode->root->fs_info;
| ^~~~~~~
HDRTEST usr/include/linux/loadpin.h
fs/btrfs/disk-io.c: At top level:
fs/btrfs/disk-io.c:63:13: warning: ‘btrfs_destroy_ordered_extents’
declared ‘static’ but never defined [-Wunused-function]
63 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:64:12: warning: ‘btrfs_destroy_delayed_refs’
declared ‘static’ but never defined [-Wunused-function]
64 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:66:13: warning: ‘btrfs_destroy_delalloc_inodes’
declared ‘static’ but never defined [-Wunused-function]
66 | static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:67:12: warning: ‘btrfs_destroy_marked_extents’
declared ‘static’ but never defined [-Wunused-function]
67 | static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:70:12: warning: ‘btrfs_destroy_pinned_extent’
declared ‘static’ but never defined [-Wunused-function]
70 | static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:72:12: warning: ‘btrfs_cleanup_transaction’
declared ‘static’ but never defined [-Wunused-function]
72 | static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
| ^~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:73:13: warning: ‘btrfs_error_commit_super’ declared
‘static’ but never defined [-Wunused-function]
73 | static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
| ^~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:828:13: warning: ‘should_async_write’ defined but
not used [-Wunused-function]
828 | static bool should_async_write(struct btrfs_fs_info *fs_info,
| ^~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:161:13: warning: ‘btrfs_supported_super_csum’
defined but not used [-Wunused-function]
161 | static bool btrfs_supported_super_csum(u16 csum_type)
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:130:12: warning: ‘verify_parent_transid’ defined
but not used [-Wunused-function]
130 | static int verify_parent_transid(struct extent_io_tree *io_tree,
| ^~~~~~~~~~~~~~~~~~~~~
fs/btrfs/disk-io.c:75:13: warning: ‘btrfs_free_csum_hash’ defined but
not used [-Wunused-function]
75 | static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
| ^~~~~~~~~~~~~~~~~~~~
CC kernel/notifier.o
make[3]: *** [scripts/Makefile.build:252: fs/btrfs/disk-io.o] Error 1
make[3]: *** Waiting for unfinished jobs....



My bad, missing a ")" in the WARN_ON() line.

Fixed with v3 patch.

Thanks,
QuFrom 6e8209a73e1dd69ea1c0f0b0865e41f76da14976 Mon Sep 17 00:00:00 2001
Message-Id: <6e8209a73e1dd69ea1c0f0b0865e41f76da14976.1672100228.git.wqu@xxxxxxxx>
From: Qu Wenruo <wqu@xxxxxxxx>
Date: Mon, 26 Dec 2022 16:44:08 +0800
Subject: [PATCH v3] btrfs: add extra debug for level mismatch

Currently I assume there is some race or uninitialized value for
check::level.

The extra output are for two locations:

- validate_extent_buffer()
Output the error message for read error and the members of check.

- read_extent_buffer_pages()
This will dump the stack for us to catch the offender.

Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
---
Changelog:
v2:
- Extra submission time output
This would greately enlarge the dmesg size

- Extra warning when submitting a metadata bio
If we have an uninitialized check structure, do a warning and stack
dump to show the offending call trace.

v3:
- Fix a compiling error
---
fs/btrfs/disk-io.c | 19 +++++++++++++++++--
fs/btrfs/extent_io.c | 13 ++++++++++++-
2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f8b5955f003f..49b077acf359 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -530,6 +530,10 @@ static int validate_extent_buffer(struct extent_buffer *eb,
}

if (found_level != check->level) {
+ btrfs_err(eb->fs_info,
+"level verify failed on logical %llu mirror %u wanted %u found %u",
+ eb->start, eb->read_mirror, check->level,
+ found_level);
ret = -EIO;
goto out;
}
@@ -581,13 +585,20 @@ static int validate_extent_buffer(struct extent_buffer *eb,
if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;

+out:
if (!ret)
set_extent_buffer_uptodate(eb);
- else
+ else {
btrfs_err(fs_info,
"read time tree block corruption detected on logical %llu mirror %u",
eb->start, eb->read_mirror);
-out:
+ btrfs_err(eb->fs_info,
+"check owner_root=%llu transid=%llu first_key=(%llu %u %llu) has_first_key=%d level=%u",
+ check->owner_root,
+ check->transid, check->first_key.objectid,
+ check->first_key.type, check->first_key.offset,
+ check->has_first_key, check->level);
+ }
return ret;
}

@@ -652,6 +663,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
int reads_done;

ASSERT(page->private);
+ WARN_ON(!bbio->is_metadata);

if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
return validate_subpage_buffer(page, start, end, mirror,
@@ -833,12 +845,15 @@ void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int m
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_bio *bbio = btrfs_bio(bio);
+ struct btrfs_tree_parent_check check = {0};
blk_status_t ret;

bio->bi_opf |= REQ_META;
bbio->is_metadata = 1;

if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
+ WARN_ON(!memcmp(&check, &bbio->parent_check,
+ sizeof(struct btrfs_tree_parent_check)));
btrfs_submit_bio(fs_info, bio, mirror_num);
return;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 83dd3aa59663..c94eb036dde4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5005,8 +5005,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
wait_on_page_locked(page);
- if (!PageUptodate(page))
+ if (!PageUptodate(page)) {
ret = -EIO;
+ btrfs_err(eb->fs_info,
+"read failed, bytenr=%llu check owner_root=%llu transid=%llu has_first_key=%d first_key=(%llu %u %llu) level=%u",
+ eb->start,
+ check->owner_root, check->transid,
+ check->has_first_key,
+ check->first_key.objectid,
+ check->first_key.type,
+ check->first_key.offset,
+ check->level);
+ dump_stack();
+ }
}

return ret;
--
2.39.0