Re: [xfs] 32678f1513: aim7.jobs-per-min -5.6% regression

From: Dave Chinner
Date: Fri May 06 2022 - 17:29:35 EST


On Fri, May 06, 2022 at 05:22:50PM +0800, kernel test robot wrote:
>
>
> Greeting,
>
> FYI, we noticed a -5.6% regression of aim7.jobs-per-min due to commit:
>
>
> commit: 32678f151338b9a321e9e27139a63c81f353acb7 ("[PATCH 1/4] xfs: detect self referencing btree sibling pointers")
> url: https://github.com/intel-lab-lkp/linux/commits/Dave-Chinner/xfs-fix-random-format-verification-issues/20220502-162206
> base: https://git.kernel.org/cgit/fs/xfs/xfs-linux.git for-next
> patch link: https://lore.kernel.org/linux-xfs/20220502082018.1076561-2-david@xxxxxxxxxxxxx

Well, that answers the concern I had about the impact of
changing the way endian conversions were done in that patch.

> a44a027a8b2a20fe 32678f151338b9a321e9e27139a
> ---------------- ---------------------------
> %stddev %change %stddev
> \ | \
> 464232 -5.6% 438315 aim7.jobs-per-min
....
> 0.13 ± 5% +0.2 0.33 ± 6% perf-profile.children.cycles-pp.__xfs_btree_check_sblock
....
> 0.11 ± 4% +0.2 0.30 ± 5% perf-profile.self.cycles-pp.__xfs_btree_check_sblock

Because there it is, right at the bottom of the profile.

Can you try the patch below and see if that fixes the issue?

Cheers,

Dave.
--
Dave Chinner
david@xxxxxxxxxxxxx


xfs: avoid unnecessary runtime sibling pointer endian conversions

From: Dave Chinner <dchinner@xxxxxxxxxx>

Commit dc04db2aa7c9 has caused a small aim7 regression, showing a
small increase in CPU usage in __xfs_btree_check_sblock() as a
result of the extra checking.

This is likely due to the endian conversion of the sibling pointers
being unconditional instead of relying on the compiler to endian
convert the NULL pointer at compile time and avoiding the runtime
conversion for this common case.

Rework the checks so that endian conversion of the sibling pointers
is only done if they are not null as the original code did.

Fixes: dc04db2aa7c9 ("xfs: detect self referencing btree sibling pointers")
Reported-by: kernel test robot <oliver.sang@xxxxxxxxx>
Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
fs/xfs/libxfs/xfs_btree.c | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2aa300f7461f..4d673e943317 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -51,16 +51,25 @@ xfs_btree_magic(
return magic;
}

+/*
+ * These sibling pointer checks are optimised for null sibling pointers. This
+ * happens a lot, and we don't need to byte swap at runtime if the sibling
+ * pointer is NULL.
+ */
static xfs_failaddr_t
xfs_btree_check_lblock_siblings(
struct xfs_mount *mp,
struct xfs_btree_cur *cur,
int level,
xfs_fsblock_t fsb,
- xfs_fsblock_t sibling)
+ __be64 dsibling)
{
- if (sibling == NULLFSBLOCK)
+ xfs_fsblock_t sibling;
+
+ if (dsibling == cpu_to_be64(NULLFSBLOCK))
return NULL;
+
+ sibling = be64_to_cpu(dsibling);
if (sibling == fsb)
return __this_address;
if (level >= 0) {
@@ -81,10 +90,14 @@ xfs_btree_check_sblock_siblings(
int level,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
- xfs_agblock_t sibling)
+ __be32 dsibling)
{
- if (sibling == NULLAGBLOCK)
+ xfs_agblock_t sibling;
+
+ if (dsibling == cpu_to_be32(NULLAGBLOCK))
return NULL;
+
+ sibling = be32_to_cpu(dsibling);
if (sibling == agbno)
return __this_address;
if (level >= 0) {
@@ -136,10 +149,10 @@ __xfs_btree_check_lblock(
fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));

fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
- be64_to_cpu(block->bb_u.l.bb_leftsib));
+ block->bb_u.l.bb_leftsib);
if (!fa)
fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
- be64_to_cpu(block->bb_u.l.bb_rightsib));
+ block->bb_u.l.bb_rightsib);
return fa;
}

@@ -204,10 +217,10 @@ __xfs_btree_check_sblock(
}

fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno,
- be32_to_cpu(block->bb_u.s.bb_leftsib));
+ block->bb_u.s.bb_leftsib);
if (!fa)
fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno,
- agbno, be32_to_cpu(block->bb_u.s.bb_rightsib));
+ agbno, block->bb_u.s.bb_rightsib);
return fa;
}

@@ -4523,10 +4536,10 @@ xfs_btree_lblock_verify(
/* sibling pointer verification */
fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
- be64_to_cpu(block->bb_u.l.bb_leftsib));
+ block->bb_u.l.bb_leftsib);
if (!fa)
fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
- be64_to_cpu(block->bb_u.l.bb_rightsib));
+ block->bb_u.l.bb_rightsib);
return fa;
}

@@ -4580,10 +4593,10 @@ xfs_btree_sblock_verify(
agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
- be32_to_cpu(block->bb_u.s.bb_leftsib));
+ block->bb_u.s.bb_leftsib);
if (!fa)
fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
- be32_to_cpu(block->bb_u.s.bb_rightsib));
+ block->bb_u.s.bb_rightsib);
return fa;
}