Re: [PATCH] wait_on_bit: add an acquire memory barrier

From: Linus Torvalds
Date: Mon Aug 22 2022 - 13:39:43 EST


On Mon, Aug 22, 2022 at 10:08 AM Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> So why don't we just create a "test_bit_acquire()" and be done with
> it? We literally created clear_bit_unlock() for the opposite reason,
> and your comments about the new barrier hack even point to it.

Here's a patch that is

(a) almost entirely untested (I checked that one single case builds
and seems to generate the expected code)

(b) needs some more loving

but seems to superficially work.

At a minimum this needs to be split into two (so the bitop and the
wait_on_bit parts split up), and that whole placement of
<asm/barrier.h> and generic_bit_test_acquire() need at least some
thinking about, but on the whole it seems reasonable.

For example, it would make more sense to have this in
<asm-generic/bitops/lock.h>, but not all architectures include that,
and some do their own version. I didn't want to mess with
architecture-specific headers, so this illogically just uses
generic-non-atomic.h.

Maybe just put it in <linux/bitops.h> directly?

So I'm not at all claiming that this is a great patch. It definitely
needs more work, and a lot more testing.

But I think this is at least the right _direction_ to take here.

And yes, I think it also would have been better if
"clear_bit_unlock()" would have been called "clear_bit_release()", and
we'd have more consistent naming with our ordered atomics. But it's
probably not worth changing.

Linus
include/asm-generic/bitops/generic-non-atomic.h | 9 +++++++++
include/asm-generic/bitops/non-atomic.h | 1 +
include/linux/bitops.h | 2 +-
include/linux/wait_bit.h | 8 ++++----
kernel/sched/wait_bit.c | 2 +-
5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h
index 3d5ebd24652b..f56a252db9e8 100644
--- a/include/asm-generic/bitops/generic-non-atomic.h
+++ b/include/asm-generic/bitops/generic-non-atomic.h
@@ -4,6 +4,7 @@
#define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H

#include <linux/bits.h>
+#include <asm/barrier.h>

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
@@ -158,4 +159,12 @@ const_test_bit(unsigned long nr, const volatile unsigned long *addr)
return !!(val & mask);
}

+static __always_inline bool
+generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ return smp_load_acquire(p) & mask;
+}
+
#endif /* __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H */
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 5c37ced343ae..71f8d54a5195 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -13,6 +13,7 @@
#define arch___test_and_change_bit generic___test_and_change_bit

#define arch_test_bit generic_test_bit
+#define arch_test_bit_acquire generic_test_bit_acquire

#include <asm-generic/bitops/non-instrumented-non-atomic.h>

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cf9bf65039f2..22adf74d5c25 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -59,7 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w);
#define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr)
#define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr)
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
-
+#define test_bit_acquire(nr, addr) generic_test_bit_acquire(nr, addr)
/*
* Include this here because some architectures need generic_ffs/fls in
* scope
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 7dec36aecbd9..7725b7579b78 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -71,7 +71,7 @@ static inline int
wait_on_bit(unsigned long *word, int bit, unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
bit_wait,
@@ -96,7 +96,7 @@ static inline int
wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
bit_wait_io,
@@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
unsigned long timeout)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit_timeout(word, bit,
bit_wait_timeout,
@@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit, action, mode);
}
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index d4788f810b55..0b1cd985dc27 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
ret = (*action)(&wbq_entry->key, mode);
- } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+ } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);

finish_wait(wq_head, &wbq_entry->wq_entry);