Re: [PATCH v2 00/10] Rework READ_ONCE() to improve codegen

From: Peter Zijlstra
Date: Fri Jan 24 2020 - 05:41:54 EST


On Fri, Jan 24, 2020 at 09:33:07AM +0100, Peter Zijlstra wrote:
> On Thu, Jan 23, 2020 at 09:59:03AM -0800, Linus Torvalds wrote:
> > On Thu, Jan 23, 2020 at 7:33 AM Will Deacon <will@xxxxxxxxxx> wrote:
> > >
> > > This is version two of the patches I previously posted as an RFC here:
> >
> > Looks fine to me, as far as I can tell,
>
> Awesome, I've picked them up with a target for tip/locking/core.

FWIW, I have the following merge resolution against locking/kcsan.

---

diff --cc include/linux/compiler.h
index 8c0beb10c1dd,994c35638584..000000000000
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@@ -177,28 -177,69 +177,74 @@@ void ftrace_likely_update(struct ftrace
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
#endif

- #include <uapi/linux/types.h>
- #include <linux/kcsan-checks.h>
+ /*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
+ * particular ordering. One way to make the compiler aware of ordering is to
+ * put the two invocations of READ_ONCE or WRITE_ONCE in different C
+ * statements.
+ *
+ * These two macros will also work on aggregate data types like structs or
+ * unions.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+ #include <asm/barrier.h>
+ #include <linux/kasan-checks.h>
+
+ /*
+ * Use __READ_ONCE() instead of READ_ONCE() if you do not require any
+ * atomicity or dependency ordering guarantees. Note that this may result
+ * in tears!
+ */
+ #define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
+
+ #define __READ_ONCE_SCALAR(x) \
+ ({ \
+ __unqual_scalar_typeof(x) __x = __READ_ONCE(x); \
+ smp_read_barrier_depends(); \
+ (typeof(x))__x; \
+ })

- #define __READ_ONCE_SIZE \
+ /*
+ * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need
+ * to hide memory access from KASAN.
+ */
+ #define READ_ONCE_NOCHECK(x) \
({ \
- switch (size) { \
- case 1: *(__u8 *)res = *(volatile __u8 *)p; break; \
- case 2: *(__u16 *)res = *(volatile __u16 *)p; break; \
- case 4: *(__u32 *)res = *(volatile __u32 *)p; break; \
- case 8: *(__u64 *)res = *(volatile __u64 *)p; break; \
- default: \
- barrier(); \
- __builtin_memcpy((void *)res, (const void *)p, size); \
- barrier(); \
- } \
+ compiletime_assert_rwonce_type(x); \
+ __READ_ONCE_SCALAR(x); \
+ })
+
-#define READ_ONCE(x) READ_ONCE_NOCHECK(x)
++#define READ_ONCE(x) \
++({ \
++ kcsan_check_atomic_read(&(x), sizeof(x)); \
++ READ_ONCE_NOCHECK(x); \
+})

+ #define __WRITE_ONCE(x, val) \
+ do { \
+ *(volatile typeof(x) *)&(x) = (val); \
+ } while (0)
+
+ #define WRITE_ONCE(x, val) \
+ do { \
+ compiletime_assert_rwonce_type(x); \
++ kcsan_check_atomic_write(&(x), sizeof(x)); \
+ __WRITE_ONCE(x, val); \
+ } while (0)
+
#ifdef CONFIG_KASAN
/*
- * We can't declare function 'inline' because __no_sanitize_address conflicts
+ * We can't declare function 'inline' because __no_sanitize_address confilcts
* with inlining. Attempt to inline it may cause a build failure.
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
++ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
* '__maybe_unused' allows us to avoid defined-but-not-used warnings.
*/
# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
@@@ -207,97 -247,6 +253,24 @@@
# define __no_kasan_or_inline __always_inline
#endif

+#define __no_kcsan __no_sanitize_thread
+#ifdef __SANITIZE_THREAD__
+/*
+ * Rely on __SANITIZE_THREAD__ instead of CONFIG_KCSAN, to avoid not inlining in
+ * compilation units where instrumentation is disabled. The attribute 'noinline'
+ * is required for older compilers, where implicit inlining of very small
+ * functions renders __no_sanitize_thread ineffective.
+ */
+# define __no_kcsan_or_inline __no_kcsan noinline notrace __maybe_unused
+# define __no_sanitize_or_inline __no_kcsan_or_inline
+#else
+# define __no_kcsan_or_inline __always_inline
+#endif
+
+#ifndef __no_sanitize_or_inline
+#define __no_sanitize_or_inline __always_inline
+#endif
+
- static __no_kcsan_or_inline
- void __read_once_size(const volatile void *p, void *res, int size)
- {
- kcsan_check_atomic_read(p, size);
- __READ_ONCE_SIZE;
- }
-
- static __no_sanitize_or_inline
- void __read_once_size_nocheck(const volatile void *p, void *res, int size)
- {
- __READ_ONCE_SIZE;
- }
-
- static __no_kcsan_or_inline
- void __write_once_size(volatile void *p, void *res, int size)
- {
- kcsan_check_atomic_write(p, size);
-
- switch (size) {
- case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
- case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
- case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
- case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
- default:
- barrier();
- __builtin_memcpy((void *)p, (const void *)res, size);
- barrier();
- }
- }
-
- /*
- * Prevent the compiler from merging or refetching reads or writes. The
- * compiler is also forbidden from reordering successive instances of
- * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
- * particular ordering. One way to make the compiler aware of ordering is to
- * put the two invocations of READ_ONCE or WRITE_ONCE in different C
- * statements.
- *
- * These two macros will also work on aggregate data types like structs or
- * unions. If the size of the accessed data type exceeds the word size of
- * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
- * fall back to memcpy(). There's at least two memcpy()s: one for the
- * __builtin_memcpy() and then one for the macro doing the copy of variable
- * - '__u' allocated on the stack.
- *
- * Their two major use cases are: (1) Mediating communication between
- * process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
- * mutilate accesses that either do not require ordering or that interact
- * with an explicit memory barrier or atomic instruction that provides the
- * required ordering.
- */
- #include <asm/barrier.h>
- #include <linux/kasan-checks.h>
-
- #define __READ_ONCE(x, check) \
- ({ \
- union { typeof(x) __val; char __c[1]; } __u; \
- if (check) \
- __read_once_size(&(x), __u.__c, sizeof(x)); \
- else \
- __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \
- smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
- __u.__val; \
- })
- #define READ_ONCE(x) __READ_ONCE(x, 1)
-
- /*
- * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need
- * to hide memory access from KASAN.
- */
- #define READ_ONCE_NOCHECK(x) __READ_ONCE(x, 0)
-
static __no_kasan_or_inline
unsigned long read_word_at_a_time(const void *addr)
{
@@@ -305,34 -254,6 +278,26 @@@
return *(unsigned long *)addr;
}

- #define WRITE_ONCE(x, val) \
- ({ \
- union { typeof(x) __val; char __c[1]; } __u = \
- { .__val = (__force typeof(x)) (val) }; \
- __write_once_size(&(x), __u.__c, sizeof(x)); \
- __u.__val; \
- })
-
+#include <linux/kcsan.h>
+
+/*
+ * data_race(): macro to document that accesses in an expression may conflict with
+ * other concurrent accesses resulting in data races, but the resulting
+ * behaviour is deemed safe regardless.
+ *
+ * This macro *does not* affect normal code generation, but is a hint to tooling
+ * that data races here should be ignored.
+ */
+#define data_race(expr) \
+ ({ \
+ typeof(({ expr; })) __val; \
+ kcsan_nestable_atomic_begin(); \
+ __val = ({ expr; }); \
+ kcsan_nestable_atomic_end(); \
+ __val; \
+ })
+#else
+
#endif /* __KERNEL__ */

/*