Re: Internal vs. external barriers (was: Re: Interesting LKMM litmus test)

From: Paul E. McKenney
Date: Thu Jan 19 2023 - 17:14:17 EST


On Thu, Jan 19, 2023 at 02:51:53PM -0500, Alan Stern wrote:
> On Thu, Jan 19, 2023 at 10:41:07AM -0800, Paul E. McKenney wrote:
> > In contrast, this actually needs srcu_down_read() and srcu_up_read():
> >
> > ------------------------------------------------------------------------
> >
> > C C-srcu-nest-6
> >
> > (*
> > * Result: Never
> > *
> > * Flag unbalanced-srcu-locking
> > * This would be valid for srcu_down_read() and srcu_up_read().
> > *)
> >
> > {}
> >
> > P0(int *x, int *y, struct srcu_struct *s1, int *idx)
> > {
> > int r2;
> > int r3;
> >
> > r3 = srcu_down_read(s1);
> > WRITE_ONCE(*idx, r3);
> > r2 = READ_ONCE(*y);
> > }
> >
> > P1(int *x, int *y, struct srcu_struct *s1, int *idx)
> > {
> > int r1;
> > int r3;
> >
> > r1 = READ_ONCE(*x);
> > r3 = READ_ONCE(*idx);
> > srcu_up_read(s1, r3);
> > }
> >
> > P2(int *x, int *y, struct srcu_struct *s1)
> > {
> > WRITE_ONCE(*y, 1);
> > synchronize_srcu(s1);
> > WRITE_ONCE(*x, 1);
> > }
> >
> > locations [0:r1]
> > exists (1:r1=1 /\ 0:r2=0)
>
> I modified this litmus test by adding a flag variable with an
> smp_store_release in P0, an smp_load_acquire in P1, and a filter clause
> to ensure that P1 reads the flag and idx from P1.
>
> With the patch below, the results were as expected:
>
> Test C-srcu-nest-6 Allowed
> States 3
> 0:r1=0; 0:r2=0; 1:r1=0;
> 0:r1=0; 0:r2=1; 1:r1=0;
> 0:r1=0; 0:r2=1; 1:r1=1;
> No
> Witnesses
> Positive: 0 Negative: 3
> Condition exists (1:r1=1 /\ 0:r2=0)
> Observation C-srcu-nest-6 Never 0 3
> Time C-srcu-nest-6 0.04
> Hash=2b010cf3446879fb84752a6016ff88c5

Fair point, and for example we already recommend emulating call_rcu()
using similar release-acquire tricks.

> It turns out that the idea of removing rf edges from Srcu-unlock events
> doesn't work well. The missing edges mess up herd's calculation of the
> fr relation and the coherence axiom. So I've gone back to filtering
> those edges out of carry-dep.
>
> Also, Boqun's suggestion for flagging ordinary accesses to SRCU
> structures no longer works, because the lock and unlock operations now
> _are_ normal accesses. I removed that check too, but it shouldn't hurt
> much because I don't expect to encounter litmus tests that try to read
> or write srcu_structs directly.

Agreed. I for one would definitely have something to say about an
SRCU-usage patch that directly manipulated a srcu_struct structure! ;-)

> Alan
>
>
>
> Index: usb-devel/tools/memory-model/linux-kernel.bell
> ===================================================================
> --- usb-devel.orig/tools/memory-model/linux-kernel.bell
> +++ usb-devel/tools/memory-model/linux-kernel.bell
> @@ -53,38 +53,30 @@ let rcu-rscs = let rec
> in matched
>
> (* Validate nesting *)
> -flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking
> -flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking
> +flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-lock
> +flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-unlock

This renaming makes sense to me.

> (* Compute matching pairs of nested Srcu-lock and Srcu-unlock *)
> -let srcu-rscs = let rec
> - unmatched-locks = Srcu-lock \ domain(matched)
> - and unmatched-unlocks = Srcu-unlock \ range(matched)
> - and unmatched = unmatched-locks | unmatched-unlocks
> - and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc
> - and unmatched-locks-to-unlocks =
> - ([unmatched-locks] ; po ; [unmatched-unlocks]) & loc
> - and matched = matched | (unmatched-locks-to-unlocks \
> - (unmatched-po ; unmatched-po))
> - in matched
> +let srcu-rscs = ([Srcu-lock] ; (data | rf)+ ; [Srcu-unlock]) & loc

The point of the "+" instead of the "*" is to avoid LKMM being confused by
an srcu_read_lock() immediately preceding an unrelated srcu_read_unlock(),
right? Or am I missing something more subtle?

> (* Validate nesting *)
> -flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking
> -flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking
> +flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-lock
> +flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-unlock
> +flag ~empty (srcu-rscs^-1 ; srcu-rscs) \ id as multiple-srcu-matches
>
> (* Check for use of synchronize_srcu() inside an RCU critical section *)
> flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
>
> (* Validate SRCU dynamic match *)
> -flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
> +flag ~empty different-values(srcu-rscs) as bad-srcu-value-match
>
> (* Compute marked and plain memory accesses *)
> let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
> - LKR | LKW | UL | LF | RL | RU
> + LKR | LKW | UL | LF | RL | RU | Srcu-lock | Srcu-unlock
> let Plain = M \ Marked
>
> (* Redefine dependencies to include those carried through plain accesses *)
> -let carry-dep = (data ; rfi)*
> +let carry-dep = (data ; [~ Srcu-unlock] ; rfi)*

The "[~ Srcu-unlock]" matches the store that bridges the data and rfi",
correct?

> let addr = carry-dep ; addr
> let ctrl = carry-dep ; ctrl
> let data = carry-dep ; data
> Index: usb-devel/tools/memory-model/linux-kernel.def
> ===================================================================
> --- usb-devel.orig/tools/memory-model/linux-kernel.def
> +++ usb-devel/tools/memory-model/linux-kernel.def
> @@ -49,8 +49,10 @@ synchronize_rcu() { __fence{sync-rcu}; }
> synchronize_rcu_expedited() { __fence{sync-rcu}; }
>
> // SRCU
> -srcu_read_lock(X) __srcu{srcu-lock}(X)
> -srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); }
> +srcu_read_lock(X) __load{srcu-lock}(*X)
> +srcu_read_unlock(X,Y) { __store{srcu-unlock}(*X,Y); }
> +srcu_down_read(X) __load{srcu-lock}(*X)
> +srcu_up_read(X,Y) { __store{srcu-unlock}(*X,Y); }

And here srcu_down_read() and srcu_up_read() are synonyms for
srcu_read_lock() and srcu_read_unlock(), respectively, which I believe
should suffice.

> synchronize_srcu(X) { __srcu{sync-srcu}(X); }
> synchronize_srcu_expedited(X) { __srcu{sync-srcu}(X); }

So this looks quite reasonable to me.

Thanx, Paul