Re: [RFC PATCH 3/3] Fix: tracepoint: rcu get state and cond sync for static call updates

From: Paul E. McKenney
Date: Tue Jul 27 2021 - 12:18:08 EST


On Tue, Jul 27, 2021 at 11:06:13AM -0400, Mathieu Desnoyers wrote:
> State transitions from 1->0->1 and N->2->1 callbacks require RCU
> synchronization. Rather than performing the RCU synchronization every
> time the state change occurs, which is quite slow when many tracepoints
> are registered in batch, instead keep a snapshot of the RCU state on the
> most recent transitions which belong to a chain, and conditionally wait
> for a grace period on the last transition of the chain if one g.p. has
> not elapsed since the last snapshot.
>
> This applies to both RCU and SRCU.
>
> [ Build tested only. ]
>
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>

Looks plausible from an RCU viewpoint.

Reviewed-by: Paul E. McKenney <paulmck@xxxxxxxxxx>

> ---
> kernel/tracepoint.c | 89 +++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 77 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index a85e7dc8b490..82f37045cd2b 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -28,6 +28,49 @@ extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
> DEFINE_SRCU(tracepoint_srcu);
> EXPORT_SYMBOL_GPL(tracepoint_srcu);
>
> +enum tp_transition_sync {
> + TP_TRANSITION_SYNC_1_0_1,
> + TP_TRANSITION_SYNC_N_2_1,
> +
> + _NR_TP_TRANSITION_SYNC,
> +};
> +
> +struct tp_transition_snapshot {
> + unsigned long rcu;
> + unsigned long srcu;
> + bool ongoing;
> +};
> +
> +/* Protected by tracepoints_mutex */
> +static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];
> +
> +static void tp_rcu_get_state(enum tp_transition_sync sync)
> +{
> + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
> +
> + /* Keep the latest get_state snapshot. */
> + snapshot->rcu = get_state_synchronize_rcu();
> + snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
> + snapshot->ongoing = true;
> +}
> +
> +static void tp_rcu_clear_ongoing(enum tp_transition_sync sync)
> +{
> + tp_transition_snapshot[sync].ongoing = false;
> +}
> +
> +static void tp_rcu_cond_sync(enum tp_transition_sync sync)
> +{
> + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
> +
> + if (!snapshot->ongoing)
> + return;
> + cond_synchronize_rcu(snapshot->rcu);
> + if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu))
> + synchronize_srcu(&tracepoint_srcu);
> + snapshot->ongoing = false;
> +}
> +
> /* Set to 1 to enable tracepoint debug output */
> static const int tracepoint_debug;
>
> @@ -311,6 +354,11 @@ static int tracepoint_add_func(struct tracepoint *tp,
> */
> switch (nr_func_state(tp_funcs)) {
> case TP_FUNC_1: /* 0->1 */
> + /*
> + * Make sure new static func never uses old data after a
> + * 1->0->1 transition sequence.
> + */
> + tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1);
> /* Set static call to first function */
> tracepoint_update_call(tp, tp_funcs);
> /* Both iterator and static call handle NULL tp->funcs */
> @@ -326,9 +374,21 @@ static int tracepoint_add_func(struct tracepoint *tp,
> * static call update/call.
> */
> rcu_assign_pointer(tp->funcs, tp_funcs);
> + /*
> + * Make sure static func never uses incorrect data after a
> + * 1->...->2->1 transition sequence.
> + */
> + if (tp_funcs[0].data != old[0].data)
> + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
> break;
> case TP_FUNC_N: /* N->N+1 (N>1) */
> rcu_assign_pointer(tp->funcs, tp_funcs);
> + /*
> + * Make sure static func never uses incorrect data after a
> + * N->...->2->1 (N>1) transition sequence.
> + */
> + if (tp_funcs[0].data != old[0].data)
> + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
> break;
> default:
> WARN_ON_ONCE(1);
> @@ -372,29 +432,34 @@ static int tracepoint_remove_func(struct tracepoint *tp,
> /* Both iterator and static call handle NULL tp->funcs */
> rcu_assign_pointer(tp->funcs, NULL);
> /*
> - * Make sure new func never uses old data after a 1->0->1
> - * transition sequence.
> - * Considering that transition 0->1 is the common case
> - * and don't have rcu-sync, issue rcu-sync after
> - * transition 1->0 to break that sequence by waiting for
> - * readers to be quiescent.
> + * Make sure new static func never uses old data after a
> + * 1->0->1 transition sequence.
> */
> - tracepoint_synchronize_unregister();
> + tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1);
> break;
> case TP_FUNC_1: /* 2->1 */
> rcu_assign_pointer(tp->funcs, tp_funcs);
> /*
> - * On 2->1 transition, RCU sync is needed before setting
> - * static call to first callback, because the observer
> - * may have loaded any prior tp->funcs after the last one
> - * associated with an rcu-sync.
> + * Make sure static func never uses incorrect data after a
> + * N->...->2->1 (N>2) transition sequence.
> */
> - tracepoint_synchronize_unregister();
> + if (tp_funcs[0].data != old[0].data) {
> + tracepoint_synchronize_unregister();
> + tp_rcu_clear_ongoing(TP_TRANSITION_SYNC_N_2_1);
> + } else {
> + tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1);
> + }
> /* Set static call to first function */
> tracepoint_update_call(tp, tp_funcs);
> break;
> case TP_FUNC_N: /* N->N-1 (N>2) */
> rcu_assign_pointer(tp->funcs, tp_funcs);
> + /*
> + * Make sure static func never uses incorrect data after a
> + * N->...->2->1 (N>2) transition sequence.
> + */
> + if (tp_funcs[0].data != old[0].data)
> + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
> break;
> default:
> WARN_ON_ONCE(1);
> --
> 2.20.1
>