Re: [PATCH 5/6] locking/rwsem: Unify wait loop

From: Boqun Feng
Date: Thu Feb 23 2023 - 14:32:17 EST


On Thu, Feb 23, 2023 at 01:26:47PM +0100, Peter Zijlstra wrote:
> Now that the reader and writer wait loops are identical, share the
> code.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
> kernel/locking/rwsem.c | 117 +++++++++++++++++++------------------------------
> 1 file changed, 47 insertions(+), 70 deletions(-)
>
> --- a/kernel/locking/rwsem.c
> +++ b/kernel/locking/rwsem.c
> @@ -650,13 +650,11 @@ static void rwsem_mark_wake(struct rw_se
> * optionally wake up waiters before it returns.
> */
> static inline void
> -rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
> - struct wake_q_head *wake_q)
> +rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
> __releases(&sem->wait_lock)
> {
> bool first = rwsem_first_waiter(sem) == waiter;
> -
> - wake_q_init(wake_q);
> + DEFINE_WAKE_Q(wake_q);
>
> /*
> * If the wait_list isn't empty and the waiter to be deleted is
> @@ -664,10 +662,10 @@ rwsem_del_wake_waiter(struct rw_semaphor
> * be eligible to acquire or spin on the lock.
> */
> if (rwsem_del_waiter(sem, waiter) && first)
> - rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
> + rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
> raw_spin_unlock_irq(&sem->wait_lock);
> - if (!wake_q_empty(wake_q))
> - wake_up_q(wake_q);
> + if (!wake_q_empty(&wake_q))
> + wake_up_q(&wake_q);
> }
>
> /*
> @@ -993,6 +991,46 @@ static inline void rwsem_cond_wake_waite
> rwsem_mark_wake(sem, wake_type, wake_q);
> }
>
> +#define waiter_type(_waiter, _r, _w) \
> + ((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
> +
> +static __always_inline struct rw_semaphore *
> +rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
> +{
> + trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
> +
> + /* wait to be given the lock */
> + for (;;) {
> + set_current_state(state);
> + if (!smp_load_acquire(&waiter->task)) {
> + /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> + break;
> + }
> + if (signal_pending_state(state, current)) {
> + raw_spin_lock_irq(&sem->wait_lock);

Should the __set_current_state(TASK_RUNNING) calls below be moved up
here? I think we need preemption protection when changing the task
state here.

> + if (waiter->task)
> + goto out_nolock;

I originally wanted to suggest renaming the label to "out_locked" (we
still hold sem->wait_lock at this goto), but I think we can just move
the labeled code up here instead. We could even open-code
rwsem_del_wake_waiter(), since it has only one caller.


Regards,
Boqun

> + raw_spin_unlock_irq(&sem->wait_lock);
> + /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
> + break;
> + }
> + schedule_preempt_disabled();
> + lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
> + }
> +
> + __set_current_state(TASK_RUNNING);
> + lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
> + trace_contention_end(sem, 0);
> + return sem;
> +
> +out_nolock:
> + rwsem_del_wake_waiter(sem, waiter);
> + __set_current_state(TASK_RUNNING);
> + lockevent_inc(waiter_type(waiter, rwsem_rlock_fail, rwsem_wlock_fail));
> + trace_contention_end(sem, -EINTR);
> + return ERR_PTR(-EINTR);
> +}
> +
> /*
> * Wait for the read lock to be granted
> */
> @@ -1071,38 +1109,7 @@ rwsem_down_read_slowpath(struct rw_semap
> if (!wake_q_empty(&wake_q))
> wake_up_q(&wake_q);
>
> - trace_contention_begin(sem, LCB_F_READ);
> -
> - /* wait to be given the lock */
> - for (;;) {
> - set_current_state(state);
> - if (!smp_load_acquire(&waiter.task)) {
> - /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> - break;
> - }
> - if (signal_pending_state(state, current)) {
> - raw_spin_lock_irq(&sem->wait_lock);
> - if (waiter.task)
> - goto out_nolock;
> - raw_spin_unlock_irq(&sem->wait_lock);
> - /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
> - break;
> - }
> - schedule_preempt_disabled();
> - lockevent_inc(rwsem_sleep_reader);
> - }
> -
> - __set_current_state(TASK_RUNNING);
> - lockevent_inc(rwsem_rlock);
> - trace_contention_end(sem, 0);
> - return sem;
> -
> -out_nolock:
> - rwsem_del_wake_waiter(sem, &waiter, &wake_q);
> - __set_current_state(TASK_RUNNING);
> - lockevent_inc(rwsem_rlock_fail);
> - trace_contention_end(sem, -EINTR);
> - return ERR_PTR(-EINTR);
> + return rwsem_waiter_wait(sem, &waiter, state);
> }
>
> /*
> @@ -1150,37 +1157,7 @@ rwsem_down_write_slowpath(struct rw_sema
> }
> raw_spin_unlock_irq(&sem->wait_lock);
>
> - /* wait until we successfully acquire the lock */
> - trace_contention_begin(sem, LCB_F_WRITE);
> -
> - for (;;) {
> - set_current_state(state);
> - if (!smp_load_acquire(&waiter.task)) {
> - /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> - break;
> - }
> - if (signal_pending_state(state, current)) {
> - raw_spin_lock_irq(&sem->wait_lock);
> - if (waiter.task)
> - goto out_nolock;
> - raw_spin_unlock_irq(&sem->wait_lock);
> - /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
> - break;
> - }
> - schedule_preempt_disabled();
> - lockevent_inc(rwsem_sleep_writer);
> - }
> - __set_current_state(TASK_RUNNING);
> - lockevent_inc(rwsem_wlock);
> - trace_contention_end(sem, 0);
> - return sem;
> -
> -out_nolock:
> - rwsem_del_wake_waiter(sem, &waiter, &wake_q);
> - __set_current_state(TASK_RUNNING);
> - lockevent_inc(rwsem_wlock_fail);
> - trace_contention_end(sem, -EINTR);
> - return ERR_PTR(-EINTR);
> + return rwsem_waiter_wait(sem, &waiter, state);
> }
>
> /*
>
>