Re: [PATCH 3/6] locking/rwsem: Rework writer wakeup

From: Peter Zijlstra
Date: Sun Feb 26 2023 - 11:02:04 EST


On Thu, Feb 23, 2023 at 01:26:45PM +0100, Peter Zijlstra wrote:
> @@ -1072,7 +1067,7 @@ rwsem_down_read_slowpath(struct rw_semap
> for (;;) {
> set_current_state(state);
> if (!smp_load_acquire(&waiter.task)) {
> - /* Matches rwsem_mark_wake()'s smp_store_release(). */
> + /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> break;
> }
> if (signal_pending_state(state, current)) {
> @@ -1143,54 +1138,36 @@ rwsem_down_write_slowpath(struct rw_sema
> } else {
> atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);

Found it; if we remove the rwsem_try_write_lock() below, then at least
this new-waiter path still needs to do a trylock.
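
Something like the below, perhaps. Completely untested sketch; it
assumes rwsem_try_write_lock() is still called with ->wait_lock held
and dequeues the waiter on success, so clearing waiter.task is enough
for the new wait loop to terminate:

	} else {
		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
		/*
		 * The lock can be released after the atomic_long_or()
		 * above but before this waiter is visible to anybody;
		 * then there is nobody left to wake us, so do one
		 * trylock before parking.
		 */
		if (rwsem_try_write_lock(sem, &waiter))
			waiter.task = NULL; /* assumed: loop exits on !waiter.task */
	}

That keeps the queueing and the trylock inside the same ->wait_lock
section, so we can't race against a waker that already decided there
was nobody to wake.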

Let me go test the other patches on top of all this and push out a fresh
set if that all still works.

> }
> + raw_spin_unlock_irq(&sem->wait_lock);
>
> /* wait until we successfully acquire the lock */
> - set_current_state(state);
> trace_contention_begin(sem, LCB_F_WRITE);
>
> for (;;) {
> - if (rwsem_try_write_lock(sem, &waiter)) {
> - /* rwsem_try_write_lock() implies ACQUIRE on success */
> + set_current_state(state);
> + if (!smp_load_acquire(&waiter.task)) {
> + /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> break;
> }
> -
> - raw_spin_unlock_irq(&sem->wait_lock);
> -
> - if (signal_pending_state(state, current))
> - goto out_nolock;
> -
> - /*
> - * After setting the handoff bit and failing to acquire
> - * the lock, attempt to spin on owner to accelerate lock
> - * transfer. If the previous owner is an on-cpu writer and it
> - * has just released the lock, OWNER_NULL will be returned.
> - * In this case, we attempt to acquire the lock again
> - * without sleeping.
> - */
> - if (waiter.handoff_set) {
> - enum owner_state owner_state;
> -
> - owner_state = rwsem_spin_on_owner(sem);
> - if (owner_state == OWNER_NULL)
> - goto trylock_again;
> + if (signal_pending_state(state, current)) {
> + raw_spin_lock_irq(&sem->wait_lock);
> + if (waiter.task)
> + goto out_nolock;
> + raw_spin_unlock_irq(&sem->wait_lock);
> + /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
> + break;
> }
> -
> schedule_preempt_disabled();
> lockevent_inc(rwsem_sleep_writer);
> - set_current_state(state);
> -trylock_again:
> - raw_spin_lock_irq(&sem->wait_lock);
> }
> __set_current_state(TASK_RUNNING);
> - raw_spin_unlock_irq(&sem->wait_lock);
> lockevent_inc(rwsem_wlock);
> trace_contention_end(sem, 0);
> return sem;
>
> out_nolock:
> - __set_current_state(TASK_RUNNING);
> - raw_spin_lock_irq(&sem->wait_lock);
> rwsem_del_wake_waiter(sem, &waiter, &wake_q);
> + __set_current_state(TASK_RUNNING);
> lockevent_inc(rwsem_wlock_fail);
> trace_contention_end(sem, -EINTR);
> return ERR_PTR(-EINTR);