Re: [PATCH v3 2/5] locking/rwsem: Limit # of null owner retries for handoff writer

From: Peter Zijlstra
Date: Tue Oct 25 2022 - 07:49:17 EST


On Tue, Oct 25, 2022 at 01:22:22PM +0200, Peter Zijlstra wrote:

> Funny, I find the former approach much saner. Disabling preemption
> around the whole thing fixes the fundamental problem while spin-limiting
> is a band-aid.
>
> Note how rwsem_write_trylock() already does preempt_disable();
> having the read-side do something similar only makes sense.
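
IOW the shape is: pull the preempt_disable() out to the top-level entry
points, so the fast path, the optimistic spin and the slowpath all run
with preemption disabled, and only the actual sleep drops it again.
Roughly, as a minimal sketch (hypothetical foo_* names, not the real
rwsem API):

/*
 * Sketch only: preemption is held across the whole lock operation
 * instead of being toggled inside each helper; the slowpath is
 * assumed to sleep via schedule_preempt_disabled().
 */
static inline int down_foo(struct foo_sem *sem)
{
	int ret = 0;

	preempt_disable();
	if (!foo_trylock(sem))			/* fast path */
		ret = foo_slowpath(sem);	/* may sleep */
	preempt_enable();

	return ret;
}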

Something like the completely untested below perhaps...


diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 44873594de03..350fb004b0fb 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -256,16 +256,13 @@ static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
long tmp = RWSEM_UNLOCKED_VALUE;
- bool ret = false;

- preempt_disable();
if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
rwsem_set_owner(sem);
- ret = true;
+ return true;
}

- preempt_enable();
- return ret;
+ return false;
}

/*
@@ -717,7 +714,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
return false;
}

- preempt_disable();
/*
* Disabling preemption is equivalent to an RCU read-side critical
* section, thus the task_struct structure won't go away.
@@ -729,7 +725,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
if ((flags & RWSEM_NONSPINNABLE) ||
(owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
ret = false;
- preempt_enable();

lockevent_cond_inc(rwsem_opt_fail, !ret);
return ret;
@@ -829,8 +824,6 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
int loop = 0;
u64 rspin_threshold = 0;

- preempt_disable();
-
/* sem->wait_lock should not be held when doing optimistic spinning */
if (!osq_lock(&sem->osq))
goto done;
@@ -938,7 +931,6 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
}
osq_unlock(&sem->osq);
done:
- preempt_enable();
lockevent_cond_inc(rwsem_opt_fail, !taken);
return taken;
}
@@ -1092,7 +1084,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
break;
}
- schedule();
+ schedule_preempt_disabled();
lockevent_inc(rwsem_sleep_reader);
}

@@ -1179,15 +1171,12 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
if (waiter.handoff_set) {
enum owner_state owner_state;

- preempt_disable();
owner_state = rwsem_spin_on_owner(sem);
- preempt_enable();
-
if (owner_state == OWNER_NULL)
goto trylock_again;
}

- schedule();
+ schedule_preempt_disabled();
lockevent_inc(rwsem_sleep_writer);
set_current_state(state);
trylock_again:
@@ -1254,14 +1243,20 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
*/
static inline int __down_read_common(struct rw_semaphore *sem, int state)
{
+ int ret = 0;
long count;

+ preempt_disable();
if (!rwsem_read_trylock(sem, &count)) {
- if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
- return -EINTR;
+ if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) {
+ ret = -EINTR;
+ goto out;
+ }
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
}
- return 0;
+out:
+ preempt_enable();
+ return ret;
}

static inline void __down_read(struct rw_semaphore *sem)
@@ -1281,19 +1276,23 @@ static inline int __down_read_killable(struct rw_semaphore *sem)

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
+ int ret = 0;
long tmp;

DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);

+ preempt_disable();
tmp = atomic_long_read(&sem->count);
while (!(tmp & RWSEM_READ_FAILED_MASK)) {
if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
tmp + RWSEM_READER_BIAS)) {
rwsem_set_reader_owned(sem);
- return 1;
+ ret = 1;
+ break;
}
}
- return 0;
+ preempt_enable();
+ return ret;
}

/*
@@ -1301,10 +1300,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
*/
static inline int __down_write_common(struct rw_semaphore *sem, int state)
{
+ int ret = 0;
+
+ preempt_disable();
if (unlikely(!rwsem_write_trylock(sem))) {
if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
- return -EINTR;
+ ret = -EINTR;
}
+ preempt_enable();

- return 0;
+ return ret;
}
@@ -1321,8 +1324,14 @@ static inline int __down_write_killable(struct rw_semaphore *sem)

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
+ int ret;
+
+ preempt_disable();
DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
- return rwsem_write_trylock(sem);
+ ret = rwsem_write_trylock(sem);
+ preempt_enable();
+
+ return ret;
}

/*
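
For completeness: the schedule() -> schedule_preempt_disabled()
conversions are what make sleeping with preemption disabled work; the
helper re-enables preemption only around the actual context switch and
returns with it disabled again. From kernel/sched/core.c it is
essentially:

void __sched schedule_preempt_disabled(void)
{
	/* drop the preempt count without rescheduling ... */
	sched_preempt_enable_no_resched();
	/* ... sleep ... */
	schedule();
	/* ... and return to the caller with preemption off again */
	preempt_disable();
}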