Re: Q: lockdep_assert_held_read() after downgrade_write()

From: Peter Zijlstra
Date: Tue Jan 31 2017 - 11:52:36 EST


On Wed, Feb 01, 2017 at 12:40:03AM +0900, J. R. Okajima wrote:

> Now allow me going on the second test (based upon Peter's patch)
>
> - two rwsem, rwA and rwB.
> - the locking order is rwA first, and then rwB.
> - good case
> down_read(rwA)
> down_read(rwB)
> up_read(rwB)
> up_read(rwA)
>
> down_write(rwA)
> down_write(rwB)
> up_write(rwB)
> up_write(rwA)
>
> - questionable case
> down_write(rwA)
> down_write(rwB)
> downgrade_write(rwA)
> downgrade_write(rwB)
> up_read(rwB)
> up_read(rwA)
>
> These two downgrade_write() have their strict order? If so, what is
> that?
> Do the added two lines
> + rwsem_release(&sem->dep_map, 1, _RET_IP_);
> + rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
> produce a traditional AB-BA deadlock warning, don't they?

Blergh, yes, because we do a full release.

Does something like the below work better? The annotation in
downgrade_write() would look something like:

+ lock_downgrade(&sem->dep_map, 1, _RET_IP_);

Not even compile tested and lacks the !LOCKDEP build bits.

---
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 1e327bb..76cf149 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -361,6 +361,8 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
lock_set_class(lock, lock->name, lock->key, subclass, ip);
}

+extern void lock_downgrade(struct lockdep_map *lock, int read, unsigned long ip);
+
extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7c38f8f..88517b6 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3488,6 +3488,63 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
return 1;
}

+static int __lock_downgrade(struct lockdep_map *lock, int read, unsigned long ip)
+{
+ struct task_struct *curr = current;
+ struct held_lock *hlock, *prev_hlock;
+ struct lock_class *class;
+ unsigned int depth;
+ int i;
+
+ depth = curr->lockdep_depth;
+ /*
+ * This function is about (re)setting the class of a held lock,
+ * yet we're not actually holding any locks. Naughty user!
+ */
+ if (DEBUG_LOCKS_WARN_ON(!depth))
+ return 0;
+
+ prev_hlock = NULL;
+ for (i = depth-1; i >= 0; i--) {
+ hlock = curr->held_locks + i;
+ /*
+ * We must not cross into another context:
+ */
+ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
+ break;
+ if (match_held_lock(hlock, lock))
+ goto found_it;
+ prev_hlock = hlock;
+ }
+ return print_unlock_imbalance_bug(curr, lock, ip);
+
+found_it:
+ curr->lockdep_depth = i;
+ curr->curr_chain_key = hlock->prev_chain_key;
+
+ WARN(hlock->read, "downgrading a read lock");
+ hlock->read = read;
+ hlock->acquire_ip = ip;
+
+ for (; i < depth; i++) {
+ hlock = curr->held_locks + i;
+ if (!__lock_acquire(hlock->instance,
+ hlock_class(hlock)->subclass, hlock->trylock,
+ hlock->read, hlock->check, hlock->hardirqs_off,
+ hlock->nest_lock, hlock->acquire_ip,
+ hlock->references, hlock->pin_count))
+ return 0;
+ }
+
+ /*
+ * I took it apart and put it back together again, except now I have
+ * these 'spare' parts.. where shall I put them.
+ */
+ if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
+ return 0;
+ return 1;
+}
+
/*
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
@@ -3732,6 +3789,23 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
}
EXPORT_SYMBOL_GPL(lock_set_class);

+void lock_downgrade(struct lockdep_map *lock, int read, unsigned long ip)
+{
+ unsigned long flags;
+
+ if (unlikely(current->lockdep_recursion))
+ return;
+
+ raw_local_irq_save(flags);
+ current->lockdep_recursion = 1;
+ check_flags(flags);
+ if (__lock_downgrade(lock, read, ip))
+ check_chain_key(current);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_downgrade);
+
/*
* We are not always called with irqs disabled - do that here,
* and also avoid lockdep recursion: