Re: INFO: suspicious rcu_dereference_check() usage -kernel/sched.c:618 invoked rcu_dereference_check() without protection!

From: Paul E. McKenney
Date: Tue Oct 12 2010 - 00:20:37 EST


On Mon, Oct 11, 2010 at 06:19:55PM -0700, Greg Thelen wrote:
> I reliably see an rcu_dereference_check() failure with v2.6.36-rc7 in
> a 512MiB VM. I would be happy to test out proposed patches to this
> issue.

Hello, Greg,

Commit 6506cf6ce68 in my -rcu tree should address this.

git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git rcu/next

Please see below for a patch against tip/core/rcu that gathers up the
four commits.

> [ 0.036082] lockdep: fixing up alternatives.
> [ 0.037184]
> [ 0.037185] ===================================================
> [ 0.037999] [ INFO: suspicious rcu_dereference_check() usage. ]
> [ 0.037999] ---------------------------------------------------
> [ 0.037999] kernel/sched.c:618 invoked rcu_dereference_check() without protection!
> [ 0.037999]
> [ 0.037999] other info that might help us debug this:
> [ 0.037999]
> [ 0.037999]
> [ 0.037999] rcu_scheduler_active = 1, debug_locks = 0
> [ 0.037999] 3 locks held by kworker/0:0/4:
> [ 0.037999] #0: (events){+.+.+.}, at: [<ffffffff8105647d>] process_one_work+0x195/0x422
> [ 0.037999] #1: ((&c_idle.work)){+.+.+.}, at: [<ffffffff8105647d>] process_one_work+0x195/0x422
> [ 0.037999] #2: (&rq->lock){-.-...}, at: [<ffffffff81584f49>] init_idle+0x2b/0x114
> [ 0.037999]
> [ 0.037999] stack backtrace:
> [ 0.037999] Pid: 4, comm: kworker/0:0 Not tainted 2.6.36-rc7 #1
> [ 0.037999] Call Trace:
> [ 0.037999] [<ffffffff8106c273>] lockdep_rcu_dereference+0xaa/0xb2
> [ 0.037999] [<ffffffff810331b1>] task_group+0x7b/0x8b
> [ 0.037999] [<ffffffff810331d6>] set_task_rq+0x15/0x40
> [ 0.037999] [<ffffffff81584fef>] init_idle+0xd1/0x114
> [ 0.037999] [<ffffffff815853de>] fork_idle+0xb8/0xc9
> [ 0.037999] [<ffffffff8103b13a>] ? check_preempt_wakeup+0xf0/0x177
> [ 0.037999] [<ffffffff81583a88>] do_fork_idle+0x17/0x28
> [ 0.037999] [<ffffffff8105654d>] process_one_work+0x265/0x422
> [ 0.037999] [<ffffffff8105647d>] ? process_one_work+0x195/0x422
> [ 0.037999] [<ffffffff8103cf3e>] ? wake_up_process+0x10/0x12
> [ 0.037999] [<ffffffff8105810a>] ? manage_workers+0x106/0x191
> [ 0.037999] [<ffffffff810582cb>] worker_thread+0x136/0x24c
> [ 0.037999] [<ffffffff81058195>] ? worker_thread+0x0/0x24c
> [ 0.037999] [<ffffffff8105b86c>] kthread+0x7d/0x85
> [ 0.037999] [<ffffffff810039d4>] kernel_thread_helper+0x4/0x10
> [ 0.037999] [<ffffffff8158ae00>] ? restore_args+0x0/0x30
> [ 0.037999] [<ffffffff8105b7ef>] ? kthread+0x0/0x85
> [ 0.037999] [<ffffffff810039d0>] ? kernel_thread_helper+0x0/0x10
>
> Below is the .config, which was generated from:
> $ make defconfig
> $ make menuconfig
> - enable CONFIG_SPINLOCK_SLEEP
> - enable CONFIG_PREEMPT
> - enable CONFIG_PROVE_LOCKING
> - enable CONFIG_PROVE_RCU

Please let me know how it goes!

Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e750735..ccdc04c 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -545,9 +545,9 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)

if (rcu_cpu_stall_suppress)
return;
- delta = jiffies - rsp->jiffies_stall;
+ delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
rnp = rdp->mynode;
- if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
+ if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) {

/* We haven't checked in, so go dump stack. */
print_cpu_stall(rsp);
diff --git a/kernel/sched.c b/kernel/sched.c
index dc85ceb..ae8f75a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5337,7 +5337,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();

cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+ /*
+ * We're having a chicken and egg problem, even though we are
+ * holding rq->lock, the cpu isn't yet set to this cpu so the
+ * lockdep check in task_group() will fail.
+ *
+ * Similar case to sched_fork(). / Alternatively we could
+ * use task_rq_lock() here and obtain the other rq->lock.
+ *
+ * Silence PROVE_RCU
+ */
+ rcu_read_lock();
__set_task_cpu(idle, cpu);
+ rcu_read_unlock();

rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index db3f674..5f996d3 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3751,8 +3751,11 @@ static void task_fork_fair(struct task_struct *p)

update_rq_clock(rq);

- if (unlikely(task_cpu(p) != this_cpu))
+ if (unlikely(task_cpu(p) != this_cpu)) {
+ rcu_read_lock();
__set_task_cpu(p, this_cpu);
+ rcu_read_unlock();
+ }

update_curr(cfs_rq);

diff --git a/net/core/sock.c b/net/core/sock.c
index ef30e9d..7d99e13 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1078,8 +1078,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
#ifdef CONFIG_CGROUPS
void sock_update_classid(struct sock *sk)
{
- u32 classid = task_cls_classid(current);
+ u32 classid;

+ rcu_read_lock(); /* doing current task, which cannot vanish. */
+ classid = task_cls_classid(current);
+ rcu_read_unlock();
if (classid && classid != sk->sk_classid)
sk->sk_classid = classid;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/