Re: [RCU] kernel hangs in wait_rcu_gp during suspend path

From: Paul E. McKenney
Date: Wed Dec 17 2014 - 14:25:19 EST


On Tue, Dec 16, 2014 at 11:00:20PM +0530, Arun KS wrote:
> Hello,
>
> Adding some more info.
>
> Below is the rcu_data data structure corresponding to cpu4.

This shows that RCU is idle. What was the state of the system at the
time you collected this data?

Thanx, Paul

> struct rcu_data {
> completed = 5877,
> gpnum = 5877,
> passed_quiesce = true,
> qs_pending = false,
> beenonline = true,
> preemptible = true,
> mynode = 0xc117f340 <rcu_preempt_state>,
> grpmask = 16,
> nxtlist = 0xedaaec00,
> nxttail = {0xc54366c4, 0xe84d350c, 0xe84d350c, 0xe84d350c},
> nxtcompleted = {4294967035, 5878, 5878, 5878},
> qlen_lazy = 105,
> qlen = 415,
> qlen_last_fqs_check = 0,
> n_cbs_invoked = 86323,
> n_nocbs_invoked = 0,
> n_cbs_orphaned = 0,
> n_cbs_adopted = 139,
> n_force_qs_snap = 0,
> blimit = 10,
> dynticks = 0xc5436758,
> dynticks_snap = 7582140,
> dynticks_fqs = 41,
> offline_fqs = 0,
> n_rcu_pending = 59404,
> n_rp_qs_pending = 5,
> n_rp_report_qs = 4633,
> n_rp_cb_ready = 32,
> n_rp_cpu_needs_gp = 41088,
> n_rp_gp_completed = 2844,
> n_rp_gp_started = 1150,
> n_rp_need_nothing = 9657,
> barrier_head = {
> next = 0x0,
> func = 0x0
> },
> oom_head = {
> next = 0x0,
> func = 0x0
> },
> cpu = 4,
> rsp = 0xc117f340 <rcu_preempt_state>
> }
>
>
>
> Also pasting complete rcu_preempt_state.
>
>
>
> rcu_preempt_state = $9 = {
> node = {{
> lock = {
> raw_lock = {
> {
> slock = 3129850509,
> tickets = {
> owner = 47757,
> next = 47757
> }
> }
> },
> magic = 3735899821,
> owner_cpu = 4294967295,
> owner = 0xffffffff
> },
> gpnum = 5877,
> completed = 5877,
> qsmask = 0,
> expmask = 0,
> qsmaskinit = 240,
> grpmask = 0,
> grplo = 0,
> grphi = 7,
> grpnum = 0 '\000',
> level = 0 '\000',
> parent = 0x0,
> blkd_tasks = {
> next = 0xc117f378 <rcu_preempt_state+56>,
> prev = 0xc117f378 <rcu_preempt_state+56>
> },
> gp_tasks = 0x0,
> exp_tasks = 0x0,
> need_future_gp = {1, 0},
> fqslock = {
> raw_lock = {
> {
> slock = 0,
> tickets = {
> owner = 0,
> next = 0
> }
> }
> },
> magic = 3735899821,
> owner_cpu = 4294967295,
> owner = 0xffffffff
> }
> }},
> level = {0xc117f340 <rcu_preempt_state>},
> levelcnt = {1, 0, 0, 0, 0},
> levelspread = "\b",
> rda = 0xc115e6b0 <rcu_preempt_data>,
> call = 0xc01975ac <call_rcu>,
> fqs_state = 0 '\000',
> boost = 0 '\000',
> gpnum = 5877,
> completed = 5877,
> gp_kthread = 0xf0c9e600,
> gp_wq = {
> lock = {
> {
> rlock = {
> raw_lock = {
> {
> slock = 2160230594,
> tickets = {
> owner = 32962,
> next = 32962
> }
> }
> },
> magic = 3735899821,
> owner_cpu = 4294967295,
> owner = 0xffffffff
> }
> }
> },
> task_list = {
> next = 0xf0cd1f20,
> prev = 0xf0cd1f20
> }
> },
> gp_flags = 1,
> orphan_lock = {
> raw_lock = {
> {
> slock = 327685,
> tickets = {
> owner = 5,
> next = 5
> }
> }
> },
> magic = 3735899821,
> owner_cpu = 4294967295,
> owner = 0xffffffff
> },
> orphan_nxtlist = 0x0,
> orphan_nxttail = 0xc117f490 <rcu_preempt_state+336>,
> orphan_donelist = 0x0,
> orphan_donetail = 0xc117f498 <rcu_preempt_state+344>,
> qlen_lazy = 0,
> qlen = 0,
> onoff_mutex = {
> count = {
> counter = 1
> },
> wait_lock = {
> {
> rlock = {
> raw_lock = {
> {
> slock = 811479134,
> tickets = {
> owner = 12382,
> next = 12382
> }
> }
> },
> magic = 3735899821,
> owner_cpu = 4294967295,
> owner = 0xffffffff
> }
> }
> },
> wait_list = {
> next = 0xc117f4bc <rcu_preempt_state+380>,
> prev = 0xc117f4bc <rcu_preempt_state+380>
> },
> owner = 0x0,
> name = 0x0,
> magic = 0xc117f4a8 <rcu_preempt_state+360>
> },
> barrier_mutex = {
> count = {
> counter = 1
> },
> wait_lock = {
> {
> rlock = {
> raw_lock = {
> {
> slock = 0,
> tickets = {
> owner = 0,
> next = 0
> }
> }
> },
> magic = 3735899821,
> owner_cpu = 4294967295,
> owner = 0xffffffff
> }
> }
> },
> wait_list = {
> next = 0xc117f4e4 <rcu_preempt_state+420>,
> prev = 0xc117f4e4 <rcu_preempt_state+420>
> },
> owner = 0x0,
> name = 0x0,
> magic = 0xc117f4d0 <rcu_preempt_state+400>
> },
> barrier_cpu_count = {
> counter = 0
> },
> barrier_completion = {
> done = 0,
> wait = {
> lock = {
> {
> rlock = {
> raw_lock = {
> {
> slock = 0,
> tickets = {
> owner = 0,
> next = 0
> }
> }
> },
> magic = 0,
> owner_cpu = 0,
> owner = 0x0
> }
> }
> },
> task_list = {
> next = 0x0,
> prev = 0x0
> }
> }
> },
> n_barrier_done = 0,
> expedited_start = {
> counter = 0
> },
> expedited_done = {
> counter = 0
> },
> expedited_wrap = {
> counter = 0
> },
> expedited_tryfail = {
> counter = 0
> },
> expedited_workdone1 = {
> counter = 0
> },
> expedited_workdone2 = {
> counter = 0
> },
> expedited_normal = {
> counter = 0
> },
> expedited_stoppedcpus = {
> counter = 0
> },
> expedited_done_tries = {
> counter = 0
> },
> expedited_done_lost = {
> counter = 0
> },
> expedited_done_exit = {
> counter = 0
> },
> jiffies_force_qs = 4294963917,
> n_force_qs = 4028,
> n_force_qs_lh = 0,
> n_force_qs_ngp = 0,
> gp_start = 4294963911,
> jiffies_stall = 4294966011,
> gp_max = 17,
> name = 0xc0d833ab "rcu_preempt",
> abbr = 112 'p',
> flavors = {
> next = 0xc117f2ec <rcu_bh_state+556>,
> prev = 0xc117f300 <rcu_struct_flavors>
> },
> wakeup_work = {
> flags = 3,
> llnode = {
> next = 0x0
> },
> func = 0xc0195aa8 <rsp_wakeup>
> }
> }
>
> Hope this helps.
>
> Thanks,
> Arun
>
>
> On Tue, Dec 16, 2014 at 11:59 AM, Arun KS <arunks.linux@xxxxxxxxx> wrote:
> > Hello,
> >
> > I dig little deeper to understand the situation.
> > All other cpus are in idle thread already.
> > As per my understanding, for the grace period to end, at-least one of
> > the following should happen on all online cpus,
> >
> > 1. a context switch.
> > 2. user space switch.
> > 3. switch to idle thread.
> >
> > In this situation, since all the other cores are already in idle, non
> > of the above are meet on all online cores.
> > So grace period is getting extended and never finishes. Below is the
> > state of runqueue when the hang happens.
> > --------------start------------------------------------
> > crash> runq
> > CPU 0 [OFFLINE]
> >
> > CPU 1 [OFFLINE]
> >
> > CPU 2 [OFFLINE]
> >
> > CPU 3 [OFFLINE]
> >
> > CPU 4 RUNQUEUE: c3192e40
> > CURRENT: PID: 0 TASK: f0874440 COMMAND: "swapper/4"
> > RT PRIO_ARRAY: c3192f20
> > [no tasks queued]
> > CFS RB_ROOT: c3192eb0
> > [no tasks queued]
> >
> > CPU 5 RUNQUEUE: c31a0e40
> > CURRENT: PID: 0 TASK: f0874980 COMMAND: "swapper/5"
> > RT PRIO_ARRAY: c31a0f20
> > [no tasks queued]
> > CFS RB_ROOT: c31a0eb0
> > [no tasks queued]
> >
> > CPU 6 RUNQUEUE: c31aee40
> > CURRENT: PID: 0 TASK: f0874ec0 COMMAND: "swapper/6"
> > RT PRIO_ARRAY: c31aef20
> > [no tasks queued]
> > CFS RB_ROOT: c31aeeb0
> > [no tasks queued]
> >
> > CPU 7 RUNQUEUE: c31bce40
> > CURRENT: PID: 0 TASK: f0875400 COMMAND: "swapper/7"
> > RT PRIO_ARRAY: c31bcf20
> > [no tasks queued]
> > CFS RB_ROOT: c31bceb0
> > [no tasks queued]
> > --------------end------------------------------------
> >
> > If my understanding is correct the below patch should help, because it
> > will expedite grace periods during suspend,
> > https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d1d74d14e98a6be740a6f12456c7d9ad47be9c9c
> >
> > But I wonder why it was not taken to stable trees. Can we take it?
> > Appreciate your help.
> >
> > Thanks,
> > Arun
> >
> > On Mon, Dec 15, 2014 at 10:34 PM, Arun KS <arunks.linux@xxxxxxxxx> wrote:
> >> Hi,
> >>
> >> Here is the backtrace of the process hanging in wait_rcu_gp,
> >>
> >> PID: 247 TASK: e16e7380 CPU: 4 COMMAND: "kworker/u16:5"
> >> #0 [<c09fead0>] (__schedule) from [<c09fcab0>]
> >> #1 [<c09fcab0>] (schedule_timeout) from [<c09fe050>]
> >> #2 [<c09fe050>] (wait_for_common) from [<c013b2b4>]
> >> #3 [<c013b2b4>] (wait_rcu_gp) from [<c0142f50>]
> >> #4 [<c0142f50>] (atomic_notifier_chain_unregister) from [<c06b2ab8>]
> >> #5 [<c06b2ab8>] (cpufreq_interactive_disable_sched_input) from [<c06b32a8>]
> >> #6 [<c06b32a8>] (cpufreq_governor_interactive) from [<c06abbf8>]
> >> #7 [<c06abbf8>] (__cpufreq_governor) from [<c06ae474>]
> >> #8 [<c06ae474>] (__cpufreq_remove_dev_finish) from [<c06ae8c0>]
> >> #9 [<c06ae8c0>] (cpufreq_cpu_callback) from [<c0a0185c>]
> >> #10 [<c0a0185c>] (notifier_call_chain) from [<c0121888>]
> >> #11 [<c0121888>] (__cpu_notify) from [<c0121a04>]
> >> #12 [<c0121a04>] (cpu_notify_nofail) from [<c09ee7f0>]
> >> #13 [<c09ee7f0>] (_cpu_down) from [<c0121b70>]
> >> #14 [<c0121b70>] (disable_nonboot_cpus) from [<c016788c>]
> >> #15 [<c016788c>] (suspend_devices_and_enter) from [<c0167bcc>]
> >> #16 [<c0167bcc>] (pm_suspend) from [<c0167d94>]
> >> #17 [<c0167d94>] (try_to_suspend) from [<c0138460>]
> >> #18 [<c0138460>] (process_one_work) from [<c0138b18>]
> >> #19 [<c0138b18>] (worker_thread) from [<c013dc58>]
> >> #20 [<c013dc58>] (kthread) from [<c01061b8>]
> >>
> >> Will this patch helps here,
> >> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d1d74d14e98a6be740a6f12456c7d9ad47be9c9c
> >>
> >> I couldn't really understand why it got struck in synchronize_rcu().
> >> Please give some pointers to debug this further.
> >>
> >> Below are the configs enable related to RCU.
> >>
> >> CONFIG_TREE_PREEMPT_RCU=y
> >> CONFIG_PREEMPT_RCU=y
> >> CONFIG_RCU_STALL_COMMON=y
> >> CONFIG_RCU_FANOUT=32
> >> CONFIG_RCU_FANOUT_LEAF=16
> >> CONFIG_RCU_FAST_NO_HZ=y
> >> CONFIG_RCU_CPU_STALL_TIMEOUT=21
> >> CONFIG_RCU_CPU_STALL_VERBOSE=y
> >>
> >> Kernel version is 3.10.28
> >> Architecture is ARM
> >>
> >> Thanks,
> >> Arun
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/