[PATCH tip/core/rcu 28/47] rcu: Set RCU CPU stall times via sysfs

From: Paul E. McKenney
Date: Fri Feb 03 2012 - 20:46:02 EST


From: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>

The default CONFIG_RCU_CPU_STALL_TIMEOUT value of 60 seconds has served
Linux users well for production use for quite some time. However, for
debugging, there will be more than three minutes between subsequent
stall-warning messages. This can be an annoyingly long wait if you
are trying to work out where the offending infinite loop is hiding.

Therefore, this commit provides a rcu_cpu_stall_timeout sysfs
parameter that may be adjusted at boot time and at runtime to speed
up debugging.

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
---
kernel/rcutree.c | 31 ++++++++++++++++++++++++++-----
kernel/rcutree.h | 6 ------
2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 61adb35..cfdab98 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -208,8 +208,11 @@ module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);

-int rcu_cpu_stall_suppress __read_mostly;
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);

static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
static int rcu_pending(int cpu);
@@ -645,10 +648,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
return rcu_implicit_offline_qs(rdp);
}

+static int jiffies_till_stall_check(void)
+{
+ int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+ /*
+ * Limit check must be consistent with the Kconfig limits
+ * for CONFIG_RCU_CPU_STALL_TIMEOUT.
+ */
+ if (till_stall_check < 3) {
+ ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+ till_stall_check = 3;
+ } else if (till_stall_check > 300) {
+ ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+ till_stall_check = 300;
+ }
+ return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
+
static void record_gp_stall_check_time(struct rcu_state *rsp)
{
rsp->gp_start = jiffies;
- rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+ rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
}

static void print_other_cpu_stall(struct rcu_state *rsp)
@@ -667,7 +688,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
- rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+ rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;

/*
* Now rat on any tasks that got kicked up to the root rcu_node
@@ -726,8 +747,8 @@ static void print_cpu_stall(struct rcu_state *rsp)

raw_spin_lock_irqsave(&rnp->lock, flags);
if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
- rsp->jiffies_stall =
- jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+ rsp->jiffies_stall = jiffies +
+ 3 * jiffies_till_stall_check() + 3;
raw_spin_unlock_irqrestore(&rnp->lock, flags);

set_need_resched(); /* kick ourselves to get things going. */
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 58c9fc3..0328a53 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -314,12 +314,6 @@ struct rcu_data {
#else
#define RCU_STALL_DELAY_DELTA 0
#endif
-
-#define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
- RCU_STALL_DELAY_DELTA)
- /* for rsp->jiffies_stall */
-#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
- /* for rsp->jiffies_stall */
#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
/* to take at least one */
/* scheduling clock irq */
--
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/