[RFC PATCH v3 13/21] x86/watchdog/hardlockup/hpet: Determine if HPET timer caused NMI

From: Ricardo Neri
Date: Tue May 14 2019 - 10:05:27 EST


The only direct method to determine whether an HPET timer caused an
interrupt is to read the Interrupt Status register. Unfortunately,
reading HPET registers is slow and, therefore, it is not recommended to
read them while in NMI context. Furthermore, status is not available if
the interrupt is generated via the Front Side Bus.

An indirect manner to infer if the non-maskable interrupt we see was
caused by the HPET timer is to use the time-stamp counter. Compute the
value that the time-stamp counter should have at the next interrupt of the
HPET timer. Since the hardlockup detector operates in seconds, high
precision is not needed. This implementation considers that the HPET
caused the NMI if the time-stamp counter reads the expected value +/- 1.5%.
This value is selected as it is approximately 1/64 and the division can be
performed using a bit shift operation. Experimentally, the error in the
estimation is consistently less than 1%.

The computation of the expected value of the time-stamp counter must be
performed in relation to watchdog_thresh divided by the number of
monitored CPUs. This quantity is stored in tsc_ticks_per_cpu and must be
updated whenever the number of monitored CPUs changes.

Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
Cc: Andi Kleen <andi.kleen@xxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Clemens Ladisch <clemens@xxxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: Philippe Ombredanne <pombredanne@xxxxxxxx>
Cc: Kate Stewart <kstewart@xxxxxxxxxxxxxxxxxxx>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@xxxxxxxxx>
Cc: Mimi Zohar <zohar@xxxxxxxxxxxxx>
Cc: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
Cc: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
Cc: Masahiro Yamada <yamada.masahiro@xxxxxxxxxxxxx>
Cc: Nayna Jain <nayna@xxxxxxxxxxxxx>
Cc: Stephane Eranian <eranian@xxxxxxxxxx>
Cc: Suravee Suthikulpanit <Suravee.Suthikulpanit@xxxxxxx>
Cc: "Ravi V. Shankar" <ravi.v.shankar@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Suggested-by: Andi Kleen <andi.kleen@xxxxxxxxx>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/hpet.h | 2 ++
arch/x86/kernel/watchdog_hld_hpet.c | 27 ++++++++++++++++++++++++++-
2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 64acacce095d..fd99f2390714 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -115,6 +115,8 @@ struct hpet_hld_data {
u32 num;
u64 ticks_per_second;
u64 ticks_per_cpu;
+ u64 tsc_next;
+ u64 tsc_ticks_per_cpu;
u32 handling_cpu;
u32 enabled_cpus;
struct msi_msg msi_msg;
diff --git a/arch/x86/kernel/watchdog_hld_hpet.c b/arch/x86/kernel/watchdog_hld_hpet.c
index 9a3431a54616..6f1f540cfee9 100644
--- a/arch/x86/kernel/watchdog_hld_hpet.c
+++ b/arch/x86/kernel/watchdog_hld_hpet.c
@@ -23,6 +23,7 @@

static struct hpet_hld_data *hld_data;
static bool hardlockup_use_hpet;
+static u64 tsc_next_error;

/**
* kick_timer() - Reprogram timer to expire in the future
@@ -32,11 +33,22 @@ static bool hardlockup_use_hpet;
* Reprogram the timer to expire within watchdog_thresh seconds in the future.
* If the timer supports periodic mode, it is not kicked unless @force is
* true.
+ *
+ * Also, compute the expected value of the time-stamp counter at the time of
+ * expiration as well as the allowed deviation from that value. The maximum
+ * deviation is ~1.5% (1/64). It is computed cheaply by right-shifting by 6
+ * bits the delta between the current and expected time-stamp values.
*/
static void kick_timer(struct hpet_hld_data *hdata, bool force)
{
+ u64 tsc_curr, tsc_delta, new_compare, count, period = 0;
bool kick_needed = force || !(hdata->has_periodic);
- u64 new_compare, count, period = 0;
+
+ tsc_curr = rdtsc();
+
+ tsc_delta = (unsigned long)watchdog_thresh * hdata->tsc_ticks_per_cpu;
+ hdata->tsc_next = tsc_curr + tsc_delta;
+ tsc_next_error = tsc_delta >> 6;

/*
* Update the comparator in increments of watch_thresh seconds relative
@@ -92,6 +104,15 @@ static void enable_timer(struct hpet_hld_data *hdata)
*/
static bool is_hpet_wdt_interrupt(struct hpet_hld_data *hdata)
{
+ if (smp_processor_id() == hdata->handling_cpu) {
+ u64 tsc_curr;
+
+ tsc_curr = rdtsc();
+
+ return (tsc_curr - hdata->tsc_next) + tsc_next_error <
+ 2 * tsc_next_error;
+ }
+
return false;
}

@@ -259,6 +280,10 @@ static void update_ticks_per_cpu(struct hpet_hld_data *hdata)

do_div(temp, hdata->enabled_cpus);
hdata->ticks_per_cpu = temp;
+
+ temp = (unsigned long)tsc_khz * 1000L;
+ do_div(temp, hdata->enabled_cpus);
+ hdata->tsc_ticks_per_cpu = temp;
}

/**
--
2.17.1