[Patch V1 2/3] x86, mce: Add infrastructure required to support LMCE

From: Ashok Raj
Date: Fri May 29 2015 - 12:47:48 EST


Initialization and handling for LMCE (Local Machine Check Exception):
- Add a boot-time option to disable LMCE for that boot instance.
- Check for the capability via IA32_MCG_CAP.
- Provide the ability to enable/disable LMCE on demand.

See http://www.intel.com/sdm Volume 3 System Programming Guide, Chapter 15
for more information on the MSRs and documentation on Local MCE.

Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
---
Documentation/x86/x86_64/boot-options.txt | 3 ++
arch/x86/include/asm/mce.h | 5 +++
arch/x86/kernel/cpu/mcheck/mce.c | 3 ++
arch/x86/kernel/cpu/mcheck/mce_intel.c | 75 +++++++++++++++++++++++++++++++
4 files changed, 86 insertions(+)

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 5223479..79edee0 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -31,6 +31,9 @@ Machine check
(e.g. BIOS or hardware monitoring applications), conflicting
with OS's error handling, and you cannot deactivate the agent,
then this option will be a help.
+ mce=no_lmce
+ Do not opt in to Local MCE delivery. Use the legacy method
+ of broadcasting MCEs.
mce=bootlog
Enable logging of machine checks left over from booting.
Disabled by default on AMD because some BIOS leave bogus ones.
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 677a408..8ba4d7a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -109,6 +109,7 @@ struct mce_log {
struct mca_config {
bool dont_log_ce;
bool cmci_disabled;
+ bool lmce_disabled;
bool ignore_ce;
bool disabled;
bool ser;
@@ -173,12 +174,16 @@ void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(void);
void cmci_recheck(void);
+void lmce_clear(void);
+void lmce_enable(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
+static inline void lmce_clear(void) {}
+static inline void lmce_enable(void) {}
#endif

#ifdef CONFIG_X86_MCE_AMD
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e535533..d10aada 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1976,6 +1976,7 @@ void mce_disable_bank(int bank)
/*
* mce=off Disables machine check
* mce=no_cmci Disables CMCI
+ * mce=no_lmce Disables LMCE
* mce=dont_log_ce Clears corrected events silently, no log created for CEs.
* mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
* mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -1999,6 +2000,8 @@ static int __init mcheck_enable(char *str)
cfg->disabled = true;
else if (!strcmp(str, "no_cmci"))
cfg->cmci_disabled = true;
+ else if (!strcmp(str, "no_lmce"))
+ cfg->lmce_disabled = true;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b4a41cf..be3a5c6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -70,6 +70,10 @@ enum {

static atomic_t cmci_storm_on_cpus;

+#define FEATURE_CONTROL_LMCE_BITS ((FEATURE_CONTROL_LOCKED) | \
+ (FEATURE_CONTROL_LMCE_SUPPORT_ENABLED))
+#define MCG_CAP_LMCE_BITS ((MCG_SER_P) | (MCG_LMCE_P))
+
static int cmci_supported(int *banks)
{
u64 cap;
@@ -91,6 +95,34 @@ static int cmci_supported(int *banks)
return !!(cap & MCG_CMCI_P);
}

+/* Check whether LMCE may be enabled on the CPU executing this code. */
+static bool lmce_supported(void)
+{
+	u64 tmp;
+
+	if (mca_cfg.lmce_disabled)
+		return false;
+
+	/*
+	 * MCG_CAP must advertise both MCG_SER_P and MCG_LMCE_P. Test this
+	 * first: IA32_FEATURE_CONTROL is not guaranteed to exist on every
+	 * CPU, so it is only read once MCG_CAP reports LMCE. Reading a
+	 * non-existent MSR would fault.
+	 */
+	rdmsrl(MSR_IA32_MCG_CAP, tmp);
+	if ((tmp & MCG_CAP_LMCE_BITS) != MCG_CAP_LMCE_BITS)
+		return false;
+
+	/*
+	 * BIOS indicates LMCE support by setting bit 20 in
+	 * IA32_FEATURE_CONTROL (and locking the MSR); without that,
+	 * touching MCG_EXT_CTL will generate a #GP fault.
+	 */
+	rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
+
+	return (tmp & FEATURE_CONTROL_LMCE_BITS) == FEATURE_CONTROL_LMCE_BITS;
+}
+
bool mce_intel_cmci_poll(void)
{
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,6 +437,49 @@ static void intel_init_cmci(void)
cmci_recheck();
}

+/* Set the LMCE enable bit in MCG_EXT_CTL; performs no support check. */
+static void __lmce_enable(void)
+{
+	u64 ext_ctl;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, ext_ctl);
+	wrmsrl(MSR_IA32_MCG_EXT_CTL, ext_ctl | MCG_EXT_CTL_LMCE_EN);
+}
+
+/*
+ * Enable LMCE, but only when lmce_supported() reports it as usable.
+ */
+void intel_init_lmce(void)
+{
+	if (lmce_supported())
+		__lmce_enable();
+}
+
+void lmce_enable(void)
+{
+ intel_init_lmce(); /* enables LMCE only if lmce_supported() allows it */
+}
+
+/* Clear the LMCE enable bit in MCG_EXT_CTL; performs no support check. */
+void lmce_disable(void)
+{
+	u64 ext_ctl;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, ext_ctl);
+	wrmsrl(MSR_IA32_MCG_EXT_CTL, ext_ctl & ~MCG_EXT_CTL_LMCE_EN);
+}
+
+/*
+ * Turn LMCE off on this CPU. MCG_EXT_CTL is only touched when
+ * lmce_supported() reports LMCE as usable, i.e. this is a no-op on
+ * systems booted with mce=no_lmce or lacking CPU/BIOS support.
+ */
+void lmce_clear(void)
+{
+	if (lmce_supported())
+		lmce_disable();
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_thermal(c);
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/