Re: [PATCH] AMD Thermal Interrupt Support

From: Russell Leidich
Date: Tue Jan 08 2008 - 18:43:17 EST


OK all, I fixed the comments about accessing PCI config space in an
interrupt context.

Can someone please give me a thumbs up or down about checking this in?
If it's thumbs up, I will do a final retest and mail back a
Signed-off-by patch.

--
Russell Leidich
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile 2007-12-11 14:30:53.533297000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile 2007-12-11 11:56:20.549185000 -0800
@@ -1,4 +1,4 @@
-obj-y = mce_$(BITS).o therm_throt.o
+obj-y = mce_$(BITS).o therm_throt.o mce_thermal.o

obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c 2007-12-11 14:30:53.559298000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c 2008-01-08 15:21:44.241310000 -0800
@@ -20,15 +20,18 @@
#include <linux/interrupt.h>
#include <linux/kobject.h>
#include <linux/notifier.h>
+#include <linux/pci_ids.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
#include <linux/sysfs.h>
#include <asm/apic.h>
-#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/percpu.h>
#include <asm/idle.h>
+#include <asm/therm_throt.h>
+#include <asm/k8.h>
+#include "mce_thermal.h"

#define PFX "mce_threshold: "
#define VERSION "version 1.1.1"
@@ -46,6 +49,16 @@
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO 0xFF000000
#define MCG_XBLK_ADDR 0xC0000400
+#define THERM_CTL_F3X64 0x64
+#define PSL_APIC_LO_EN 0x80
+#define PSL_APIC_HI_EN 0x40
+#define HTC_ACTIVE 0x10
+#define HTC_EN 1
+#define NB_PCI_DEV_BASE 0x18
+
+static int smp_thermal_interrupt_init(void);
+static int thermal_apic_init_allowed;
+static void thermal_apic_init(void *unused);

struct threshold_block {
unsigned int block;
@@ -657,6 +670,9 @@ static int threshold_cpu_callback(struct
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
threshold_create_device(cpu);
+ if (thermal_apic_init_allowed)
+ smp_call_function_single(cpu,
+ &thermal_apic_init, NULL, 1, 0);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -665,7 +681,7 @@ static int threshold_cpu_callback(struct
default:
break;
}
- out:
+out:
return NOTIFY_OK;
}

@@ -688,3 +704,182 @@ static __init int threshold_init_device(
}

device_initcall(threshold_init_device);
+
+/*
+ * AMD-specific thermal interrupt handler.
+ */
+void amd_smp_thermal_interrupt(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ ack_APIC_irq();
+ exit_idle();
+ irq_enter();
+ /*
+ * We're here because thermal throttling has just been activated -- not
+ * deactivated -- hence therm_throt_process(1).
+ */
+ if (therm_throt_process(1))
+ mce_log_therm_throt_event(cpu, 1);
+ /*
+ * We'll still get subsequent interrupts even if we don't clear the
+ * status bit in THERM_CTL_F3X64. Take advantage of this fact to avoid
+ * wasting time and icache by touching PCI config space.
+ */
+ irq_exit();
+}
+
+/*
+ * Initialize each northbridge's thermal throttling logic.
+ */
+static void smp_thermal_northbridge_init(void)
+{
+ int nb_num;
+ u32 therm_ctl_f3x64;
+
+ for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+ /*
+ * Configure the thermal interrupt for this northbridge.
+ */
+ pci_read_config_dword(k8_northbridges[nb_num],
+ THERM_CTL_F3X64, &therm_ctl_f3x64);
+ therm_ctl_f3x64 |= PSL_APIC_HI_EN | HTC_EN;
+ therm_ctl_f3x64 &= (~PSL_APIC_LO_EN);
+ pci_write_config_dword(k8_northbridges[nb_num],
+ THERM_CTL_F3X64, therm_ctl_f3x64);
+ printk(KERN_INFO "Northbridge at PCI device 0x%x: "
+ "thermal monitoring enabled.\n", NB_PCI_DEV_BASE +
+ nb_num);
+ }
+}
+
+/*
+ * Enable the delivery of thermal interrupts via the local APIC.
+ */
+static void thermal_apic_init(void *unused)
+{
+ unsigned int apic_lv_therm;
+
+ /* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
+ apic_lv_therm = apic_read(APIC_LVTTHMR);
+ /*
+ * See if some agent other than this routine has already initialized
+ * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that
+ * we would have programmed, had we been here before on this core.
+ */
+ if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm &
+ (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED |
+ THERMAL_APIC_VECTOR))) {
+ unsigned int cpu = smp_processor_id();
+
+ printk(KERN_WARNING "CPU 0x%x: Thermal monitoring not "
+ "functional.\n", cpu);
+ if ((apic_lv_therm & APIC_MODE_MASK) == APIC_DM_SMI)
+ printk(KERN_DEBUG "Thermal interrupts already "
+ "handled by SMI according to (((local APIC "
+ "base) + 0x330) bit 0x9).\n");
+ else
+ printk(KERN_DEBUG "Thermal interrupts unexpectedly "
+ "enabled at (((local APIC base) + 0x330) bit "
+ "0x10).\n");
+ } else {
+ /*
+ * Configure the Local Thermal Vector Table Entry to issue
+ * issue thermal interrupts to THERMAL_APIC_VECTOR.
+ *
+ * Start by masking off Delivery Mode and Vector.
+ */
+ apic_lv_therm &= ~(APIC_MODE_MASK | APIC_VECTOR_MASK);
+ /* Fixed interrupt, masked for now. */
+ apic_lv_therm |= APIC_LVT_MASKED | APIC_DM_FIXED |
+ THERMAL_APIC_VECTOR;
+ apic_write(APIC_LVTTHMR, apic_lv_therm);
+ /*
+ * The Intel thermal kernel code implies that there may be a
+ * race involving the mask bit, so clear it only now, after
+ * the other bits have settled.
+ */
+ apic_write(APIC_LVTTHMR, apic_lv_therm & ~APIC_LVT_MASKED);
+ }
+}
+
+/*
+* This function is intended to be called just after thermal throttling has
+ * been enabled. It warns the user if throttling is already active, which
+ * could indicate a failed cooling system. It may be the last chance to get
+ * a warning out before thermal shutdown occurs.
+ */
+static void smp_thermal_early_throttle_check(void)
+{
+ int nb_num;
+ u32 therm_ctl_f3x64;
+
+ for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+ /*
+ * Read back THERM_CTL_F3X64 to check whther HTC_ACTIVE is
+ * asserted, in which case, warn the user.
+ */
+ pci_read_config_dword(k8_northbridges[nb_num],
+ THERM_CTL_F3X64, &therm_ctl_f3x64);
+ if (therm_ctl_f3x64 & HTC_ACTIVE)
+ printk(KERN_WARNING "High temperature on northbridge "
+ "at PCI device 0x%x. Throttling enabled.\n",
+ NB_PCI_DEV_BASE + nb_num);
+ }
+}
+
+/*
+ * Determine whether or not the northbridges support thermal throttling
+ * interrupts. If so, initialize them for receiving the same, then perform
+ * corresponding APIC initialization on each core.
+ *
+ * Due to an erratum involving the thermal sensor, this code does not work
+ * on any CPU prior to Family 0x10 (AMD Quad Core).
+ */
+static int smp_thermal_interrupt_init(void)
+{
+ int nb_num;
+
+ if (num_k8_northbridges == 0)
+ goto out;
+ /*
+ * If any of the northbridges has PCI ID 0x1103, then its thermal
+ * hardware suffers from an erratum which prevents this code from
+ * working, so abort. (This implies that it only works on Family
+ * 0x10 (AMD Quad Core).)
+ */
+ for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+ if ((k8_northbridges[nb_num]->device) == 0x1103)
+ goto out;
+ }
+ /*
+ * Assert that we should log thermal throttling events, whenever
+ * we eventually get around to enabling them.
+ */
+ atomic_set(&therm_throt_en, 1);
+ /*
+ * Bind smp_thermal_interrupt() to amd_smp_thermal_interrupt().
+ */
+ smp_thermal_interrupt = amd_smp_thermal_interrupt;
+ smp_thermal_northbridge_init();
+ /*
+ * We've now initialized sufficient fabric to permit the
+ * initialization of the thermal interrupt APIC vectors, such as
+ * when a core comes online and calls threshold_cpu_callback().
+ */
+ thermal_apic_init_allowed = 1;
+ /*
+ * Call thermal_apic_init() on each core.
+ */
+ on_each_cpu(&thermal_apic_init, NULL, 1, 0);
+ smp_thermal_early_throttle_check();
+out:
+ return 0;
+}
+
+/*
+ * Ensure that smp_thermal_interrupt_init() executes after num_k8_northbridges
+ * has been initialized.
+ */
+late_initcall(smp_thermal_interrupt_init);
+
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c 2007-12-11 14:30:53.564303000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c 2008-01-04 16:25:08.547999000 -0800
@@ -8,24 +8,21 @@
#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
+#include "mce_thermal.h"

-asmlinkage void smp_thermal_interrupt(void)
+void intel_smp_thermal_interrupt(void)
{
__u64 msr_val;

ack_APIC_irq();
-
exit_idle();
irq_enter();
-
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
if (therm_throt_process(msr_val & 1))
mce_log_therm_throt_event(smp_processor_id(), msr_val);
-
add_pda(irq_thermal_count, 1);
irq_exit();
}
@@ -75,6 +72,10 @@ static void __cpuinit intel_init_thermal
wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);

l = apic_read(APIC_LVTTHMR);
+ /*
+ * Bind smp_thermal_interrupt() to intel_smp_thermal_interrupt().
+ */
+ smp_thermal_interrupt = intel_smp_thermal_interrupt;
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
cpu, tm2 ? "TM2" : "TM1");
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c 1969-12-31 16:00:00.000000000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c 2008-01-04 15:45:40.686775000 -0800
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2007 Google Inc.
+ *
+ * Written by Mike Waychison <mikew@xxxxxxxxxx> and Russell Leidich
+ * <rml@xxxxxxxxxx>.
+ *
+ * CPU-independent thermal interrupt handler.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/hw_irq.h>
+#include <asm/idle.h>
+#include "mce_thermal.h"
+
+static void default_smp_thermal_interrupt(void)
+{
+ if (printk_ratelimit())
+ printk(KERN_DEBUG "Unexpected thermal throttling interrupt.\n");
+}
+
+smp_thermal_interrupt_callback_t smp_thermal_interrupt
+ = default_smp_thermal_interrupt;
+
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.h /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.h
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.h 1969-12-31 16:00:00.000000000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.h 2008-01-04 15:53:07.614658000 -0800
@@ -0,0 +1,8 @@
+#include <linux/init.h>
+#include <asm/mce.h>
+
+typedef void (*smp_thermal_interrupt_callback_t)(void);
+extern smp_thermal_interrupt_callback_t smp_thermal_interrupt;
+
+void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
+
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/entry_64.S /home/rml/linux-2.6.24-rc5/arch/x86/kernel/entry_64.S
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/entry_64.S 2007-12-11 14:30:53.756297000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/entry_64.S 2008-01-04 15:34:16.761342000 -0800
@@ -649,7 +649,7 @@ END(common_interrupt)
.endm

ENTRY(thermal_interrupt)
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+ apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt(%rip)
END(thermal_interrupt)

ENTRY(threshold_interrupt)