--- /2.6.18.orig/arch/x86_64/kernel/Makefile	2007-12-10 16:05:26.495988000 -0800
+++ /2.6.18/arch/x86_64/kernel/Makefile	2007-12-10 16:08:33.859235000 -0800
@@ -11,7 +11,7 @@ obj-y := process.o signal.o entry.o trap
 		pci-dma.o pci-nommu.o alternative.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_X86_MCE) += mce.o mce_ecc.o therm_throt.o
+obj-$(CONFIG_X86_MCE) += mce.o mce_ecc.o therm_throt.o mce_thermal.o
 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
 obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
--- /2.6.18.orig/arch/x86_64/kernel/mce_amd.c	2007-12-10 16:05:13.732638000 -0800
+++ /2.6.18/arch/x86_64/kernel/mce_amd.c	2007-12-10 18:00:43.301902000 -0800
@@ -20,6 +20,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -29,6 +31,7 @@
 #include
 #include
 #include
+#include
 
 #define PFX "mce_threshold: "
 #define VERSION "version 1.1.1"
@@ -44,6 +47,22 @@
 #define MASK_ERR_COUNT_HI 0x00000FFF
 #define MASK_BLKPTR_LO 0xFF000000
 #define MCG_XBLK_ADDR 0xC0000400
+#define THERM_CTL_F3X64 0x64
+#define PSL_APIC_LO_EN 0x80
+#define PSL_APIC_HI_EN 0x40
+#define HTC_ACTIVE 0x10
+#define HTC_EN 1
+#define NB_PCI_DEV_BASE 0x18
+/* Increase NB_COUNT_MAX as systems with more northbridges get deployed. */
+#define NB_COUNT_MAX 8
+/* Increase CPU_COUNT_MAX as systems with more CPUs get deployed. */
+
+extern int num_k8_northbridges;
+extern struct pci_dev **k8_northbridges;
+
+static int smp_thermal_interrupt_init(void);
+static int thermal_apic_init_allowed;
+static void thermal_apic_init(void *unused);
 
 struct threshold_block {
 	unsigned int block;
@@ -634,18 +653,30 @@ static void threshold_remove_device(unsi
 }
 
 /* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int amd_cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
 	/* cpu was unsigned int to begin with */
 	unsigned int cpu = (unsigned long)hcpu;
 
-	if (cpu >= NR_CPUS)
-		goto out;
-
 	switch (action) {
 	case CPU_ONLINE:
 		threshold_create_device(cpu);
+		if (thermal_apic_init_allowed) {
+			/*
+			 * We need to run thermal_apic_init() on the core
+			 * that just came online.  If we're already on that
+			 * core, run it directly; otherwise,
+			 * smp_call_function_single() to that core.
+			 */
+			if (cpu == get_cpu()) {
+				thermal_apic_init(NULL);
+			} else {
+				smp_call_function_single(cpu,
+					&thermal_apic_init, NULL, 1, 0);
+			}
+			put_cpu();
+		}
 		break;
 	case CPU_DEAD:
 		threshold_remove_device(cpu);
@@ -657,8 +688,8 @@ static int threshold_cpu_callback(struct
 	return NOTIFY_OK;
 }
 
-static struct notifier_block threshold_cpu_notifier = {
-	.notifier_call = threshold_cpu_callback,
+static struct notifier_block amd_cpu_notifier = {
+	.notifier_call = amd_cpu_callback,
 };
 #endif /* CONFIG_HOTPLUG_CPU */
@@ -672,8 +703,192 @@ static __init int threshold_init_device(
 		if (err)
 			return err;
 	}
-	register_hotcpu_notifier(&threshold_cpu_notifier);
+	register_hotcpu_notifier(&amd_cpu_notifier);
 
 	return 0;
 }
 device_initcall(threshold_init_device);
+
+/*
+ * AMD-specific thermal interrupt handler.
+ */
+void amd_smp_thermal_interrupt(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	/*
+	 * We're here because thermal throttling has just been activated --
+	 * not deactivated -- hence therm_throt_process(1).
+	 */
+	if (therm_throt_process(1))
+		mce_log_therm_throt_event(cpu, 1);
+	/*
+	 * We'll still get subsequent interrupts even if we don't clear the
+	 * status bit in THERM_CTL_F3X64.  Take advantage of this fact to
+	 * avoid touching PCI space.  (If this assumption fails at some
+	 * point, we'll need to schedule_work() in order to enter a process
+	 * context, so that PCI locks can be taken for proper access.  This
+	 * requirement, in turn, creates the need to remember which core
+	 * interrupted, as the core which ultimately takes the scheduled
+	 * work may be different.  With any luck, we'll never need to do
+	 * this.)
+	 */
+}
+
+/*
+ * Initialize each northbridge's thermal throttling logic.
+ */
+static void smp_thermal_northbridge_init(void)
+{
+	int nb_num;
+	u32 therm_ctl_f3x64;
+
+	for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+		/*
+		 * Configure the thermal interrupt for this northbridge.
+		 */
+		pci_read_config_dword(k8_northbridges[nb_num],
+			THERM_CTL_F3X64, &therm_ctl_f3x64);
+		therm_ctl_f3x64 |= PSL_APIC_HI_EN | HTC_EN;
+		therm_ctl_f3x64 &= ~PSL_APIC_LO_EN;
+		pci_write_config_dword(k8_northbridges[nb_num],
+			THERM_CTL_F3X64, therm_ctl_f3x64);
+		printk(KERN_INFO "Northbridge at PCI device 0x%x: "
+			"thermal monitoring enabled.\n",
+			NB_PCI_DEV_BASE + nb_num);
+	}
+}
+
+/*
+ * Enable the delivery of thermal interrupts via the local APIC.
+ */
+static void thermal_apic_init(void *unused)
+{
+	unsigned int apic_lv_therm;
+
+	/* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
+	apic_lv_therm = apic_read(APIC_LVTTHMR);
+	/*
+	 * See if some agent other than this routine has already initialized
+	 * APIC_LVTTHMR, i.e. if it's unmasked but not equal to the value
+	 * that we would have programmed, had we been here before on this
+	 * core.
+	 */
+	if (!(apic_lv_therm & APIC_LVT_MASKED) &&
+	    ((apic_lv_therm & (APIC_MODE_MASK | APIC_VECTOR_MASK)) !=
+	     (APIC_DM_FIXED | THERMAL_APIC_VECTOR))) {
+		unsigned int cpu = smp_processor_id();
+
+		printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
+			"functional.\n", cpu);
+		if ((apic_lv_therm & APIC_MODE_MASK) == APIC_DM_SMI) {
+			printk(KERN_DEBUG "Thermal interrupts already "
+				"handled by SMI according to (((local APIC "
+				"base) + 0x330) bit 0x9).\n");
+		} else {
+			printk(KERN_DEBUG "Thermal interrupts unexpectedly "
				"enabled at (((local APIC base) + 0x330) bit "
+				"0x10).\n");
+		}
+	} else {
+		/*
+		 * Configure the Local Thermal Vector Table Entry to issue
+		 * thermal interrupts to THERMAL_APIC_VECTOR.
+		 *
+		 * Start by masking off Delivery Mode and Vector.
+		 */
+		apic_lv_therm &= ~(APIC_MODE_MASK | APIC_VECTOR_MASK);
+		/* Fixed interrupt, masked for now. */
+		apic_lv_therm |= APIC_LVT_MASKED | APIC_DM_FIXED |
+			THERMAL_APIC_VECTOR;
+		apic_write(APIC_LVTTHMR, apic_lv_therm);
+		/*
+		 * The Intel thermal kernel code implies that there may be a
+		 * race involving the mask bit, so clear it only now, after
+		 * the other bits have settled.
+		 */
+		apic_write(APIC_LVTTHMR, apic_lv_therm & ~APIC_LVT_MASKED);
+	}
+}
+
+/*
+ * This function is intended to be called just after thermal throttling has
+ * been enabled.  It warns the user if throttling is already active, which
+ * could indicate a failed cooling system.  It may be the last chance to get
+ * a warning out before thermal shutdown occurs.
+ */
+static void smp_thermal_early_throttle_check(void)
+{
+	int nb_num;
+	u32 therm_ctl_f3x64;
+
+	for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+		/*
+		 * Read back THERM_CTL_F3X64 to check whether HTC_ACTIVE is
+		 * asserted, in which case warn the user.
+		 */
+		pci_read_config_dword(k8_northbridges[nb_num],
+			THERM_CTL_F3X64, &therm_ctl_f3x64);
+		if (therm_ctl_f3x64 & HTC_ACTIVE)
+			printk(KERN_WARNING "High temperature on northbridge "
+				"at PCI device 0x%x.  Throttling enabled.\n",
+				NB_PCI_DEV_BASE + nb_num);
+	}
+}
+
+/*
+ * Determine whether or not the northbridges support thermal throttling
+ * interrupts.  If so, initialize them for receiving the same, then perform
+ * corresponding APIC initialization on each core.
+ */
+static int smp_thermal_interrupt_init(void)
+{
+	int nb_num;
+	int thermal_registers_functional;
+
+	/*
+	 * If there are no recognized northbridges, then we can't talk to
+	 * the thermal registers.
+	 */
+	thermal_registers_functional = num_k8_northbridges;
+	/*
+	 * If any of the northbridges has PCI ID 0x1103, then its thermal
+	 * hardware suffers from an erratum which prevents this code from
+	 * working, so abort.
+	 */
+	for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+		if (k8_northbridges[nb_num]->device == 0x1103) {
+			thermal_registers_functional = 0;
+			break;
+		}
+	}
+	if (thermal_registers_functional) {
+		/*
+		 * Assert that we should log thermal throttling events,
+		 * whenever we eventually get around to enabling them.
+		 */
+		atomic_set(&therm_throt_en, 1);
+		/*
+		 * Bind cpu_specific_smp_thermal_interrupt() to
+		 * amd_smp_thermal_interrupt().
+		 */
+		cpu_specific_smp_thermal_interrupt =
+			amd_smp_thermal_interrupt;
+		smp_thermal_northbridge_init();
+		/*
+		 * We've now initialized sufficient fabric to permit the
+		 * initialization of the thermal interrupt APIC vectors,
+		 * such as when a core comes online and calls
+		 * amd_cpu_callback().
+		 */
+		thermal_apic_init_allowed = 1;
+		/*
+		 * Call thermal_apic_init() on each core.
+		 */
+		on_each_cpu(&thermal_apic_init, NULL, 1, 0);
+		smp_thermal_early_throttle_check();
+	}
+	return 0;
+}
+
+/*
+ * smp_thermal_interrupt_init() cannot execute until PCI has been fully
+ * initialized, hence late_initcall().
+ */
+late_initcall(smp_thermal_interrupt_init);
--- /2.6.18.orig/arch/x86_64/kernel/mce_intel.c	2007-12-10 16:05:18.008914000 -0800
+++ /2.6.18/arch/x86_64/kernel/mce_intel.c	2007-12-10 16:08:25.582891000 -0800
@@ -14,25 +14,16 @@
 #include
 #include
 
-asmlinkage void smp_thermal_interrupt(void)
+void intel_smp_thermal_interrupt(void)
 {
 	__u64 msr_val;
 
-	ack_APIC_irq();
-
-	exit_idle();
-
-	irq_enter();
-	ktrace_irq(KT_THERMAL_APIC_VECTOR);
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 	if (msr_val & 0x2)
 		wrmsrl(MSR_IA32_THERM_STATUS, (msr_val & ~2));
 	if (therm_throt_process(msr_val & 2))
 		mce_log_therm_throt_event(smp_processor_id(), msr_val);
-
-	ktrace_irq_exit();
-	irq_exit();
 }
 
 static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
@@ -82,6 +73,11 @@ static void __cpuinit intel_init_thermal
 	wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
 
 	l = apic_read(APIC_LVTTHMR);
+	/*
+	 * Bind the cpu_specific_smp_thermal_interrupt trampoline to
+	 * intel_smp_thermal_interrupt.
+	 */
+	cpu_specific_smp_thermal_interrupt = intel_smp_thermal_interrupt;
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
 		cpu, tm2 ? "TM2" : "TM1");
--- /dev/null	2006-05-22 07:25:23.000000000 -0700
+++ /2.6.18/arch/x86_64/kernel/mce_thermal.c	2007-12-10 16:08:29.547207000 -0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2007 Google Inc.
+ *
+ * Written by Mike Waychison and Russell Leidich.
+ *
+ * CPU-independent thermal interrupt handler.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static void default_smp_thermal_interrupt(void) {}
+
+cpu_specific_smp_thermal_interrupt_callback cpu_specific_smp_thermal_interrupt =
+	default_smp_thermal_interrupt;
+
+/*
+ * Wrapper for the CPU-specific thermal interrupt service routine.  Without
+ * this, we'd have to discern the CPU brand at runtime (because support could
+ * be installed for more than one).
+ */
+asmlinkage void smp_thermal_interrupt(void)
+{
+	ack_APIC_irq();
+	exit_idle();
+	irq_enter();
+	ktrace_irq(KT_THERMAL_APIC_VECTOR);
+	cpu_specific_smp_thermal_interrupt();
+	ktrace_irq_exit();
+	irq_exit();
+}
--- /2.6.18.orig/arch/x86_64/kernel/mce.h	2007-12-10 16:05:40.787221000 -0800
+++ /2.6.18/arch/x86_64/kernel/mce.h	2007-12-10 16:08:45.451381000 -0800
@@ -118,3 +118,9 @@
 extern int mce_notify_user(void);
 #endif
 #endif
+
+typedef void (*cpu_specific_smp_thermal_interrupt_callback)(void);
+
+extern cpu_specific_smp_thermal_interrupt_callback
+	cpu_specific_smp_thermal_interrupt;
+
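
A note for reviewers: the load-bearing idea in this patch is the
cpu_specific_smp_thermal_interrupt trampoline, which lets the Intel and AMD
handlers coexist in one kernel image.  Each vendor's init code binds the
pointer once, and the shared asmlinkage wrapper pays a single indirect call
per interrupt instead of testing the CPU brand in the hot path.  The
stand-alone C sketch below is purely illustrative and not part of the patch:
names such as thermal_callback and fake_vendor_handler are hypothetical, and
the kernel-only calls (ack_APIC_irq(), irq_enter(), ktrace_irq(), irq_exit())
are stubbed with printf() so the sketch runs in user space.

#include <stdio.h>

/* Analogue of cpu_specific_smp_thermal_interrupt_callback in mce.h. */
typedef void (*thermal_callback)(void);

/*
 * Analogue of default_smp_thermal_interrupt(): a safe no-op until a
 * vendor-specific handler registers itself.
 */
static void default_handler(void)
{
}

/* The trampoline itself, analogous to cpu_specific_smp_thermal_interrupt. */
static thermal_callback thermal_handler = default_handler;

/* Stand-in for intel_smp_thermal_interrupt()/amd_smp_thermal_interrupt(). */
static void fake_vendor_handler(void)
{
	printf("vendor-specific throttle status check\n");
}

/*
 * Analogue of the shared smp_thermal_interrupt() wrapper in mce_thermal.c:
 * generic prologue, one indirect call, generic epilogue.
 */
static void thermal_interrupt(void)
{
	printf("prologue (ack_APIC_irq, exit_idle, irq_enter)\n");
	thermal_handler();
	printf("epilogue (ktrace_irq_exit, irq_exit)\n");
}

int main(void)
{
	thermal_interrupt();			/* dispatches to the no-op */
	thermal_handler = fake_vendor_handler;	/* what vendor init does */
	thermal_interrupt();			/* now hits the vendor hook */
	return 0;
}

The same binding step appears twice in the patch: intel_init_thermal()
assigns the pointer when it enables TM1/TM2, and smp_thermal_interrupt_init()
assigns it after the K8 northbridge checks pass, so whichever path runs on
the detected hardware takes over the vector.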