Re: Regression in kernel 4.13-rcX for 32-bit x86 system - bisected to commit fc8dffd379ca

From: Thomas Gleixner
Date: Mon Aug 14 2017 - 03:22:31 EST


On Mon, 14 Aug 2017, Larry Finger wrote:

> My Del Latitude D600 (single 32-bit CPU) shows the following locking message
> when booted:
>
> ============================================
> WARNING: possible recursive locking detected
> 4.12.0-rc2-00029-gfc8dffd #159 Not tainted
> --------------------------------------------
> systemd-udevd/153 is trying to acquire lock:
> (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [<c030fc26>] stop_machine+0x16/0x30
>
> but task is already holding lock:
> (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [<c0234353>] mtrr_add_page+0x83/0x470

....

> cpus_read_lock+0x48/0x90
> ? stop_machine+0x16/0x30
> stop_machine+0x16/0x30
> mtrr_add_page+0x18b/0x470
> mtrr_add+0x3e/0x70

> This problem was bisected to commit fc8dffd379ca ("cpu/hotplug: Convert
> hotplug locking to percpu rwsem"). I am very confident in the correctness of
> the bisection.

Fix below.

Thanks,

tglx

8<--------------------

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c5bb63be4ba1..40d5a8a75212 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -237,6 +237,18 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
}

+static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
+{
+ struct set_mtrr_data data = { .smp_reg = reg,
+ .smp_base = base,
+ .smp_size = size,
+ .smp_type = type
+ };
+
+ stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
+}
+
static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
{
@@ -370,7 +382,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
/* Search for an empty MTRR */
i = mtrr_if->get_free_region(base, size, replace);
if (i >= 0) {
- set_mtrr(i, base, size, type);
+ set_mtrr_cpuslocked(i, base, size, type);
if (likely(replace < 0)) {
mtrr_usage_table[i] = 1;
} else {
@@ -378,7 +390,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (increment)
mtrr_usage_table[i]++;
if (unlikely(replace != i)) {
- set_mtrr(replace, 0, 0, 0);
+ set_mtrr_cpuslocked(replace, 0, 0, 0);
mtrr_usage_table[replace] = 0;
}
}
@@ -506,7 +518,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
goto out;
}
if (--mtrr_usage_table[reg] < 1)
- set_mtrr(reg, 0, 0, 0);
+ set_mtrr_cpuslocked(reg, 0, 0, 0);
error = reg;
out:
mutex_unlock(&mtrr_mutex);