Re: Machine crashes right *after* ~successful resume

From: Yinghai Lu
Date: Sun Oct 26 2014 - 17:53:15 EST


On Wed, Oct 22, 2014 at 5:53 AM, Wilmer van der Gaast <wilmer@xxxxxxxxx> wrote:
> That seems to be the case yes:
>
> [ 106.661152] PM: ... nb fw_pm_notify+0x0/0x150 done
> [ 106.665939] PM: calling nb bsp_pm_callback+0x0/0x50
> [ 106.670814] PM: ... nb bsp_pm_callback+0x0/0x50 done
> [ 106.675775] pm_restore_console() before move
>
> Then nothing, during the third resume.
>
> http://gaast.net/~wilmer/.lkml/bad3.17-patched-console-restore.txt has
> the full log.
>
> (Some of your other debug lines in your patch don't seem to be logging
> anything during my repro BTW.)

Please try the two attached debug patches to compare the PCI config
registers between suspend and resume.
Subject: [PATCH] pci: print out about pci=dump

Debug printout of every PCI device's config space, to capture register state before a later driver hang.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
drivers/pci/pci.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 51 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/pci/pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/pci.c
+++ linux-2.6/drivers/pci/pci.c
@@ -3858,6 +3858,54 @@ void __weak pci_fixup_cardbus(struct pci
}
EXPORT_SYMBOL(pci_fixup_cardbus);

+static void dump_pci_device_range(struct pci_dev *dev, unsigned start_reg,
+ unsigned size)
+{
+ int i;
+ int j;
+ u32 val;
+ int end = start_reg + size;
+
+ printk(KERN_DEBUG "PCI: %s", pci_name(dev));
+
+ for (i = start_reg; i < end; i += 4) {
+ if (!(i & 0x0f))
+ printk("\n%04x:", i);
+
+ pci_read_config_dword(dev, i, &val);
+ for (j = 0; j < 4; j++) {
+ printk(" %02x", val & 0xff);
+ val >>= 8;
+ }
+ }
+ printk("\n");
+}
+
+static int dump_pci_devices(void)
+{
+ struct pci_dev *dev = NULL;
+
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL)
+ dump_pci_device_range(dev, 0, dev->cfg_size);
+
+ return 0;
+}
+
+static int pci_dump_regs;
+static void pci_dump(void)
+{
+ pci_dump_regs = 1;
+}
+
+static int pci_init(void)
+{
+ if (pci_dump_regs)
+ dump_pci_devices();
+
+ return 0;
+}
+device_initcall(pci_init);
+
static int __init pci_setup(char *str)
{
while (str) {
@@ -3865,7 +3913,9 @@ static int __init pci_setup(char *str)
if (k)
*k++ = 0;
if (*str && (str = pcibios_setup(str)) && *str) {
- if (!strcmp(str, "nomsi")) {
+ if (!strcmp(str, "dump")) {
+ pci_dump();
+ } else if (!strcmp(str, "nomsi")) {
pci_no_msi();
} else if (!strcmp(str, "noaer")) {
pci_no_aer();
---
drivers/pci/pci.c | 2 +-
kernel/power/suspend.c | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/pci/pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/pci.c
+++ linux-2.6/drivers/pci/pci.c
@@ -4462,7 +4462,7 @@ static void dump_pci_device_range(struct
printk("\n");
}

-static int dump_pci_devices(void)
+int dump_pci_devices(void)
{
struct pci_dev *dev = NULL;

Index: linux-2.6/kernel/power/suspend.c
===================================================================
--- linux-2.6.orig/kernel/power/suspend.c
+++ linux-2.6/kernel/power/suspend.c
@@ -401,6 +401,7 @@ int suspend_devices_and_enter(suspend_st
goto Resume_devices;
}

+int dump_pci_devices(void);
/**
* suspend_finish - Clean up before finishing the suspend sequence.
*
@@ -411,6 +412,7 @@ static void suspend_finish(void)
{
suspend_thaw_processes();
pm_notifier_call_chain(PM_POST_SUSPEND);
+ dump_pci_devices();
pm_restore_console();
}