[PATCH v5 1/2] x86, pci: Reset PCIe devices at boot time

From: Takao Indoh
Date: Wed Oct 17 2012 - 02:23:27 EST


This patch resets PCIe devices at boot time by hot reset when
"reset_devices" is specified.

Signed-off-by: Takao Indoh <indou.takao@xxxxxxxxxxxxxx>
---
arch/x86/include/asm/pci-direct.h | 1
arch/x86/kernel/setup.c | 3
arch/x86/pci/early.c | 353 ++++++++++++++++++++++++++++
include/linux/pci.h | 2
init/main.c | 4
5 files changed, 361 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h
index b1e7a45..de30db2 100644
--- a/arch/x86/include/asm/pci-direct.h
+++ b/arch/x86/include/asm/pci-direct.h
@@ -18,4 +18,5 @@ extern int early_pci_allowed(void);
extern unsigned int pci_early_dump_regs;
extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
extern void early_dump_pci_devices(void);
+extern void early_reset_pcie_devices(void);
#endif /* _ASM_X86_PCI_DIRECT_H */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a2bb18e..73d3425 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -987,6 +987,9 @@ void __init setup_arch(char **cmdline_p)
generic_apic_probe();

early_quirks();
+#ifdef CONFIG_PCI
+ early_reset_pcie_devices();
+#endif

/*
* Read APIC and some other early information from ACPI tables.
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index d1067d5..df7f4fc 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -1,5 +1,6 @@
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <linux/bootmem.h>
#include <asm/pci-direct.h>
#include <asm/io.h>
#include <asm/pci_x86.h>
@@ -109,3 +110,355 @@ void early_dump_pci_devices(void)
}
}
}
+
+#define PCI_EXP_SAVE_REGS 7
+#define pcie_cap_has_devctl(type, flags) 1
+#define pcie_cap_has_lnkctl(type, flags) \
+ ((flags & PCI_EXP_FLAGS_VERS) > 1 || \
+ (type == PCI_EXP_TYPE_ROOT_PORT || \
+ type == PCI_EXP_TYPE_ENDPOINT || \
+ type == PCI_EXP_TYPE_LEG_END))
+#define pcie_cap_has_sltctl(type, flags) \
+ ((flags & PCI_EXP_FLAGS_VERS) > 1 || \
+ ((type == PCI_EXP_TYPE_ROOT_PORT) || \
+ (type == PCI_EXP_TYPE_DOWNSTREAM && \
+ (flags & PCI_EXP_FLAGS_SLOT))))
+#define pcie_cap_has_rtctl(type, flags) \
+ ((flags & PCI_EXP_FLAGS_VERS) > 1 || \
+ (type == PCI_EXP_TYPE_ROOT_PORT || \
+ type == PCI_EXP_TYPE_RC_EC))
+
+struct save_config {
+ u32 pci[16];
+ u16 pcie[PCI_EXP_SAVE_REGS];
+};
+
+struct pcie_dev {
+ int cap; /* position of PCI Express capability */
+ int flags; /* PCI_EXP_FLAGS */
+ struct save_config save; /* saved configration register */
+};
+
+struct pcie_port {
+ struct list_head dev;
+ u8 bus;
+ u8 slot;
+ u8 func;
+ u8 secondary;
+ struct pcie_dev child[PCI_MAX_FUNCTIONS];
+};
+
+static LIST_HEAD(device_list);
+static void __init pci_udelay(int loops)
+{
+ while (loops--) {
+ /* Approximately 1 us */
+ native_io_delay();
+ }
+}
+
+/* Derived from drivers/pci/pci.c */
+#define PCI_FIND_CAP_TTL 48
+static int __init __pci_find_next_cap_ttl(u8 bus, u8 slot, u8 func,
+ u8 pos, int cap, int *ttl)
+{
+ u8 id;
+
+ while ((*ttl)--) {
+ pos = read_pci_config_byte(bus, slot, func, pos);
+ if (pos < 0x40)
+ break;
+ pos &= ~3;
+ id = read_pci_config_byte(bus, slot, func,
+ pos + PCI_CAP_LIST_ID);
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+ pos += PCI_CAP_LIST_NEXT;
+ }
+ return 0;
+}
+
+static int __init __pci_find_next_cap(u8 bus, u8 slot, u8 func, u8 pos, int cap)
+{
+ int ttl = PCI_FIND_CAP_TTL;
+
+ return __pci_find_next_cap_ttl(bus, slot, func, pos, cap, &ttl);
+}
+
+static int __init __pci_bus_find_cap_start(u8 bus, u8 slot, u8 func,
+ u8 hdr_type)
+{
+ u16 status;
+
+ status = read_pci_config_16(bus, slot, func, PCI_STATUS);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+
+ switch (hdr_type) {
+ case PCI_HEADER_TYPE_NORMAL:
+ case PCI_HEADER_TYPE_BRIDGE:
+ return PCI_CAPABILITY_LIST;
+ case PCI_HEADER_TYPE_CARDBUS:
+ return PCI_CB_CAPABILITY_LIST;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+static int __init early_pci_find_capability(u8 bus, u8 slot, u8 func, int cap)
+{
+ int pos;
+ u8 type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE);
+
+ pos = __pci_bus_find_cap_start(bus, slot, func, type & 0x7f);
+ if (pos)
+ pos = __pci_find_next_cap(bus, slot, func, pos, cap);
+
+ return pos;
+}
+
+static void __init do_reset(u8 bus, u8 slot, u8 func)
+{
+ u16 ctrl;
+
+ printk(KERN_INFO "pci 0000:%02x:%02x.%d reset\n", bus, slot, func);
+
+ /* Assert Secondary Bus Reset */
+ ctrl = read_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL);
+ ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+ write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
+
+ /*
+ * PCIe spec requires software to ensure a minimum reset duration
+ * (Trst == 1ms). We have here 5ms safety margin because pci_udelay is
+ * not precise.
+ */
+ pci_udelay(5000);
+
+ /* De-assert Secondary Bus Reset */
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
+}
+
+static void __init save_state(unsigned bus, unsigned slot, unsigned func,
+ struct pcie_dev *dev)
+{
+ int i;
+ int pcie, flags, pcie_type;
+ struct save_config *save;
+
+ pcie = dev->cap;
+ flags = dev->flags;
+ pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+ save = &dev->save;
+
+ printk(KERN_INFO "pci 0000:%02x:%02x.%d save state\n", bus, slot, func);
+
+ for (i = 0; i < 16; i++)
+ save->pci[i] = read_pci_config(bus, slot, func, i * 4);
+ i = 0;
+ if (pcie_cap_has_devctl(pcie_type, flags))
+ save->pcie[i++] = read_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_DEVCTL);
+ if (pcie_cap_has_lnkctl(pcie_type, flags))
+ save->pcie[i++] = read_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_LNKCTL);
+ if (pcie_cap_has_sltctl(pcie_type, flags))
+ save->pcie[i++] = read_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_SLTCTL);
+ if (pcie_cap_has_rtctl(pcie_type, flags))
+ save->pcie[i++] = read_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_RTCTL);
+
+ if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
+ save->pcie[i++] = read_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_DEVCTL2);
+ save->pcie[i++] = read_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_LNKCTL2);
+ save->pcie[i++] = read_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_SLTCTL2);
+ }
+}
+
+static void __init restore_state(unsigned bus, unsigned slot, unsigned func,
+ struct pcie_dev *dev)
+{
+ int i = 0;
+ int pcie, flags, pcie_type;
+ struct save_config *save;
+
+ pcie = dev->cap;
+ flags = dev->flags;
+ pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+ save = &dev->save;
+
+ printk(KERN_INFO "pci 0000:%02x:%02x.%d restore state\n",
+ bus, slot, func);
+
+ if (pcie_cap_has_devctl(pcie_type, flags))
+ write_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_DEVCTL, save->pcie[i++]);
+ if (pcie_cap_has_lnkctl(pcie_type, flags))
+ write_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_LNKCTL, save->pcie[i++]);
+ if (pcie_cap_has_sltctl(pcie_type, flags))
+ write_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_SLTCTL, save->pcie[i++]);
+ if (pcie_cap_has_rtctl(pcie_type, flags))
+ write_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_RTCTL, save->pcie[i++]);
+
+ if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
+ write_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_DEVCTL2, save->pcie[i++]);
+ write_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_LNKCTL2, save->pcie[i++]);
+ write_pci_config_16(bus, slot, func,
+ pcie + PCI_EXP_SLTCTL2, save->pcie[i++]);
+ }
+
+ for (i = 15; i >= 0; i--)
+ write_pci_config(bus, slot, func, i * 4, save->pci[i]);
+}
+
+static void __init find_pcie_device(unsigned bus, unsigned slot, unsigned func)
+{
+ int f, count;
+ int pcie, pcie_type;
+ u8 type;
+ u16 vendor, flags;
+ u32 class;
+ int secondary;
+ struct pcie_port *port;
+ int pcie_cap[PCI_MAX_FUNCTIONS];
+ int pcie_flags[PCI_MAX_FUNCTIONS];
+
+ pcie = early_pci_find_capability(bus, slot, func, PCI_CAP_ID_EXP);
+ if (!pcie)
+ return;
+
+ flags = read_pci_config_16(bus, slot, func, pcie + PCI_EXP_FLAGS);
+ pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+ if ((pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
+ (pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
+ return;
+
+ type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE);
+ if ((type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+ return;
+ secondary = read_pci_config_byte(bus, slot, func, PCI_SECONDARY_BUS);
+
+ memset(pcie_cap, 0, sizeof(pcie_cap));
+ memset(pcie_flags, 0, sizeof(pcie_flags));
+ for (count = 0, f = 0; f < PCI_MAX_FUNCTIONS; f++) {
+ vendor = read_pci_config_16(secondary, 0, f, PCI_VENDOR_ID);
+ if (vendor == 0xffff)
+ continue;
+
+ pcie = early_pci_find_capability(secondary, 0, f,
+ PCI_CAP_ID_EXP);
+ if (!pcie)
+ continue;
+
+ flags = read_pci_config_16(secondary, 0, f,
+ pcie + PCI_EXP_FLAGS);
+ pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+ if ((pcie_type == PCI_EXP_TYPE_UPSTREAM) ||
+ (pcie_type == PCI_EXP_TYPE_PCI_BRIDGE))
+ /* Don't reset switch, bridge */
+ return;
+
+ class = read_pci_config(secondary, 0, f, PCI_CLASS_REVISION);
+ if ((class >> 24) == PCI_BASE_CLASS_DISPLAY)
+ /* Don't reset VGA device */
+ return;
+
+ count++;
+ pcie_cap[f] = pcie;
+ pcie_flags[f] = flags;
+ }
+
+ if (!count)
+ return;
+
+ port = (struct pcie_port *)alloc_bootmem(sizeof(struct pcie_port));
+ if (port == NULL) {
+ printk(KERN_ERR "pci 0000:%02x:%02x.%d alloc_bootmem failed\n",
+ bus, slot, func);
+ return;
+ }
+ memset(port, 0, sizeof(*port));
+ port->bus = bus;
+ port->slot = slot;
+ port->func = func;
+ port->secondary = secondary;
+ for (f = 0; f < PCI_MAX_FUNCTIONS; f++) {
+ if (pcie_cap[f] != 0) {
+ port->child[f].cap = pcie_cap[f];
+ port->child[f].flags = pcie_flags[f];
+ save_state(secondary, 0, f, &port->child[f]);
+ }
+ }
+ list_add_tail(&port->dev, &device_list);
+}
+
+void __init early_reset_pcie_devices(void)
+{
+ unsigned bus, slot, func;
+ struct pcie_port *port, *tmp;
+
+ if (!early_pci_allowed() || !reset_devices)
+ return;
+
+ /* Find PCIe port and save config registers of its downstream devices */
+ for (bus = 0; bus < 256; bus++) {
+ for (slot = 0; slot < 32; slot++) {
+ for (func = 0; func < PCI_MAX_FUNCTIONS; func++) {
+ u16 vendor;
+ u8 type;
+ vendor = read_pci_config_16(bus, slot, func,
+ PCI_VENDOR_ID);
+
+ if (vendor == 0xffff)
+ continue;
+
+ find_pcie_device(bus, slot, func);
+
+ if (func == 0) {
+ type = read_pci_config_byte(bus, slot,
+ func,
+ PCI_HEADER_TYPE);
+ if (!(type & 0x80))
+ break;
+ }
+ }
+ }
+ }
+
+ if (list_empty(&device_list))
+ return;
+
+ /* Do bus reset */
+ list_for_each_entry(port, &device_list, dev)
+ do_reset(port->bus, port->slot, port->func);
+
+ /*
+ * According to PCIe spec, software must wait a minimum of 100 ms
+ * before sending a configuration request. We have 500ms safety margin
+ * here.
+ */
+ pci_udelay(500000);
+
+ /* Restore config registers and free memory */
+ list_for_each_entry_safe(port, tmp, &device_list, dev) {
+ for (func = 0; func < PCI_MAX_FUNCTIONS; func++)
+ if (port->child[func].cap)
+ restore_state(port->secondary, 0, func,
+ &port->child[func]);
+ free_bootmem(__pa(port), sizeof(*port));
+ }
+}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee21795..eca3231 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -35,6 +35,8 @@
/* Include the ID list */
#include <linux/pci_ids.h>

+#define PCI_MAX_FUNCTIONS 8
+
/* pci_slot represents a physical slot */
struct pci_slot {
struct pci_bus *bus; /* The bus this slot is on */
diff --git a/init/main.c b/init/main.c
index 9cf77ab..0eb7430 100644
--- a/init/main.c
+++ b/init/main.c
@@ -144,10 +144,10 @@ EXPORT_SYMBOL(reset_devices);
static int __init set_reset_devices(char *str)
{
reset_devices = 1;
- return 1;
+ return 0;
}

-__setup("reset_devices", set_reset_devices);
+early_param("reset_devices", set_reset_devices);

static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/