Re: d63e2e1f3df breaks sparc/T5-8

From: Yinghai Lu
Date: Tue Mar 31 2015 - 16:28:45 EST


On Tue, Mar 31, 2015 at 10:04 AM, David Ahern <david.ahern@xxxxxxxxxx> wrote:
>> Clear out the old and apply these new ones.
>
>
> I take DaveM's response to mean the patches (3rd one?) needs another
> version.
>
> I will be on PTO Wed-Fri with limited access through Sunday. If you have
> something to try out later today I can do that; else it needs to wait until
> next week. Given the likelihood that Linus will release 4.0 this weekend
> that means both 3.19 and 4.0 will be broken for these systems.

Please check the three attached patches, which apply on top of the current Linus tree.

Thanks

Yinghai
Subject: [RFC PATCH v2] PCI: Introduce pci_bus_addr_t

David Ahern found commit d63e2e1f3df9 ("sparc/PCI: Clip bridge windows
to fit in upstream windows") broke sparc/T5-8.

The boot log contains
pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address
0x110204000)
but that could only happen when dma_addr_t is 32-bit.

According to David Miller, all DMA occurs behind an IOMMU and these
IOMMUs only support 32-bit addressing, therefore dma_addr_t is
32-bit on sparc64.

Let's introduce pci_bus_addr_t instead of using dma_addr_t;
pci_bus_addr_t will be 64-bit on 64-bit platforms and on X86 PAE kernels.

Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows")
Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB")
Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@xxxxxxxxxxxxxx
Reported-by: David Ahern <david.ahern@xxxxxxxxxx>
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
-v2: use PCI_BUS_ADDR in drivers/pci/bus.c
put config option in pci/Kconfig: as David Miller said:
PCI addresses being 64-bit or not is an attribute of the PCI
controller and the geography of the bridges behind it, not the
cpu architecture.
---
drivers/pci/Kconfig | 4 ++++
drivers/pci/bus.c | 10 +++++-----
drivers/pci/probe.c | 12 ++++++------
include/linux/pci.h | 12 +++++++++---
4 files changed, 24 insertions(+), 14 deletions(-)

Index: linux-2.6/drivers/pci/bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/bus.c
+++ linux-2.6/drivers/pci/bus.c
@@ -92,11 +92,11 @@ void pci_bus_remove_resources(struct pci
}

static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL};
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
static struct pci_bus_region pci_64_bit = {0,
- (dma_addr_t) 0xffffffffffffffffULL};
-static struct pci_bus_region pci_high = {(dma_addr_t) 0x100000000ULL,
- (dma_addr_t) 0xffffffffffffffffULL};
+ (pci_bus_addr_t) 0xffffffffffffffffULL};
+static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL,
+ (pci_bus_addr_t) 0xffffffffffffffffULL};
#endif

/*
@@ -200,7 +200,7 @@ int pci_bus_alloc_resource(struct pci_bu
resource_size_t),
void *alignf_data)
{
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
int rc;

if (res->flags & IORESOURCE_MEM_64) {
Index: linux-2.6/drivers/pci/probe.c
===================================================================
--- linux-2.6.orig/drivers/pci/probe.c
+++ linux-2.6/drivers/pci/probe.c
@@ -254,8 +254,8 @@ int __pci_read_base(struct pci_dev *dev,
}

if (res->flags & IORESOURCE_MEM_64) {
- if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) &&
- sz64 > 0x100000000ULL) {
+ if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
+ && sz64 > 0x100000000ULL) {
res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
res->start = 0;
res->end = 0;
@@ -264,7 +264,7 @@ int __pci_read_base(struct pci_dev *dev,
goto out;
}

- if ((sizeof(dma_addr_t) < 8) && l) {
+ if ((sizeof(pci_bus_addr_t) < 8) && l) {
/* Above 32-bit boundary; try to reallocate */
res->flags |= IORESOURCE_UNSET;
res->start = 0;
@@ -399,7 +399,7 @@ static void pci_read_bridge_mmio_pref(st
struct pci_dev *dev = child->self;
u16 mem_base_lo, mem_limit_lo;
u64 base64, limit64;
- dma_addr_t base, limit;
+ pci_bus_addr_t base, limit;
struct pci_bus_region region;
struct resource *res;

@@ -426,8 +426,8 @@ static void pci_read_bridge_mmio_pref(st
}
}

- base = (dma_addr_t) base64;
- limit = (dma_addr_t) limit64;
+ base = (pci_bus_addr_t) base64;
+ limit = (pci_bus_addr_t) limit64;

if (base != base64) {
dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n",
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -576,9 +576,15 @@ int raw_pci_read(unsigned int domain, un
int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 val);

+#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+typedef u64 pci_bus_addr_t;
+#else
+typedef u32 pci_bus_addr_t;
+#endif
+
struct pci_bus_region {
- dma_addr_t start;
- dma_addr_t end;
+ pci_bus_addr_t start;
+ pci_bus_addr_t end;
};

struct pci_dynids {
@@ -1127,7 +1133,7 @@ int __must_check pci_bus_alloc_resource(

int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);

-static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
+static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
{
struct pci_bus_region region;

Index: linux-2.6/drivers/pci/Kconfig
===================================================================
--- linux-2.6.orig/drivers/pci/Kconfig
+++ linux-2.6/drivers/pci/Kconfig
@@ -1,6 +1,10 @@
#
# PCI configuration
#
+config PCI_BUS_ADDR_T_64BIT
+ def_bool y if (64BIT || X86_PAE)
+ depends on PCI
+
config PCI_MSI
bool "Message Signaled Interrupts (MSI and MSI-X)"
depends on PCI
Subject: [RFC PATCH v3] sparc/PCI: Add mem64 resource parsing for root bus

Found a "no compatible bridge window" warning in the boot log from T5-8:

pci 0000:00:01.0: can't claim BAR 15 [mem 0x100000000-0x4afffffff pref]: no compatible bridge window

and the root bus only reports io and mem32:

pci_sun4v f02dbcfc: PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [io 0x804000000000-0x80400fffffff] (bus address [0x0000-0xfffffff])
pci_bus 0000:00: root bus resource [mem 0x800000000000-0x80007effffff] (bus address [0x00000000-0x7effffff])
pci_bus 0000:00: root bus resource [bus 00-77]

Add mem64 handling in pci_common for sparc, so that a 64-bit resource is
registered for the root bus in the first place.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
-v2: mem64_space should use mem_space.start as offset.
-v3: add IORESOURCE_MEM_64 flag
---
arch/sparc/kernel/pci.c | 7 ++++++-
arch/sparc/kernel/pci_common.c | 15 +++++++++++++--
arch/sparc/kernel/pci_impl.h | 1 +
3 files changed, 20 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/sparc/kernel/pci.c
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/pci.c
+++ linux-2.6/arch/sparc/kernel/pci.c
@@ -185,8 +185,10 @@ static unsigned long pci_parse_of_flags(

if (addr0 & 0x02000000) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x01000000)
+ flags |= IORESOURCE_MEM_64
+ | PCI_BASE_ADDRESS_MEM_TYPE_64;
if (addr0 & 0x40000000)
flags |= IORESOURCE_PREFETCH
| PCI_BASE_ADDRESS_MEM_PREFETCH;
@@ -663,6 +665,9 @@ struct pci_bus *pci_scan_one_pbm(struct
pbm->io_space.start);
pci_add_resource_offset(&resources, &pbm->mem_space,
pbm->mem_space.start);
+ if (pbm->mem64_space.flags)
+ pci_add_resource_offset(&resources, &pbm->mem64_space,
+ pbm->mem_space.start);
pbm->busn.start = pbm->pci_first_busno;
pbm->busn.end = pbm->pci_last_busno;
pbm->busn.flags = IORESOURCE_BUS;
Index: linux-2.6/arch/sparc/kernel/pci_common.c
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/pci_common.c
+++ linux-2.6/arch/sparc/kernel/pci_common.c
@@ -406,6 +406,7 @@ void pci_determine_mem_io_space(struct p
}

num_pbm_ranges = i / sizeof(*pbm_ranges);
+ memset(&pbm->mem64_space, 0, sizeof(struct resource));

for (i = 0; i < num_pbm_ranges; i++) {
const struct linux_prom_pci_ranges *pr = &pbm_ranges[i];
@@ -451,7 +452,11 @@ void pci_determine_mem_io_space(struct p
break;

case 3:
- /* XXX 64-bit MEM handling XXX */
+ /* 64-bit MEM handling */
+ pbm->mem64_space.start = a;
+ pbm->mem64_space.end = a + size - 1UL;
+ pbm->mem64_space.flags = IORESOURCE_MEM;
+ break;

default:
break;
@@ -465,15 +470,21 @@ void pci_determine_mem_io_space(struct p
prom_halt();
}

- printk("%s: PCI IO[%llx] MEM[%llx]\n",
+ printk("%s: PCI IO[%llx] MEM[%llx]",
pbm->name,
pbm->io_space.start,
pbm->mem_space.start);
+ if (pbm->mem64_space.flags)
+ printk(" MEM64[%llx]",
+ pbm->mem64_space.start);
+ printk("\n");

pbm->io_space.name = pbm->mem_space.name = pbm->name;

request_resource(&ioport_resource, &pbm->io_space);
request_resource(&iomem_resource, &pbm->mem_space);
+ if (pbm->mem64_space.flags)
+ request_resource(&iomem_resource, &pbm->mem64_space);

pci_register_legacy_regions(&pbm->io_space,
&pbm->mem_space);
Index: linux-2.6/arch/sparc/kernel/pci_impl.h
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/pci_impl.h
+++ linux-2.6/arch/sparc/kernel/pci_impl.h
@@ -97,6 +97,7 @@ struct pci_pbm_info {
/* PBM I/O and Memory space resources. */
struct resource io_space;
struct resource mem_space;
+ struct resource mem64_space;
struct resource busn;

/* Base of PCI Config space, can be per-PBM or shared. */
Subject: [RFC PATCH v2] PCI: Set pref for mem64 resource of pcie device

We still get a "no compatible bridge window" warning on sparc T5-8
after adding support for a 64-bit resource on the root bus.

[ 286.647560] PCI: scan_bus[/pci@300/pci@1/pci@0/pci@6] bus no 8
[ 286.921232] PCI: Claiming 0000:00:01.0: Resource 15: 0000800100000000..00008004afffffff [220c]
[ 287.229190] PCI: Claiming 0000:01:00.0: Resource 15: 0000800100000000..00008004afffffff [220c]
[ 287.533428] PCI: Claiming 0000:02:04.0: Resource 15: 0000800100000000..000080012fffffff [220c]
[ 288.149831] PCI: Claiming 0000:03:00.0: Resource 15: 0000800100000000..000080012fffffff [220c]
[ 288.252466] PCI: Claiming 0000:04:06.0: Resource 14: 0000800100000000..000080010fffffff [220c]
[ 288.867196] PCI: Claiming 0000:05:00.0: Resource 0: 0000800100000000..0000800100001fff [204]
[ 288.968221] pci 0000:05:00.0: can't claim BAR 0 [mem 0x800100000000-0x800100001fff]: no compatible bridge window

All the bridges have a 64-bit pref mem resource, but the device resource does not
have pref set, so we cannot find a parent for the device resource,
because non-pref mem cannot be placed under pref mem.

According to the PCIe spec errata
https://www.pcisig.com/specifications/pciexpress/base2/PCIe_Base_r2.1_Errata_08Jun10.pdf
page 13, in some cases it is OK to mark some 64-bit non-pref resources as pref.

Only set pref for 64-bit mmio when the entire path from the host to the adapter is
over PCI Express.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
-v2: set pref for mmio 64 when whole path is PCI Express.
---

drivers/pci/probe.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)

Index: linux-2.6/drivers/pci/probe.c
===================================================================
--- linux-2.6.orig/drivers/pci/probe.c
+++ linux-2.6/drivers/pci/probe.c
@@ -1508,6 +1508,53 @@ static void pci_init_capabilities(struct
pci_enable_acs(dev);
}

+static bool pci_up_path_over_pcie(struct pci_bus *bus)
+{
+ if (!bus)
+ return true;
+
+ if (bus->self && !pci_is_pcie(bus->self))
+ return false;
+
+ return pci_up_path_over_pcie(bus->parent);
+}
+
+/*
+ * According to
+ * https://www.pcisig.com/specifications/pciexpress/base2/PCIe_Base_r2.1_Errata_08Jun10.pdf
+ * page 13, system firmware could put some 64bit non-pref under 64bit pref,
+ * in some cases.
+ * Let's set pref bit for 64bit mmio when entire path from the host to
+ * the adapter is over PCI Express.
+ */
+static void set_pcie_64bit_pref(struct pci_dev *dev)
+{
+ int i;
+
+ if (!pci_is_pcie(dev))
+ return;
+
+ if (!pci_up_path_over_pcie(dev->bus))
+ return;
+
+ for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+ struct resource *res = &dev->resource[i];
+ enum pci_bar_type type;
+ int reg;
+
+ if (!(res->flags & IORESOURCE_MEM_64))
+ continue;
+
+ if (res->flags & IORESOURCE_PREFETCH)
+ continue;
+
+ reg = pci_resource_bar(dev, i, &type);
+ dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x %pR + pref\n",
+ reg, res);
+ res->flags |= IORESOURCE_PREFETCH;
+ }
+}
+
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{
int ret;
@@ -1538,6 +1585,9 @@ void pci_device_add(struct pci_dev *dev,
/* Initialize various capabilities */
pci_init_capabilities(dev);

+ /* After pcie_cap is assigned and sriov bar is probed */
+ set_pcie_64bit_pref(dev);
+
/*
* Add the device to our list of discovered devices
* and the bus list for fixup functions, etc.