Re: d63e2e1f3df breaks sparc/T5-8

From: Yinghai Lu
Date: Mon Mar 30 2015 - 21:06:11 EST


On Mon, Mar 30, 2015 at 3:54 PM, David Ahern <david.ahern@xxxxxxxxxx> wrote:
> On 3/29/15 2:07 PM, Yinghai Lu wrote:
>>
>> [ 286.647560] PCI: scan_bus[/pci@300/pci@1/pci@0/pci@6] bus no 8
>> [ 286.921232] PCI: Claiming 0000:00:01.0: Resource 15:
>> 0000800100000000..00008004afffffff [220c]
>> [ 287.229190] PCI: Claiming 0000:01:00.0: Resource 15:
>> 0000800100000000..00008004afffffff [220c]
>> [ 287.533428] PCI: Claiming 0000:02:04.0: Resource 15:
>> 0000800100000000..000080012fffffff [220c]
>> [ 288.149831] PCI: Claiming 0000:03:00.0: Resource 15:
>> 0000800100000000..000080012fffffff [220c]
>> [ 288.252466] PCI: Claiming 0000:04:06.0: Resource 14:
>> 0000800100000000..000080010fffffff [220c]
>> [ 288.867196] PCI: Claiming 0000:05:00.0: Resource 0:
>> 0000800100000000..0000800100001fff [204]
>> [ 288.968221] pci 0000:05:00.0: can't claim BAR 0 [mem
>> 0x800100000000-0x800100001fff]: no compatible bridge window
>>
>> the bridge resource has IORESOURCE_PREFETCH, but the device doesn't have
>> that.
>
> # lspci -vvxxx -s 0000:05:00.0
> 0000:05:00.0 USB controller: Renesas Technology Corp. uPD720201 USB 3.0 Host
> Controller (rev 03) (prog-if 30 [XHCI])
> Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B- DisINTx+
> Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
> <TAbort- <MAbort- >SERR- <PERR- INTx-
> Latency: 0, Cache Line Size: 64 bytes
> Interrupt: pin A routed to IRQ 00000004
> Region 0: Memory at 100000000 (64-bit, non-prefetchable) [size=8K]

ok, that is really non-pref mmio 64bit.
We can work around the problem by honoring the firmware setting, according
to https://www.pcisig.com/specifications/pciexpress/base2/PCIe_Base_r2.1_Errata_08Jun10.pdf
page 13

Please check attached updated patches that should fix the regression
and kill those "no compatible window" warnings.

Thanks

Yinghai
Subject: [RFC PATCH v2] PCI: Introduce pci_bus_addr_t

David Ahern found commit d63e2e1f3df9 ("sparc/PCI: Clip bridge windows
to fit in upstream windows") broke sparc/T5-8.

In the boot log, there is
pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address
0x110204000)
but that could only happen when dma_addr_t is 32-bit.

According to David Miller, all DMA occurs behind an IOMMU and these
IOMMUs only support 32-bit addressing, therefore dma_addr_t is
32-bit on sparc64.

Let's introduce pci_bus_addr_t instead of using dma_addr_t,
and pci_bus_addr_t will be 64-bit on 64-bit platforms or X86 PAE kernels.

Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows")
Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB")
Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@xxxxxxxxxxxxxx
Reported-by: David Ahern <david.ahern@xxxxxxxxxx>
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
-v2: use PCI_BUS_ADDR in drivers/pci/bus.c
put config option in pci/Kconfig: as David Miller said:
PCI addresses being 64-bit or not is an attribute of the PCI
controller and the geography of the bridges behind it, not the
cpu architecture.
---
drivers/pci/Kconfig | 4 ++++
drivers/pci/bus.c | 10 +++++-----
drivers/pci/probe.c | 12 ++++++------
include/linux/pci.h | 12 +++++++++---
4 files changed, 24 insertions(+), 14 deletions(-)

Index: linux-2.6/drivers/pci/bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/bus.c
+++ linux-2.6/drivers/pci/bus.c
@@ -92,11 +92,11 @@ void pci_bus_remove_resources(struct pci
}

static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL};
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
static struct pci_bus_region pci_64_bit = {0,
- (dma_addr_t) 0xffffffffffffffffULL};
-static struct pci_bus_region pci_high = {(dma_addr_t) 0x100000000ULL,
- (dma_addr_t) 0xffffffffffffffffULL};
+ (pci_bus_addr_t) 0xffffffffffffffffULL};
+static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL,
+ (pci_bus_addr_t) 0xffffffffffffffffULL};
#endif

/*
@@ -200,7 +200,7 @@ int pci_bus_alloc_resource(struct pci_bu
resource_size_t),
void *alignf_data)
{
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
int rc;

if (res->flags & IORESOURCE_MEM_64) {
Index: linux-2.6/drivers/pci/probe.c
===================================================================
--- linux-2.6.orig/drivers/pci/probe.c
+++ linux-2.6/drivers/pci/probe.c
@@ -254,8 +254,8 @@ int __pci_read_base(struct pci_dev *dev,
}

if (res->flags & IORESOURCE_MEM_64) {
- if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) &&
- sz64 > 0x100000000ULL) {
+ if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
+ && sz64 > 0x100000000ULL) {
res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
res->start = 0;
res->end = 0;
@@ -264,7 +264,7 @@ int __pci_read_base(struct pci_dev *dev,
goto out;
}

- if ((sizeof(dma_addr_t) < 8) && l) {
+ if ((sizeof(pci_bus_addr_t) < 8) && l) {
/* Above 32-bit boundary; try to reallocate */
res->flags |= IORESOURCE_UNSET;
res->start = 0;
@@ -399,7 +399,7 @@ static void pci_read_bridge_mmio_pref(st
struct pci_dev *dev = child->self;
u16 mem_base_lo, mem_limit_lo;
u64 base64, limit64;
- dma_addr_t base, limit;
+ pci_bus_addr_t base, limit;
struct pci_bus_region region;
struct resource *res;

@@ -426,8 +426,8 @@ static void pci_read_bridge_mmio_pref(st
}
}

- base = (dma_addr_t) base64;
- limit = (dma_addr_t) limit64;
+ base = (pci_bus_addr_t) base64;
+ limit = (pci_bus_addr_t) limit64;

if (base != base64) {
dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n",
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -576,9 +576,15 @@ int raw_pci_read(unsigned int domain, un
int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 val);

+#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+typedef u64 pci_bus_addr_t;
+#else
+typedef u32 pci_bus_addr_t;
+#endif
+
struct pci_bus_region {
- dma_addr_t start;
- dma_addr_t end;
+ pci_bus_addr_t start;
+ pci_bus_addr_t end;
};

struct pci_dynids {
@@ -1127,7 +1133,7 @@ int __must_check pci_bus_alloc_resource(

int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);

-static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
+static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
{
struct pci_bus_region region;

Index: linux-2.6/drivers/pci/Kconfig
===================================================================
--- linux-2.6.orig/drivers/pci/Kconfig
+++ linux-2.6/drivers/pci/Kconfig
@@ -1,6 +1,10 @@
#
# PCI configuration
#
+config PCI_BUS_ADDR_T_64BIT
+ def_bool y if (64BIT || X86_PAE)
+ depends on PCI
+
config PCI_MSI
bool "Message Signaled Interrupts (MSI and MSI-X)"
depends on PCI
Subject: [RFC PATCH v3] sparc/PCI: Add mem64 resource parsing for root bus

Found a "no compatible bridge window" warning in the boot log from T5-8:

pci 0000:00:01.0: can't claim BAR 15 [mem 0x100000000-0x4afffffff pref]: no compatible bridge window

and the root bus only reports io and mem32:

pci_sun4v f02dbcfc: PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [io 0x804000000000-0x80400fffffff] (bus address [0x0000-0xfffffff])
pci_bus 0000:00: root bus resource [mem 0x800000000000-0x80007effffff] (bus address [0x00000000-0x7effffff])
pci_bus 0000:00: root bus resource [bus 00-77]

Add mem64 handling in pci_common for sparc, so that the 64-bit resource
is registered for the root bus first.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
-v2: mem64_space should use mem_space.start as offset.
-v3: add IORESOURCE_MEM_64 flag
---
arch/sparc/kernel/pci.c | 7 ++++++-
arch/sparc/kernel/pci_common.c | 15 +++++++++++++--
arch/sparc/kernel/pci_impl.h | 1 +
3 files changed, 20 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/sparc/kernel/pci.c
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/pci.c
+++ linux-2.6/arch/sparc/kernel/pci.c
@@ -185,8 +185,10 @@ static unsigned long pci_parse_of_flags(

if (addr0 & 0x02000000) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x01000000)
+ flags |= IORESOURCE_MEM_64
+ | PCI_BASE_ADDRESS_MEM_TYPE_64;
if (addr0 & 0x40000000)
flags |= IORESOURCE_PREFETCH
| PCI_BASE_ADDRESS_MEM_PREFETCH;
@@ -663,6 +665,9 @@ struct pci_bus *pci_scan_one_pbm(struct
pbm->io_space.start);
pci_add_resource_offset(&resources, &pbm->mem_space,
pbm->mem_space.start);
+ if (pbm->mem64_space.flags)
+ pci_add_resource_offset(&resources, &pbm->mem64_space,
+ pbm->mem_space.start);
pbm->busn.start = pbm->pci_first_busno;
pbm->busn.end = pbm->pci_last_busno;
pbm->busn.flags = IORESOURCE_BUS;
Index: linux-2.6/arch/sparc/kernel/pci_common.c
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/pci_common.c
+++ linux-2.6/arch/sparc/kernel/pci_common.c
@@ -406,6 +406,7 @@ void pci_determine_mem_io_space(struct p
}

num_pbm_ranges = i / sizeof(*pbm_ranges);
+ memset(&pbm->mem64_space, 0, sizeof(struct resource));

for (i = 0; i < num_pbm_ranges; i++) {
const struct linux_prom_pci_ranges *pr = &pbm_ranges[i];
@@ -451,7 +452,11 @@ void pci_determine_mem_io_space(struct p
break;

case 3:
- /* XXX 64-bit MEM handling XXX */
+ /* 64-bit MEM handling */
+ pbm->mem64_space.start = a;
+ pbm->mem64_space.end = a + size - 1UL;
+ pbm->mem64_space.flags = IORESOURCE_MEM;
+ break;

default:
break;
@@ -465,15 +470,21 @@ void pci_determine_mem_io_space(struct p
prom_halt();
}

- printk("%s: PCI IO[%llx] MEM[%llx]\n",
+ printk("%s: PCI IO[%llx] MEM[%llx]",
pbm->name,
pbm->io_space.start,
pbm->mem_space.start);
+ if (pbm->mem64_space.flags)
+ printk(" MEM64[%llx]",
+ pbm->mem64_space.start);
+ printk("\n");

pbm->io_space.name = pbm->mem_space.name = pbm->name;

request_resource(&ioport_resource, &pbm->io_space);
request_resource(&iomem_resource, &pbm->mem_space);
+ if (pbm->mem64_space.flags)
+ request_resource(&iomem_resource, &pbm->mem64_space);

pci_register_legacy_regions(&pbm->io_space,
&pbm->mem_space);
Index: linux-2.6/arch/sparc/kernel/pci_impl.h
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/pci_impl.h
+++ linux-2.6/arch/sparc/kernel/pci_impl.h
@@ -97,6 +97,7 @@ struct pci_pbm_info {
/* PBM I/O and Memory space resources. */
struct resource io_space;
struct resource mem_space;
+ struct resource mem64_space;
struct resource busn;

/* Base of PCI Config space, can be per-PBM or shared. */
Subject: [RFC PATCH] PCI: Set pref for mem64 resource of pcie device

We still get "no compatible bridge window" warning on sparc T5-8
after we add support for 64bit resource for root bus.

[ 286.647560] PCI: scan_bus[/pci@300/pci@1/pci@0/pci@6] bus no 8
[ 286.921232] PCI: Claiming 0000:00:01.0: Resource 15: 0000800100000000..00008004afffffff [220c]
[ 287.229190] PCI: Claiming 0000:01:00.0: Resource 15: 0000800100000000..00008004afffffff [220c]
[ 287.533428] PCI: Claiming 0000:02:04.0: Resource 15: 0000800100000000..000080012fffffff [220c]
[ 288.149831] PCI: Claiming 0000:03:00.0: Resource 15: 0000800100000000..000080012fffffff [220c]
[ 288.252466] PCI: Claiming 0000:04:06.0: Resource 14: 0000800100000000..000080010fffffff [220c]
[ 288.867196] PCI: Claiming 0000:05:00.0: Resource 0: 0000800100000000..0000800100001fff [204]
[ 288.968221] pci 0000:05:00.0: can't claim BAR 0 [mem 0x800100000000-0x800100001fff]: no compatible bridge window

All the bridges have a pref mem 64-bit resource, but the device resource does
not have pref set, so we cannot find a parent for the device resource,
as we cannot put non-pref mem under pref mem.

According to the PCIe spec errata
https://www.pcisig.com/specifications/pciexpress/base2/PCIe_Base_r2.1_Errata_08Jun10.pdf
page 13, in some cases it is OK to mark such resources as pref.

Only set the bit when the MMIO has been placed above 4G by the BIOS.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
drivers/pci/probe.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)

Index: linux-2.6/drivers/pci/probe.c
===================================================================
--- linux-2.6.orig/drivers/pci/probe.c
+++ linux-2.6/drivers/pci/probe.c
@@ -1508,6 +1508,43 @@ static void pci_init_capabilities(struct
pci_enable_acs(dev);
}

+/*
+ * According to the PCIe Base r2.1 errata
+ * https://www.pcisig.com/specifications/pciexpress/base2/PCIe_Base_r2.1_Errata_08Jun10.pdf
+ * page 13, system firmware may place a 64-bit non-prefetchable BAR under a
+ * 64-bit prefetchable bridge window in some cases.
+ * Set the pref bit only when the PCI bus address is at or above 4G.
+ */
+static void set_pcie_64bit_pref(struct pci_dev *dev)
+{
+	int i;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+		struct resource *res = &dev->resource[i];
+		struct pci_bus_region r;
+		enum pci_bar_type type;
+		int reg;
+
+		if (!(res->flags & IORESOURCE_MEM_64))
+			continue;
+
+		if (res->flags & IORESOURCE_PREFETCH)
+			continue;
+
+		pcibios_resource_to_bus(dev->bus, &r, res);
+		if (r.start <= 0xffffffff)	/* still below 4G: leave as non-pref */
+			continue;
+
+		reg = pci_resource_bar(dev, i, &type);
+		dev_printk(KERN_DEBUG, &dev->dev, "reg %d %pR + pref\n",
+			   reg, res);
+		res->flags |= IORESOURCE_PREFETCH;
+	}
+}
+
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{
int ret;
@@ -1538,6 +1575,9 @@ void pci_device_add(struct pci_dev *dev,
/* Initialize various capabilities */
pci_init_capabilities(dev);

+ /* After pcie_cap is assigned and sriov bar is probed */
+ set_pcie_64bit_pref(dev);
+
/*
* Add the device to our list of discovered devices
* and the bus list for fixup functions, etc.