Re: [PATCH 3/4] PCI: restrict subordinate buses to those reachablevia host bridge

From: Yinghai Lu
Date: Thu Jan 19 2012 - 15:42:23 EST


On Wed, Jan 18, 2012 at 9:46 AM, Bjorn Helgaas <bhelgaas@xxxxxxxxxx> wrote:
> On Wed, Jan 18, 2012 at 10:27 AM, Yinghai Lu <yinghai@xxxxxxxxxx> wrote:
>> On Wed, Jan 18, 2012 at 8:52 AM, Bjorn Helgaas <bhelgaas@xxxxxxxxxx> wrote:
>>> If we make a quirk for this machine, we still have the question of
>>> what to do with my patches.  I assert that if Linux ever reconfigures
>>> any bus numbers or does any configuration of hot-added P2P bridges, it
>>> must pay attention to the host bridge bus number window.  Therefore, I
>>> think we need something like this series even if we make a quirk.
>>
>> We may need more smart way to find unused bus range instead of just
>> just max+1 and ++max.
>>
>> For example:  one bridge (A) have two child bridges (B and C),
>> A: bus range: 10-2f
>> B: bus range: 11-1f
>> C: bus range: 20-2f
>>
>> when some broken case happen, B bus BIOS assigned bus range will be
>> all cleared in first pass.
>> but C bus is ok.  but in second bus, bus will be assigned to 30- .
>> that is totally wrong, We should still
>> try to use bus 11-1f at first for bus B.
>
> Yes, I agree we may need a better way to choose bus numbers in the
> first place.  My current patch (3/4) doesn't change how we choose
> them; it only rejects invalid ones.

come out draft about bus number tracking and allocate bus range instead.

please check attached patch.

Thanks

Yinghai
Subject: [RFC PATCH] PCI: allocate bus range instead of use max blindly

every bus will have extra busn_res, and linked them toghter to iobusn_resource.

later when need to find usabled bus number range, try allocate from the resource tree.

only test on x86 64 bit.

need to do:
1. add /proc/iobusn ?
2. expand bus range, when can not find enough range in parent range.
3. use list instead to track transparent bus?

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index b9676ae..15cd4b7 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -172,7 +172,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot)
}

struct pci_bus;
-void x86_pci_root_bus_resources(int bus, struct list_head *resources);
+void x86_pci_root_bus_resources(int bus, int *bus_max, struct list_head *resources);

#ifdef CONFIG_SMP
#define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index a312e76..b48f0d9 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -348,6 +348,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
struct acpi_device *device = root->device;
int domain = root->segment;
int busnum = root->secondary.start;
+ int bus_max = root->secondary.end;
LIST_HEAD(resources);
struct pci_bus *bus;
struct pci_sysdata *sd;
@@ -405,12 +406,20 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
} else {
get_current_resources(device, busnum, domain, &resources);
if (list_empty(&resources))
- x86_pci_root_bus_resources(busnum, &resources);
+ x86_pci_root_bus_resources(busnum, &bus_max, &resources);
bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
&resources);
- if (bus)
+ if (bus) {
+ /* init busn_res */
+ bus->busn_res.start = (domain << 8) | busnum;
+ bus->busn_res.end = (domain << 8) | bus_max;
+ bus->busn_res.flags = IORESOURCE_BUS;
+
+ insert_resource(&iobusn_resource, &bus->busn_res);
+ dev_printk(KERN_DEBUG, &bus->dev, "busn_res: %06llx-%06llx inserted\n", (unsigned long long)bus->busn_res.start, (unsigned long long)bus->busn_res.end);
+
bus->subordinate = pci_scan_child_bus(bus);
- else
+ } else
pci_free_resource_list(&resources);
}

diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index fd3f655..3256fb9 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -7,7 +7,7 @@
int pci_root_num;
struct pci_root_info pci_root_info[PCI_ROOT_NR];

-void x86_pci_root_bus_resources(int bus, struct list_head *resources)
+void x86_pci_root_bus_resources(int bus, int *bus_max, struct list_head *resources)
{
int i;
int j;
@@ -28,6 +28,7 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources)
bus);

info = &pci_root_info[i];
+ *bus_max = info->bus_max;
for (j = 0; j < info->res_num; j++) {
struct resource *res;
struct resource *root;
@@ -51,6 +52,10 @@ default_resources:
printk(KERN_DEBUG "PCI: root bus %02x: using default resources\n", bus);
pci_add_resource(resources, &ioport_resource);
pci_add_resource(resources, &iomem_resource);
+ if (!bus)
+ *bus_max = 0xff;
+ else if (!*bus_max)
+ *bus_max = bus;
}

void __devinit update_res(struct pci_root_info *info, resource_size_t start,
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 323481e..e673355 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -433,6 +433,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
LIST_HEAD(resources);
struct pci_bus *bus = NULL;
struct pci_sysdata *sd;
+ int bus_max;

while ((bus = pci_find_next_bus(bus)) != NULL) {
if (bus->number == busnum) {
@@ -454,8 +455,8 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
sd->node = get_mp_bus_to_node(busnum);

printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
- x86_pci_root_bus_resources(busnum, &resources);
- bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
+ x86_pci_root_bus_resources(busnum, &bus_max, &resources);
+ bus = pci_scan_root_bus(NULL, busnum, bus_max, &pci_root_ops, sd, &resources);
if (!bus) {
pci_free_resource_list(&resources);
kfree(sd);
@@ -643,6 +644,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
LIST_HEAD(resources);
struct pci_bus *bus = NULL;
struct pci_sysdata *sd;
+ int bus_max;

/*
* Allocate per-root-bus (not per bus) arch-specific data.
@@ -655,8 +657,8 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
return NULL;
}
sd->node = node;
- x86_pci_root_bus_resources(busno, &resources);
- bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources);
+ x86_pci_root_bus_resources(busno, &bus_max, &resources);
+ bus = pci_scan_root_bus(NULL, busno, bus_max, ops, sd, &resources);
if (!bus) {
pci_free_resource_list(&resources);
kfree(sd);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 05c5f76..dc5413d 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -693,6 +693,12 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
child->primary = primary;
child->subordinate = subordinate;
child->bridge_ctl = bctl;
+
+ child->busn_res.start = (pci_domain_nr(bus) << 8) | secondary;
+ child->busn_res.end = (pci_domain_nr(bus) << 8) | subordinate;
+ child->busn_res.flags = IORESOURCE_BUS | IORESOURCE_BUSY;
+ dev_printk(KERN_DEBUG, &child->dev, "busn_res: %06llx-%06llx inserted\n", (unsigned long long)child->busn_res.start, (unsigned long long)child->busn_res.end);
+ insert_resource(&bus->busn_res, &child->busn_res);
}

cmax = pci_scan_child_bus(child);
@@ -705,6 +711,11 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
* We need to assign a number to this bus which we always
* do in the second pass.
*/
+ resource_size_t n_size;
+ struct resource busn_res;
+ int ret = -ENOMEM;
+ int old_max = max;
+
if (!pass) {
if (pcibios_assign_all_busses() || broken)
/* Temporarily disable forwarding of the
@@ -721,20 +732,40 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
/* Clear errors */
pci_write_config_word(dev, PCI_STATUS, 0xffff);

- /* Prevent assigning a bus number that already exists.
- * This can happen when a bridge is hot-plugged, so in
- * this case we only re-scan this bus. */
- child = pci_find_bus(pci_domain_nr(bus), max+1);
- if (!child) {
- child = pci_add_new_bus(bus, dev, ++max);
- if (!child)
- goto out;
+ /* find bigest range in bus->busn_res that we can use */
+ n_size = resource_size(&bus->busn_res) - 1;
+ memset(&busn_res, 0, sizeof(struct resource));
+ dev_printk(KERN_DEBUG, &dev->dev, "try to find busn in busn_res: %06llx-%06llx \n", (unsigned long long)bus->busn_res.start, (unsigned long long)bus->busn_res.end);
+ while (n_size > 0) {
+ ret = allocate_resource(&bus->busn_res, &busn_res, n_size, bus->busn_res.start + 1, bus->busn_res.end, 1, NULL, NULL);
+ if (ret == 0)
+ break;
+ n_size--;
}
+
+ if (ret != 0)
+ goto out;
+
+ child = pci_add_new_bus(bus, dev, busn_res.start & 0xff);
+ if (!child)
+ goto out;
+
+ child->subordinate = busn_res.end & 0xff;
+
+ release_resource(&busn_res);
+ child->busn_res.start = busn_res.start;
+ child->busn_res.end = busn_res.end;
+ child->busn_res.flags = IORESOURCE_BUS | IORESOURCE_BUSY;
+ dev_printk(KERN_DEBUG, &child->dev, "busn_res: %06llx-%06llx inserted\n", child->busn_res.start, child->busn_res.end);
+ insert_resource(&bus->busn_res, &child->busn_res);
+
buses = (buses & 0xff000000)
| ((unsigned int)(child->primary) << 0)
| ((unsigned int)(child->secondary) << 8)
| ((unsigned int)(child->subordinate) << 16);

+ max = child->secondary;
+
/*
* yenta.c forces a secondary latency timer of 176.
* Copy that behaviour here.
@@ -802,6 +833,12 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
*/
child->subordinate = max;
pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
+ child->busn_res.end &= ~0xff;
+ child->busn_res.end |= max;
+ dev_printk(KERN_DEBUG, &child->dev, "busn_res: %06llx-%06llx adjusted\n", (unsigned long long)child->busn_res.start, (unsigned long long)child->busn_res.end);
+
+ if (old_max > max)
+ max = old_max;
}

sprintf(child->name,
@@ -1641,7 +1678,7 @@ err_out:
return NULL;
}

-struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
+struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus, int bus_max,
struct pci_ops *ops, void *sysdata, struct list_head *resources)
{
struct pci_bus *b;
@@ -1650,6 +1687,13 @@ struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
if (!b)
return NULL;

+ /* init busn_res */
+ b->busn_res.start = (pci_domain_nr(b) << 8) | bus;
+ b->busn_res.end = (pci_domain_nr(b) << 8) | bus_max;
+ b->busn_res.flags = IORESOURCE_BUS;
+
+ insert_resource(&iobusn_resource, &b->busn_res);
+
b->subordinate = pci_scan_child_bus(b);
pci_bus_add_devices(b);
return b;
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index ef3c18a..4ca0f74 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -68,6 +68,7 @@ void pci_remove_bus(struct pci_bus *pci_bus)

down_write(&pci_bus_sem);
list_del(&pci_bus->node);
+ release_resource(&pci_bus->busn_res);
up_write(&pci_bus_sem);
if (!pci_bus->is_added)
return;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index e885ba2..6fe9e19 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -136,6 +136,7 @@ struct resource {
/* PC/ISA/whatever - the normal PC address spaces: IO and memory */
extern struct resource ioport_resource;
extern struct resource iomem_resource;
+extern struct resource iobusn_resource;

extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
extern int request_resource(struct resource *root, struct resource *new);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d266085..edaac82 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -420,6 +420,7 @@ struct pci_bus {
struct list_head slots; /* list of slots on this bus */
struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
struct list_head resources; /* address space routed to this bus */
+ struct resource busn_res; /* track registered bus num range */

struct pci_ops *ops; /* configuration access functions */
void *sysdata; /* hook for sys-specific extension */
@@ -666,7 +667,8 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
struct pci_ops *ops, void *sysdata,
struct list_head *resources);
struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
- struct pci_ops *ops, void *sysdata,
+ int bus_max, struct pci_ops *ops,
+ void *sysdata,
struct list_head *resources);
struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
int busnr);
diff --git a/kernel/resource.c b/kernel/resource.c
index 7640b3a..53b42f0 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -38,6 +38,14 @@ struct resource iomem_resource = {
};
EXPORT_SYMBOL(iomem_resource);

+struct resource iobusn_resource = {
+ .name = "PCI busn",
+ .start = 0,
+ .end = 0xffffff,
+ .flags = IORESOURCE_BUS,
+};
+EXPORT_SYMBOL(iobusn_resource);
+
/* constraints to be met while allocating resources */
struct resource_constraint {
resource_size_t min, max, align;