RE: [PATCH] pci/pciehp: Allow polling/irq mode to be decided on a per-port basis

From: Guenter Roeck
Date: Fri Apr 25 2014 - 13:37:55 EST


> -----Original Message-----
> From: Bjorn Helgaas [mailto:bhelgaas@xxxxxxxxxx]
> Sent: Friday, April 25, 2014 9:44 AM
> To: Guenter Roeck
> Cc: Rajat Jain; linux-pci@xxxxxxxxxxxxxxx; linux-
> kernel@xxxxxxxxxxxxxxx; Rajat Jain
> Subject: Re: [PATCH] pci/pciehp: Allow polling/irq mode to be decided
> on a per-port basis
>
> On Fri, Apr 25, 2014 at 10:34 AM, Guenter Roeck <groeck@xxxxxxxxxxx>
> wrote:
> >
> >
> >> -----Original Message-----
> >> From: Bjorn Helgaas [mailto:bhelgaas@xxxxxxxxxx]
> >> Sent: Thursday, April 24, 2014 2:31 PM
> >> To: Rajat Jain
> >> Cc: linux-pci@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Rajat
> >> Jain; Guenter Roeck
> >> Subject: Re: [PATCH] pci/pciehp: Allow polling/irq mode to be
> decided
> >> on a per-port basis
> >>
> >> On Mon, Mar 31, 2014 at 04:51:53PM -0700, Rajat Jain wrote:
> >> > Today, there is a global pciehp_poll_mode module parameter using
> >> which
> >> > either _all_ the hot-pluggable ports are to use polling, or _all_
> >> > the ports are to use interrupts.
> >> >
> >> > In a system where a certain port has IRQ issues, today the only
> >> option
> >> > is to use the parameter that converts ALL the ports to use polling
> >> mode.
> >> > This is not good, and hence this patch intruduces a bit field that
> >> can
> >> > be set using a PCI quirk that indicates that polling should always
> >> > be used for this particular PCIe port. The remaining ports can
> >> > still hoose to continue to operate in whatever mode they wish to.
> >> >
> >> > Signed-off-by: Rajat Jain <rajatxjain@xxxxxxxxx>
> >> > Signed-off-by: Rajat Jain <rajatjain@xxxxxxxxxxx>
> >> > Signed-off-by: Guenter Roeck <groeck@xxxxxxxxxxx>
> >>
> >> I'm willing to merge this, but I'd prefer to merge it along with a
> >> quirk that actually sets dev->hotplug_polling. Otherwise it's dead
> >> code and I'll have no way to tell whether we need to keep it.
> >>
> > Bjorn,
> >
> > what would be the proper location for such a quirk ?
> > We use it to help simulating hotplug support on an IDT PES12NT3.
> > The code is a bit more invasive than just the quirk itself, since it
> > also needs to touch link and slot status registers, so quirks.c
> > doesn't seem appropriate.
> >
> > drivers/pci/pes12nt3.c, maybe, with a separate configuration option ?
> > Or in the hotplug directory ?
>
> If this is only for debug, i.e., you don't intend to ship a product
> using this simulated hotplug, maybe you should just keep both the quirk
> and this patch out of tree.
>
> If you do want to eventually ship this code for some product, I think
> it'd be fine to put the quirk in drivers/pci/quirks.c, maybe with a
> config option to enable it. But without seeing the quirk, I can't
> really tell. A new file seems overkill unless it's something really
> huge -- I don't think we really have examples of dedicated files for
> other chip idiosyncrasies.
>

I'd give it a 50:50 probability that it will ship. Current plan is that
it is for development only, but I suspect that may change at some point.

I agree, this is kind of an outlier. If we push it upstream, it might
mostly serve as a reference for others who might have similar problems -
not just for the quirk itself, but as an example on how to intercept
and manipulate pci configuration register accesses.

I attached the file so you can have a look.

Guenter

/*
* PTX5000 SIB - PCI fixup code
*
* Rajat Jain <rajatjain@xxxxxxxxxxx>
* Copyright 2014 Juniper Networks
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License v2 as published by the
* Free Software Foundation
*/

#include <linux/list.h>
#include <linux/pci.h>
#include <linux/jnx/pci_ids.h>
#include <linux/spinlock.h>

#define IDT_PES12NT3_DLSTS 0x268
#define IDT_PES12NT3_DLSTS_DLFSM 0x7
#define IDT_PES12NT3_DLSTS_LINKACTIVE 0x4

struct pes12nt3_pci_data {
struct list_head list;
struct device *dev;
struct pci_ops ops;
struct pci_ops *old_ops;
bool lnksta_dllla;
bool sltsta_dllsc;
};

static LIST_HEAD(pes12nt3_list);
static DEFINE_SPINLOCK(pes12nt3_lock);

static int pes12nt3_update_linkstatus(struct pes12nt3_pci_data *data,
struct pci_bus *bus, unsigned int devfn)
{
u32 linkactive;
bool dllla;
int retval;

retval = data->old_ops->read(bus, devfn, IDT_PES12NT3_DLSTS,
4, &linkactive);
if (retval)
return retval;
if (linkactive == 0xffffffff)
dllla = false;
else
dllla = (linkactive & IDT_PES12NT3_DLSTS_DLFSM) ==
IDT_PES12NT3_DLSTS_LINKACTIVE;
if (dllla != data->lnksta_dllla)
data->sltsta_dllsc = true;
data->lnksta_dllla = dllla;
return 0;
}

static int pes12nt3_pci_read(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 *val)
{
struct pes12nt3_pci_data *data =
container_of(bus->ops, struct pes12nt3_pci_data, ops);
int retval, pos;

retval = data->old_ops->read(bus, devfn, where, size, val);

/* Only need to change the registers at port leading to TF chip */
if (retval || devfn != 0)
return retval;

retval = pes12nt3_update_linkstatus(data, bus, devfn);
if (retval)
return retval;

pos = where - 0x40;

/*
* PCI registers smaller than 32 bits may be read using
* different lengths at diferent offsets. Consider:
*
* +-----------------------------------+
* | Reg-1 | Reg-2 | Reg-3 | Reg-4 |
* +-----------------------------------+
* ^ ^ ^ ^
* ptr ptr+1 ptr+2 ptr+3
*
* Reg-4 may be read by using a:
* 1 byte read at (ptr+3)
* 2 byte read at (ptr+2)
* 4 byte read at (ptr)
* etc
*
* We need to take of this here.
*/

switch (size) {
case 4:
switch (pos) {
case 0:
if (*val == 0xffffffff)
*val = 0;
else
*val |= (PCI_EXP_FLAGS_SLOT << 16);
break;
case PCI_EXP_LNKCAP:
if (*val == 0xffffffff)
*val = 0;
else
*val |= PCI_EXP_LNKCAP_DLLLARC;
break;
case PCI_EXP_SLTCAP:
if (*val == 0xffffffff) {
*val = 0;
} else {
*val |= PCI_EXP_SLTCAP_HPC;
*val = (*val & ~PCI_EXP_SLTCAP_PSN) |
(bus->number << 19);
}
break;
case PCI_EXP_LNKCTL:
if (*val == 0xffffffff) {
*val = 0;
} else {
if (data->lnksta_dllla)
*val |= PCI_EXP_LNKSTA_DLLLA << 16;
else
*val &= ~(PCI_EXP_LNKSTA_DLLLA << 16);
}
break;
}
break;

case 2:
switch (pos) {
case PCI_EXP_FLAGS_SLOT:
if (*val == 0xffff)
*val = 0;
else
*val |= PCI_EXP_FLAGS_SLOT;
break;
case PCI_EXP_LNKSTA:
if (*val == 0xffff) {
*val = 0;
} else {
if (data->lnksta_dllla)
*val |= PCI_EXP_LNKSTA_DLLLA;
else
*val &= ~PCI_EXP_LNKSTA_DLLLA;
}
break;
case PCI_EXP_SLTSTA:
if (*val == 0xffff)
*val = PCI_EXP_SLTSTA_DLLSC;
else if (data->sltsta_dllsc)
*val |= PCI_EXP_SLTSTA_DLLSC;
break;
}
break;
}
return 0;
}

static int pes12nt3_pci_write(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 val)
{
struct pes12nt3_pci_data *data =
container_of(bus->ops, struct pes12nt3_pci_data, ops);
int pos = where - 0x40;
int retval;

if (devfn == 0 && size == 2) {
switch (pos) {
case PCI_EXP_SLTSTA:
if (val & PCI_EXP_SLTSTA_DLLSC)
data->sltsta_dllsc = false;
break;
}
}

retval = data->old_ops->write(bus, devfn, where, size, val);
if (retval || devfn != 0)
return retval;

/* Catch situations where the link status changed after being handled */
return pes12nt3_update_linkstatus(data, bus, devfn);
}

static void pes12nt3_fake_linkstate_hotplug(struct pci_dev *dev)
{
struct pes12nt3_pci_data *data;

if (pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return;
data->ops.read = pes12nt3_pci_read;
data->ops.write = pes12nt3_pci_write;
data->old_ops = pci_bus_set_ops(dev->subordinate, &data->ops);
data->dev = &dev->dev;
spin_lock(&pes12nt3_lock);
list_add(&data->list, &pes12nt3_list);
spin_unlock(&pes12nt3_lock);
} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) {
dev->hotplug_polling = 1; /* No IRQ support */
dev->pcie_flags_reg |= PCI_EXP_FLAGS_SLOT;
}
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IDT, PCI_DEVICE_ID_IDT_PES12NT3_TRANS_AB,
pes12nt3_fake_linkstate_hotplug);

static void pes12nt3_cleanup_entry(struct device *dev)
{
struct pes12nt3_pci_data *data, *tmp;

spin_lock(&pes12nt3_lock);
list_for_each_entry_safe(data, tmp, &pes12nt3_list, list) {
if (data->dev == dev) {
list_del(&data->list);
kfree(data);
break;
}
}
spin_unlock(&pes12nt3_lock);
}

static int pes12nt3_notifier_call(struct notifier_block *n,
unsigned long action, void *dev)
{
if (action == BUS_NOTIFY_DEL_DEVICE)
pes12nt3_cleanup_entry(dev);

return NOTIFY_DONE;
}

static struct notifier_block pes12nt3_nb = {
.notifier_call = pes12nt3_notifier_call,
};

static int __init pes12nt3_init(void)
{
bus_register_notifier(&pci_bus_type, &pes12nt3_nb);
return 0;
}

subsys_initcall(pes12nt3_init);