[RFC V1 RESEND 5/6] PCI/MSI: Free MSI-X resources by group

From: Megha Dey
Date: Fri Jun 21 2019 - 19:57:48 EST


Currently, pci_free_irq_vectors() frees all the allocated resources
associated with a PCIe device when the device is being shut down. With
the introduction of dynamic allocation of MSI-X vectors by group ID,
there should exist an API which can free the resources allocated only
to a particular group, which can be called even if the device is not
being shut down. The pci_free_irq_vectors_grp() function provides this
type of interface.

The existing pci_free_irq_vectors() can be called alongside this API.

Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
Signed-off-by: Megha Dey <megha.dey@xxxxxxxxxxxxxxx>
---
drivers/pci/msi.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/msi.h | 2 +
include/linux/pci.h | 9 ++++
kernel/irq/msi.c | 26 +++++++++++
4 files changed, 167 insertions(+)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index e947243..342e267 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -53,9 +53,23 @@ static void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
else
arch_teardown_msi_irqs(dev);
}
+
+/* Tear down one group's IRQs via the MSI irq domain or the arch hook. */
+static void pci_msi_teardown_msi_irqs_grp(struct pci_dev *dev, int group_id)
+{
+	struct irq_domain *msi_dom = dev_get_msi_domain(&dev->dev);
+
+	if (!msi_dom || !irq_domain_is_hierarchy(msi_dom)) {
+		arch_teardown_msi_irqs_grp(dev, group_id);
+		return;
+	}
+	msi_domain_free_irqs_grp(msi_dom, &dev->dev, group_id);
+}
+
#else
#define pci_msi_setup_msi_irqs arch_setup_msi_irqs
#define pci_msi_teardown_msi_irqs arch_teardown_msi_irqs
+#define pci_msi_teardown_msi_irqs_grp default_teardown_msi_irqs_grp
#endif

/* Arch hooks */
@@ -373,6 +387,7 @@ static void free_msi_irqs(struct pci_dev *dev)

list_for_each_entry_safe(entry, tmp, msi_list, list) {
if (entry->msi_attrib.is_msix) {
+ clear_bit(entry->msi_attrib.entry_nr, dev->entry);
if (list_is_last(&entry->list, msi_list))
iounmap(entry->mask_base);
}
@@ -381,6 +396,8 @@ static void free_msi_irqs(struct pci_dev *dev)
free_msi_entry(entry);
}

+ idr_destroy(dev->grp_idr);
+
if (dev->msi_irq_groups) {
sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
msi_attrs = dev->msi_irq_groups[0]->attrs;
@@ -398,6 +415,60 @@ static void free_msi_irqs(struct pci_dev *dev)
}
}

+static const char msix_sysfs_grp[] = "msi_irqs";
+
+/*
+ * Free the IRQs, msi_desc entries and sysfs files of one MSI-X group.
+ * Returns the number of vectors recorded for the group (0 if none).
+ */
+static int free_msi_irqs_grp(struct pci_dev *dev, int group_id)
+{
+	struct list_head *msi_list = dev_to_msi_list(&dev->dev);
+	struct msi_desc *entry, *tmp;
+	struct attribute **msi_attrs;
+	struct device_attribute *dev_attr;
+	struct msix_sysfs *msix_sysfs_entry, *tmp_msix;
+	struct list_head *pci_msix = &dev->msix_sysfs;
+	int i, num_vec = 0;
+
+	for_each_pci_msi_entry(entry, dev) {
+		if (entry->group_id == group_id && entry->irq)
+			for (i = 0; i < entry->nvec_used; i++)
+				BUG_ON(irq_has_action(entry->irq + i));
+	}
+
+	pci_msi_teardown_msi_irqs_grp(dev, group_id);
+
+	list_for_each_entry_safe(entry, tmp, msi_list, list) {
+		if (entry->group_id == group_id) {
+			clear_bit(entry->msi_attrib.entry_nr, dev->entry);
+			list_del(&entry->list);
+			free_msi_entry(entry);
+		}
+	}
+	/* Drop the group's sysfs files; never bail out half-way through */
+	list_for_each_entry_safe(msix_sysfs_entry, tmp_msix, pci_msix, list) {
+		if (msix_sysfs_entry->group_id != group_id)
+			continue;
+		if (msix_sysfs_entry->vecs_in_grp > 0)
+			num_vec = msix_sysfs_entry->vecs_in_grp;
+		msi_attrs = msix_sysfs_entry->msi_irq_group->attrs;
+		for (i = 0; i < msix_sysfs_entry->vecs_in_grp; i++) {
+			dev_attr = container_of(msi_attrs[i],
+						struct device_attribute, attr);
+			sysfs_remove_file_from_group(&dev->dev.kobj,
+					&dev_attr->attr, msix_sysfs_grp);
+			kfree(dev_attr->attr.name);
+			kfree(dev_attr);
+		}
+		msix_sysfs_entry->msi_irq_group = NULL;
+		list_del(&msix_sysfs_entry->list);
+		idr_remove(dev->grp_idr, group_id);
+		kfree(msix_sysfs_entry);
+	}
+	return num_vec;
+}
+
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
@@ -1052,6 +1123,45 @@ void pci_disable_msix(struct pci_dev *dev)
}
EXPORT_SYMBOL(pci_disable_msix);

+static void pci_msix_shutdown_grp(struct pci_dev *dev, int group_id)
+{
+ struct msi_desc *entry;
+ int grp_present = 0;
+
+ if (pci_dev_is_disconnected(dev)) {
+ dev->msix_enabled = 0;
+ return;
+ }
+
+ /* Return the device with MSI-X masked as initial states */
+ for_each_pci_msi_entry(entry, dev) {
+ if (entry->group_id == group_id) {
+ /* Keep cached states to be restored */
+ __pci_msix_desc_mask_irq(entry, 1);
+ grp_present = 1;
+ }
+ }
+
+ if (!grp_present) {
+ pci_err(dev, "Group to be disabled not present\n");
+ return;
+ }
+}
+
+/*
+ * Mask, then free, the MSI-X vectors of @group_id. Returns -EINVAL for a
+ * NULL @dev or cleared pci_msi_enable, else free_msi_irqs_grp()'s result.
+ */
+int pci_disable_msix_grp(struct pci_dev *dev, int group_id)
+{
+	if (!dev || !pci_msi_enable)
+		return -EINVAL;
+
+	pci_msix_shutdown_grp(dev, group_id);
+	return free_msi_irqs_grp(dev, group_id);
+}
+EXPORT_SYMBOL(pci_disable_msix_grp);
+
void pci_no_msi(void)
{
pci_msi_enable = 0;
@@ -1356,6 +1466,26 @@ void pci_free_irq_vectors(struct pci_dev *dev)
EXPORT_SYMBOL(pci_free_irq_vectors);

/**
+ * pci_free_irq_vectors_grp - free the IRQs allocated to one MSI-X group
+ * @dev: PCI device to operate on
+ * @group_id: group to be freed; must be >= 0
+ *
+ * Undoes the allocations and enabling in pci_alloc_irq_vectors_dyn().
+ * Can only be called for MSI-X vectors.
+ * Return: pci_disable_msix_grp()'s result, or -EINVAL if @group_id < 0.
+ */
+int pci_free_irq_vectors_grp(struct pci_dev *dev, int group_id)
+{
+	if (group_id < 0) {
+		pci_err(dev, "Group should be >= 0\n");
+		return -EINVAL;
+	}
+
+	return pci_disable_msix_grp(dev, group_id);
+}
+EXPORT_SYMBOL(pci_free_irq_vectors_grp);
+
+/**
* pci_irq_vector - return Linux IRQ number of a device vector
* @dev: PCI device to operate on
* @nr: device-relative interrupt vector index (0-based).
diff --git a/include/linux/msi.h b/include/linux/msi.h
index e61ba24..78929ad 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -333,6 +333,8 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
int nvec);
void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev);
+void msi_domain_free_irqs_grp(struct irq_domain *domain, struct device *dev,
+ int group_id);
struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain);

struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 73385c0..944e539 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1404,6 +1404,7 @@ int pci_msi_vec_count(struct pci_dev *dev);
void pci_disable_msi(struct pci_dev *dev);
int pci_msix_vec_count(struct pci_dev *dev);
void pci_disable_msix(struct pci_dev *dev);
+int pci_disable_msix_grp(struct pci_dev *dev, int group_id);
void pci_restore_msi_state(struct pci_dev *dev);
int pci_msi_enabled(void);
int pci_enable_msi(struct pci_dev *dev);
@@ -1428,6 +1429,7 @@ int pci_alloc_irq_vectors_affinity_dyn(struct pci_dev *dev,
int *group_id, bool one_shot);

void pci_free_irq_vectors(struct pci_dev *dev);
+int pci_free_irq_vectors_grp(struct pci_dev *dev, int group_id);
int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
int pci_irq_vector_group(struct pci_dev *dev, unsigned int nr,
unsigned int group_id);
@@ -1439,6 +1441,8 @@ static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
static inline void pci_disable_msi(struct pci_dev *dev) { }
static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; }
static inline void pci_disable_msix(struct pci_dev *dev) { }
+static inline int
+pci_disable_msix_grp(struct pci_dev *dev, int group_id) { return -ENOSYS; }
static inline void pci_restore_msi_state(struct pci_dev *dev) { }
static inline int pci_msi_enabled(void) { return 0; }
static inline int pci_enable_msi(struct pci_dev *dev)
@@ -1475,6 +1479,11 @@ static inline void pci_free_irq_vectors(struct pci_dev *dev)
{
}

+static inline int pci_free_irq_vectors_grp(struct pci_dev *dev, int group_id)
+{
+	return 0;	/* stub frees nothing and reports zero vectors */
+}
+
static inline int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
{
if (WARN_ON_ONCE(nr > 0))
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 5cfa931..d73a5dc 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -511,6 +511,32 @@ void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
}

/**
+ * msi_domain_free_irqs_grp - Free interrupts belonging to a group from
+ * a MSI interrupt @domain associated to @dev
+ * @domain: The domain managing the interrupts
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * should be freed
+ * @group_id: The group ID to be freed
+ */
+void msi_domain_free_irqs_grp(struct irq_domain *domain, struct device *dev,
+			      int group_id)
+{
+	struct msi_desc *entry;
+
+	for_each_msi_entry(entry, dev) {
+		/*
+		 * An entry whose allocation failed early has no IRQ
+		 * assigned; skip it, along with entries of other groups.
+		 */
+		if (entry->group_id != group_id || !entry->irq)
+			continue;
+
+		irq_domain_free_irqs(entry->irq, entry->nvec_used);
+		entry->irq = 0;
+	}
+}
+
+/**
* msi_get_domain_info - Get the MSI interrupt domain info for @domain
* @domain: The interrupt domain to retrieve data from
*
--
2.7.4