Re: [RFC][PATCH] Add sysfs entry that displays MSI-X IRQs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Here's the new patch. It creates an "msix" directory in
/sys/bus/pci/devices/.../ and populates it with one file for each MSI-X
vector used by the device. Each file contains the IRQ number assigned to
that vector.

Exposing the vectors to userspace allows applications like
irqbalance to intelligently spread device IRQs across many CPUs,
since they can then work out which IRQ belongs to which device.

Tested on x86_64 with Mellanox InfiniHost III and ConnectX HCAs, and
on ia64 with a Myricom 10GE NIC.


Thanks.


Signed-off-by: Vincent Rizza <vinnie@xxxxxxx>
Signed-off-by: Brett Grandbois <brettg@xxxxxxx>
Signed-off-by: Greg Banks <gnb@xxxxxxx>
---
Documentation/ABI/testing/sysfs-bus-pci |   13 +++++++
drivers/pci/msi.c                       |   61 +++++++++++++++++++++++++++++++
include/linux/msi.h                     |    5 +++
include/linux/pci.h                     |    1 +
4 files changed, 80 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index ceddcff..5b3203d 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -9,3 +9,16 @@ Description:
		that some devices may have malformatted data.  If the
		underlying VPD has a writable section then the
		corresponding section of this file will be writable.
+
+What:		/sys/bus/pci/devices/.../msix/msix*
+Date:		November 2008
+Contact:	Vincent Rizza <vinnie@xxxxxxx>
+Description:
+		If a PCI device uses any MSI-X IRQs, a new directory
+		called "msix" is created. The directory contains one
+		file per MSI-X IRQ in use, named "msix" followed by
+		four digits, e.g. "msix0000". The number increments
+		per MSI-X IRQ the device is using, so the next file
+		is called "msix0001" and so on. Each file contains
+		the IRQ number. Four digits are enough to cover the
+		maximum number of MSI-X IRQs a device can use, 2048.
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 74801f7..fd0af43 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -25,6 +25,19 @@

static int pci_msi_enable = 1;

+/*
+ * pci_read_msix - sysfs read handler for one "msixNNNN" file
+ * @kobj:     kobject the file lives under (unused)
+ * @bin_attr: attribute embedded in the struct msi_desc being read
+ * @buf:      destination buffer supplied by the caller
+ * @off:      byte offset into the file at which to start reading
+ * @count:    maximum number of bytes to copy into @buf
+ *
+ * Formats the entry's IRQ number as decimal text followed by a newline
+ * and copies the slice [@off, @off + @count) of that text into @buf.
+ *
+ * Returns the number of bytes copied, or 0 when nothing was requested
+ * or @off is at or past the end of the text.
+ */
+static ssize_t
+pci_read_msix(struct kobject *kobj, struct bin_attribute *bin_attr,
+	      char *buf, loff_t off, size_t count)
+{
+	struct msi_desc *entry = container_of(bin_attr, struct msi_desc,
+					       sysfs_entry);
+	char text[16];	/* 10 digits of a 32-bit value + '\n' + NUL fits */
+	loff_t len;
+
+	len = scnprintf(text, sizeof(text), "%u\n", entry->irq);
+
+	/* EOF: nothing requested, or the offset lies outside the text */
+	if (count == 0 || off < 0 || off >= len)
+		return 0;
+
+	/*
+	 * Honour @off and @count: a short read must neither write more
+	 * than @count bytes into @buf nor repeat bytes already returned.
+	 */
+	if (count > len - off)
+		count = len - off;
+
+	memcpy(buf, text + off, count);
+	return count;
+}
+
/* Arch hooks */

int __attribute__ ((weak))
@@ -427,6 +440,8 @@ static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	struct msi_desc *entry;
+	const char *msix_str = "msix";
+	int fsize = 0;
	int pos, i, j, nr_entries, ret;
	unsigned long phys_addr;
	u32 table_offset;
@@ -488,12 +503,41 @@ static int msix_capability_init(struct pci_dev *dev,
		return avail;
	}

+	/* create directory "msix" */
+	dev->msix_dir = kobject_create_and_add(msix_str, &dev->dev.kobj);
+	if (!dev->msix_dir)
+		return -ENOMEM;
+
	i = 0;
	list_for_each_entry(entry, &dev->msi_list, list) {
		entries[i].vector = entry->irq;
		set_irq_msi(entry->irq, entry);
+
+		/*
+		 *  Maximum MSI-X vectors is 2048. Filename will start at
+		 *  "msix0000" and can go up to "msix2047"
+		 */
+		snprintf(entry->msix_fname, sizeof(entry->msix_fname), "%s%04d",
+			 msix_str, i);
+		entry->sysfs_entry.attr.name = (const char *) entry->msix_fname;
+
+		/* Filesize = number of digits in irq plus newline */
+		fsize = snprintf(NULL, 0, "%d", entry->irq) + 1;
+		entry->sysfs_entry.size = fsize;
+		entry->sysfs_entry.attr.mode = S_IRUGO;
+		entry->sysfs_entry.read = pci_read_msix;
+
+		ret = sysfs_create_bin_file(dev->msix_dir,
+					    &entry->sysfs_entry);
+		if (ret) {
+			/* Using name as a flag during clean-up */
+			entry->sysfs_entry.attr.name = NULL;
+			msi_free_irqs(dev);
+			return ret;
+		}
		i++;
	}
+
	/* Set MSI-X enabled bits */
	pci_intx_for_msi(dev, 0);
	msix_set_enable(dev, 1);
@@ -641,10 +685,27 @@ static int msi_free_irqs(struct pci_dev* dev)
			if (list_is_last(&entry->list, &dev->msi_list))
				iounmap(entry->mask_base);
		}
+
+		/*
+		 *  Only remove if it's been created. If the attribute's name is
+		 *  set then this particular "entry" corresponds to a sysfs
+		 *  file.
+		 */
+		if (entry->sysfs_entry.attr.name)
+			sysfs_remove_bin_file(&dev->dev.kobj,
+					      &entry->sysfs_entry);
+
		list_del(&entry->list);
		kfree(entry);
	}

+	/* Also doesn't exists in MSI mode */
+	if (dev->msix_dir) {
+		sysfs_remove_dir(dev->msix_dir);
+		kobject_put(dev->msix_dir);
+		dev->msix_dir = NULL;
+	}
+
	return 0;
}

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 8f29392..937639f 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -2,6 +2,7 @@
#define LINUX_MSI_H

#include <linux/list.h>
+#include <linux/kobject.h>

struct msi_msg {
	u32	address_lo;	/* low 32 bits of msi message address */
@@ -33,6 +34,10 @@ struct msi_desc {
	void __iomem *mask_base;
	struct pci_dev *dev;

+	struct	bin_attribute sysfs_entry;
+	/* filename = "msixXXXX" */
+	char	msix_fname[9];
+
	/* Last set MSI message */
	struct msi_msg msg;
};
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c75b82b..6b818f7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -234,6 +234,7 @@ struct pci_dev {
	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
#ifdef CONFIG_PCI_MSI
	struct list_head msi_list;
+	struct kobject *msix_dir;
#endif
	struct pci_vpd *vpd;
};
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux