[Patch Part1 v8 10/37] irq_remapping/vt-d: Enhance Intel IR driver to support hierarchy irqdomain

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Enhance Intel interrupt remapping driver to support hierarchy irqdomain,
it will simplify the code eventually. It also implements intel_ir_chip
to support stacked irq_chip.

Signed-off-by: Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx>
Acked-by: Joerg Roedel <jroedel@xxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Rafael J. Wysocki <rjw@xxxxxxxxxxxxx>
Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx>
Cc: Yinghai Lu <yinghai@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Joerg Roedel <joro@xxxxxxxxxx>
Cc: David Woodhouse <dwmw2@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/1416894816-23245-11-git-send-email-jiang.liu@xxxxxxxxxxxxxxx
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
 drivers/iommu/intel_irq_remapping.c |  337 ++++++++++++++++++++++++++++++++++-
 include/linux/intel-iommu.h         |    4 +
 2 files changed, 333 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index d3adaf32b3ec..f2dbb0d54d17 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -8,6 +8,7 @@
 #include <linux/irq.h>
 #include <linux/intel-iommu.h>
 #include <linux/acpi.h>
+#include <linux/irqdomain.h>
 #include <asm/io_apic.h>
 #include <asm/smp.h>
 #include <asm/cpu.h>
@@ -31,6 +32,14 @@ struct hpet_scope {
 	unsigned int devfn;
 };
 
+struct intel_ir_data {
+	struct irq_2_iommu			irq_2_iommu;
+	struct irte				irte_entry;
+	union {
+		struct msi_msg			msi_entry;
+	};
+};
+
 #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
 #define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8)
 
@@ -50,6 +59,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  * the dmar_global_lock.
  */
 static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+static struct irq_domain_ops intel_ir_domain_ops;
 
 static int __init parse_ioapics_under_ir(void);
 
@@ -263,7 +273,7 @@ static int free_irte(int irq)
 	unsigned long flags;
 	int rc;
 
-	if (!irq_iommu)
+	if (!irq_iommu || irq_iommu->iommu == NULL)
 		return -1;
 
 	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
@@ -488,7 +498,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 
 	pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
 				 INTR_REMAP_PAGE_ORDER);
-
 	if (!pages) {
 		pr_err("IR%d: failed to allocate pages of order %d\n",
 		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
@@ -502,11 +511,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 		goto out_free_pages;
 	}
 
+	iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(),
+						    0, INTR_REMAP_TABLE_ENTRIES,
+						    NULL, &intel_ir_domain_ops,
+						    iommu);
+	if (!iommu->ir_domain) {
+		pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
+		goto out_free_bitmap;
+	}
+	iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
+
 	ir_table->base = page_address(pages);
 	ir_table->bitmap = bitmap;
 	iommu->ir_table = ir_table;
 	return 0;
 
+out_free_bitmap:
+	kfree(bitmap);
 out_free_pages:
 	__free_pages(pages, INTR_REMAP_PAGE_ORDER);
 out_free_table:
@@ -517,6 +538,14 @@ out_free_table:
 static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
 {
 	if (iommu && iommu->ir_table) {
+		if (iommu->ir_msi_domain) {
+			irq_domain_remove(iommu->ir_msi_domain);
+			iommu->ir_msi_domain = NULL;
+		}
+		if (iommu->ir_domain) {
+			irq_domain_remove(iommu->ir_domain);
+			iommu->ir_domain = NULL;
+		}
 		free_pages((unsigned long)iommu->ir_table->base,
 			   INTR_REMAP_PAGE_ORDER);
 		kfree(iommu->ir_table->bitmap);
@@ -1062,12 +1091,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	struct irte irte;
 	int err;
 
-	if (!config_enabled(CONFIG_SMP))
-		return -EINVAL;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -EINVAL;
-
 	if (get_irte(irq, &irte))
 		return -EBUSY;
 
@@ -1100,6 +1123,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 		send_cleanup_vector(cfg);
 
 	cpumask_copy(data->affinity, mask);
+
 	return 0;
 }
 
@@ -1205,6 +1229,53 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
 	return ret;
 }
 
+static struct irq_domain *intel_get_ir_irq_domain(struct irq_alloc_info *info)
+{
+	struct intel_iommu *iommu = NULL;
+
+	if (!info)
+		return NULL;
+
+	switch (info->type) {
+	case X86_IRQ_ALLOC_TYPE_IOAPIC:
+		iommu = map_ioapic_to_ir(info->ioapic_id);
+		break;
+	case X86_IRQ_ALLOC_TYPE_HPET:
+		iommu = map_hpet_to_ir(info->hpet_id);
+		break;
+	case X86_IRQ_ALLOC_TYPE_MSI:
+	case X86_IRQ_ALLOC_TYPE_MSIX:
+		iommu = map_dev_to_ir(info->msi_dev);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	return iommu ? iommu->ir_domain : NULL;
+}
+
+static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info)
+{
+	struct intel_iommu *iommu;
+
+	if (!info)
+		return NULL;
+
+	switch (info->type) {
+	case X86_IRQ_ALLOC_TYPE_MSI:
+	case X86_IRQ_ALLOC_TYPE_MSIX:
+		iommu = map_dev_to_ir(info->msi_dev);
+		if (iommu)
+			return iommu->ir_msi_domain;
+		break;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
 struct irq_remap_ops intel_irq_remap_ops = {
 	.prepare		= intel_prepare_irq_remapping,
 	.enable			= intel_enable_irq_remapping,
@@ -1218,6 +1289,256 @@ struct irq_remap_ops intel_irq_remap_ops = {
 	.msi_alloc_irq		= intel_msi_alloc_irq,
 	.msi_setup_irq		= intel_msi_setup_irq,
 	.alloc_hpet_msi		= intel_alloc_hpet_msi,
+	.get_ir_irq_domain	= intel_get_ir_irq_domain,
+	.get_irq_domain		= intel_get_irq_domain,
+};
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For both level and edge triggered, irq migration is a simple atomic
+ * update(of vector and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we eliminate the io-apic RTE modification (with the
+ * updated vector information), by using a virtual vector (io-apic pin number).
+ * Real vector that is used for interrupting cpu will be coming from
+ * the interrupt-remapping table entry.
+ *
+ * As the migration is a simple atomic update of IRTE, the same mechanism
+ * is used to migrate MSI irq's in the presence of interrupt-remapping.
+ */
+static int
+intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
+{
+	struct intel_ir_data *ir_data = data->chip_data;
+	struct irte *irte = &ir_data->irte_entry;
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct irq_data *parent = data->parent_data;
+	int ret;
+
+	ret = parent->chip->irq_set_affinity(parent, mask, force);
+	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+		return ret;
+
+	/*
+	 * Atomically updates the IRTE with the new destination, vector
+	 * and flushes the interrupt entry cache.
+	 */
+	irte->vector = cfg->vector;
+	irte->dest_id = IRTE_DEST(cfg->dest_apicid);
+	modify_irte(&ir_data->irq_2_iommu, irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return IRQ_SET_MASK_OK_DONE;
+}
+
+static void intel_ir_compose_msi_msg(struct irq_data *irq_data,
+				     struct msi_msg *msg)
+{
+	struct intel_ir_data *ir_data = irq_data->chip_data;
+
+	*msg = ir_data->msi_entry;
+}
+
+static struct irq_chip intel_ir_chip = {
+	.irq_ack = ir_ack_apic_edge,
+	.irq_set_affinity = intel_ir_set_affinity,
+	.irq_compose_msi_msg = intel_ir_compose_msi_msg,
+};
+
+static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
+					     struct irq_cfg *irq_cfg,
+					     struct irq_alloc_info *info,
+					     int index, int sub_handle)
+{
+	struct IR_IO_APIC_route_entry *entry;
+	struct irte *irte = &data->irte_entry;
+	struct msi_msg *msg = &data->msi_entry;
+
+	prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid);
+	switch (info->type) {
+	case X86_IRQ_ALLOC_TYPE_IOAPIC:
+		/* Set source-id of interrupt request */
+		set_ioapic_sid(irte, info->ioapic_id);
+		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
+			info->ioapic_id, irte->present, irte->fpd,
+			irte->dst_mode, irte->redir_hint,
+			irte->trigger_mode, irte->dlvry_mode,
+			irte->avail, irte->vector, irte->dest_id,
+			irte->sid, irte->sq, irte->svt);
+
+		entry = (struct IR_IO_APIC_route_entry *)info->ioapic_entry;
+		info->ioapic_entry = NULL;
+		memset(entry, 0, sizeof(*entry));
+		entry->index2	= (index >> 15) & 0x1;
+		entry->zero	= 0;
+		entry->format	= 1;
+		entry->index	= (index & 0x7fff);
+		/*
+		 * IO-APIC RTE will be configured with virtual vector.
+		 * irq handler will do the explicit EOI to the io-apic.
+		 */
+		entry->vector	= info->ioapic_pin;
+		entry->mask	= 0;			/* enable IRQ */
+		entry->trigger	= info->ioapic_trigger;
+		entry->polarity	= info->ioapic_polarity;
+		if (info->ioapic_trigger)
+			entry->mask = 1; /* Mask level triggered irqs. */
+		break;
+
+	case X86_IRQ_ALLOC_TYPE_HPET:
+	case X86_IRQ_ALLOC_TYPE_MSI:
+	case X86_IRQ_ALLOC_TYPE_MSIX:
+		if (info->type == X86_IRQ_ALLOC_TYPE_HPET)
+			set_hpet_sid(irte, info->hpet_id);
+		else
+			set_msi_sid(irte, info->msi_dev);
+
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(index) |
+				  MSI_ADDR_IR_INDEX2(index);
+		break;
+
+	default:
+		BUG_ON(1);
+		break;
+	}
+}
+
+static void intel_free_irq_resources(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *irq_data;
+	struct intel_ir_data *data;
+	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_data = irq_domain_get_irq_data(domain, virq  + i);
+		if (irq_data && irq_data->chip_data) {
+			data = irq_data->chip_data;
+			irq_iommu = &data->irq_2_iommu;
+			raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+			clear_entries(irq_iommu);
+			raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+			irq_domain_reset_irq_data(irq_data);
+			kfree(data);
+		}
+	}
+}
+
+static int intel_irq_remapping_alloc(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs,
+				     void *arg)
+{
+	struct intel_iommu *iommu = domain->host_data;
+	struct irq_alloc_info *info = arg;
+	struct intel_ir_data *data;
+	struct irq_data *irq_data;
+	struct irq_cfg *irq_cfg;
+	int i, ret, index;
+
+	if (!info || !iommu)
+		return -EINVAL;
+	if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
+	    info->type != X86_IRQ_ALLOC_TYPE_MSIX)
+		return -EINVAL;
+
+	/*
+	 * With IRQ remapping enabled, don't need contigious CPU vectors
+	 * to support multiple MSI interrupts.
+	 */
+	if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+		info->flags &= ~X86_IRQ_ALLOC_CONTIGOUS_VECTORS;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+	if (ret < 0)
+		return ret;
+
+	ret = -ENOMEM;
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out_free_parent;
+
+	down_read(&dmar_global_lock);
+	index = alloc_irte(iommu, virq, &data->irq_2_iommu, nr_irqs);
+	up_read(&dmar_global_lock);
+	if (index < 0) {
+		pr_warn("Failed to allocate IRTE\n");
+		kfree(data);
+		goto out_free_parent;
+	}
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_data = irq_domain_get_irq_data(domain, virq + i);
+		irq_cfg = irqd_cfg(irq_data);
+		if (!irq_data || !irq_cfg) {
+			ret = -EINVAL;
+			goto out_free_data;
+		}
+
+		if (i > 0) {
+			data = kzalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				goto out_free_data;
+		}
+		irq_data->hwirq = (index << 16) + i;
+		irq_data->chip_data = data;
+		irq_data->chip = &intel_ir_chip;
+		intel_irq_remapping_prepare_irte(data, irq_cfg, info, index, i);
+		irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
+	}
+	return 0;
+
+out_free_data:
+	intel_free_irq_resources(domain, virq, i);
+out_free_parent:
+	irq_domain_free_irqs_common(domain, virq, nr_irqs);
+	return ret;
+}
+
+static void intel_irq_remapping_free(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs)
+{
+	intel_free_irq_resources(domain, virq, nr_irqs);
+	irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
+
+static void intel_irq_remapping_activate(struct irq_domain *domain,
+					 struct irq_data *irq_data)
+{
+	struct intel_ir_data *data = irq_data->chip_data;
+
+	modify_irte(&data->irq_2_iommu, &data->irte_entry);
+}
+
+static void intel_irq_remapping_deactivate(struct irq_domain *domain,
+					   struct irq_data *irq_data)
+{
+	struct intel_ir_data *data = irq_data->chip_data;
+	struct irte entry;
+
+	memset(&entry, 0, sizeof(entry));
+	modify_irte(&data->irq_2_iommu, &entry);
+}
+
+static struct irq_domain_ops intel_ir_domain_ops = {
+	.alloc = intel_irq_remapping_alloc,
+	.free = intel_irq_remapping_free,
+	.activate = intel_irq_remapping_activate,
+	.deactivate = intel_irq_remapping_deactivate,
 };
 
 /*
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a65208a8fe18..ecaf3a937845 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -286,6 +286,8 @@ struct q_inval {
 
 #define INTR_REMAP_TABLE_ENTRIES	65536
 
+struct irq_domain;
+
 struct ir_table {
 	struct irte *base;
 	unsigned long *bitmap;
@@ -335,6 +337,8 @@ struct intel_iommu {
 
 #ifdef CONFIG_IRQ_REMAP
 	struct ir_table *ir_table;	/* Interrupt remapping info */
+	struct irq_domain *ir_domain;
+	struct irq_domain *ir_msi_domain;
 #endif
 	struct device	*iommu_dev; /* IOMMU-sysfs device */
 	int		node;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux