+ genirq-x86_64-irq-make-vector_irq-per-cpu.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled

     genirq: x86_64 irq: make vector_irq per cpu

has been added to the -mm tree.  Its filename is

     genirq-x86_64-irq-make-vector_irq-per-cpu.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this.

------------------------------------------------------
Subject: genirq: x86_64 irq: make vector_irq per cpu
From: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>


This refactors the irq handling code to make the vectors a per cpu resource so
the same vector number can be simultaneously used on multiple cpus for
different irqs.

This should make systems that were hitting limits on the total number of irqs
much more livable.

Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Rajesh Shah <rajesh.shah@xxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxx>
Cc: "Protasevich, Natalie" <Natalie.Protasevich@xxxxxxxxxx>
Cc: "Luck, Tony" <tony.luck@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 arch/x86_64/kernel/i8259.c   |    4 
 arch/x86_64/kernel/io_apic.c |  181 ++++++++++++++++++++++++---------
 arch/x86_64/kernel/irq.c     |    2 
 include/asm-x86_64/hw_irq.h  |    5 
 include/asm-x86_64/irq.h     |    4 
 5 files changed, 143 insertions(+), 53 deletions(-)

diff -puN arch/x86_64/kernel/i8259.c~genirq-x86_64-irq-make-vector_irq-per-cpu arch/x86_64/kernel/i8259.c
--- a/arch/x86_64/kernel/i8259.c~genirq-x86_64-irq-make-vector_irq-per-cpu
+++ a/arch/x86_64/kernel/i8259.c
@@ -394,7 +394,7 @@ device_initcall(i8259A_init_sysfs);
  */
 
 static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
-int vector_irq[NR_VECTORS] __read_mostly = {
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[0 ... FIRST_EXTERNAL_VECTOR - 1] = -1,
 	[FIRST_EXTERNAL_VECTOR + 0] = 0,
 	[FIRST_EXTERNAL_VECTOR + 1] = 1,
@@ -522,7 +522,7 @@ void __init init_IRQ(void)
 	 * IRQ0 must be given a fixed assignment and initialized,
 	 * because it's used before the IO-APIC is set up.
 	 */
-	vector_irq[FIRST_DEVICE_VECTOR] = 0;
+	__get_cpu_var(vector_irq)[FIRST_DEVICE_VECTOR] = 0;
 
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
diff -puN arch/x86_64/kernel/io_apic.c~genirq-x86_64-irq-make-vector_irq-per-cpu arch/x86_64/kernel/io_apic.c
--- a/arch/x86_64/kernel/io_apic.c~genirq-x86_64-irq-make-vector_irq-per-cpu
+++ a/arch/x86_64/kernel/io_apic.c
@@ -44,7 +44,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 
-static int assign_irq_vector(int irq);
+static int assign_irq_vector(int irq, cpumask_t mask);
 
 #define __apicdebuginit  __init
 
@@ -107,12 +107,36 @@ static struct irq_pin_list {
 	FINAL;								\
 }
 
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	int apic, pin;
+	struct irq_pin_list *entry = irq_2_pin + irq;
+
+	BUG_ON(irq >= NR_IRQS);
+	for (;;) {
+		unsigned int reg;
+		apic = entry->apic;
+		pin = entry->pin;
+		if (pin == -1)
+			break;
+		io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~0x000000ff;
+		reg |= vector;
+		io_apic_modify(apic, reg);
+		if (!entry->next)
+			break;
+		entry = irq_2_pin + entry->next;
+	}
+}
+
 #ifdef CONFIG_SMP
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
+	int vector;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -120,7 +144,13 @@ static void set_ioapic_affinity_irq(unsi
 
 	cpus_and(mask, tmp, CPU_MASK_ALL);
 
-	dest = cpu_mask_to_apicid(mask);
+	vector = assign_irq_vector(irq, mask);
+	if (vector < 0)
+		return;
+
+	cpus_clear(tmp);
+	cpu_set(vector >> 8, tmp);
+	dest = cpu_mask_to_apicid(tmp);
 
 	/*
 	 * Only the high 8 bits are valid.
@@ -128,7 +158,7 @@ static void set_ioapic_affinity_irq(unsi
 	dest = SET_APIC_LOGICAL_ID(dest);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__DO_ACTION(1, = dest, )
+	__target_IO_APIC_irq(irq, dest, vector & 0xff);
 	set_native_irq_info(irq, mask);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
@@ -715,7 +745,7 @@ int gsi_irq_sharing(int gsi)
 
 	tries = NR_IRQS;
   try_again:
-	vector = assign_irq_vector(gsi);
+	vector = assign_irq_vector(gsi, TARGET_CPUS);
 
 	/*
 	 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
@@ -826,45 +856,77 @@ static inline int IO_APIC_irq_trigger(in
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 };
 
-static int __assign_irq_vector(int irq)
+static int __assign_irq_vector(int irq, cpumask_t mask)
 {
-	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
-	int vector;
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	static struct {
+		int vector;
+		int offset;
+	} pos[NR_CPUS] = { [ 0 ... NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} };
+	int old_vector = -1;
+	int cpu;
 
 	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
-	if (IO_APIC_VECTOR(irq) > 0) {
-		return IO_APIC_VECTOR(irq);
+	if (IO_APIC_VECTOR(irq) > 0)
+		old_vector = IO_APIC_VECTOR(irq);
+	if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) {
+		return old_vector;
 	}
+
+	for_each_cpu_mask(cpu, mask) {
+		int vector, offset;
+		vector = pos[cpu].vector;
+		offset = pos[cpu].offset;
 next:
-	current_vector += 8;
-	if (current_vector == IA32_SYSCALL_VECTOR)
-		goto next;
-
-	if (current_vector >= FIRST_SYSTEM_VECTOR) {
-		/* If we run out of vectors on large boxen, must share them. */
-		offset = (offset + 1) % 8;
-		current_vector = FIRST_DEVICE_VECTOR + offset;
+		vector += 8;
+		if (vector >= FIRST_SYSTEM_VECTOR) {
+			/* If we run out of vectors on large boxen, must share them. */
+			offset = (offset + 1) % 8;
+			vector = FIRST_DEVICE_VECTOR + offset;
+		}
+		if (unlikely(pos[cpu].vector == vector))
+			continue;
+		if (vector == IA32_SYSCALL_VECTOR)
+			goto next;
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			goto next;
+		/* Found one! */
+		pos[cpu].vector = vector;
+		pos[cpu].offset = offset;
+		if (old_vector >= 0) {
+			int old_cpu = old_vector >> 8;
+			old_vector &= 0xff;
+			per_cpu(vector_irq, old_cpu)[old_vector] = -1;
+		}
+		per_cpu(vector_irq, cpu)[vector] = irq;
+		vector |= cpu << 8;
+		IO_APIC_VECTOR(irq) = vector;
+		return vector;
 	}
-
-	vector = current_vector;
-	vector_irq[vector] = irq;
-	IO_APIC_VECTOR(irq) = vector;
-
-	return vector;
+	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq)
+static int assign_irq_vector(int irq, cpumask_t mask)
 {
 	int vector;
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	vector = __assign_irq_vector(irq);
+	vector = __assign_irq_vector(irq, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
-
 	return vector;
 }
 
@@ -934,8 +996,15 @@ static void __init setup_IO_APIC_irqs(vo
 			continue;
 
 		if (IO_APIC_IRQ(irq)) {
-			vector = assign_irq_vector(irq);
-			entry.vector = vector;
+			cpumask_t mask;
+			vector = assign_irq_vector(irq, TARGET_CPUS);
+			if (vector < 0)
+				continue;
+
+			cpus_clear(mask);
+			cpu_set(vector >> 8, mask);
+			entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+			entry.vector = vector & 0xff;
 
 			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
 			if (!apic && (irq < 16))
@@ -1514,7 +1583,14 @@ static unsigned int startup_ioapic_irq(u
 
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	send_IPI_self(IO_APIC_VECTOR(irq));
+	cpumask_t mask;
+	unsigned vector;
+
+	vector = irq_vector[irq];
+	cpus_clear(mask);
+	cpu_set(vector >> 8, mask);
+
+	send_IPI_mask(mask, vector & 0xff);
 
 	return 1;
 }
@@ -1726,7 +1802,7 @@ static inline void check_timer(void)
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	vector = assign_irq_vector(0);
+	vector = assign_irq_vector(0, TARGET_CPUS);
 
 	/*
 	 * Subtle, code in do_timer_interrupt() expects an AEOI
@@ -1979,7 +2055,7 @@ int create_irq(void)
 			continue;
 		if (irq_vector[new] != 0)
 			continue;
-		vector = __assign_irq_vector(new);
+		vector = __assign_irq_vector(new, TARGET_CPUS);
 		if (likely(vector > 0))
 			irq = new;
 		break;
@@ -2015,12 +2091,12 @@ static int msi_msg_setup(struct pci_dev 
 	int vector;
 	unsigned dest;
 
-	vector = assign_irq_vector(irq);
+	vector = assign_irq_vector(irq, TARGET_CPUS);
 	if (vector >= 0) {
 		cpumask_t tmp;
 
 		cpus_clear(tmp);
-		cpu_set(first_cpu(cpu_online_map), tmp);
+		cpu_set(vector >> 8, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
 		msg->address_hi = MSI_ADDR_BASE_HI;
@@ -2055,9 +2131,13 @@ static void msi_msg_set_affinity(unsigne
 	int vector;
 	unsigned dest;
 
-	vector = assign_irq_vector(irq);
+	vector = assign_irq_vector(irq, mask);
 	if (vector > 0) {
-		dest = cpu_mask_to_apicid(mask);
+		cpumask_t tmp;
+
+		cpus_clear(tmp);
+		cpu_set(vector >> 8, tmp);
+		dest = cpu_mask_to_apicid(tmp);
 
 		msg->data &= ~MSI_DATA_VECTOR_MASK;
 		msg->data |= MSI_DATA_VECTOR(vector);
@@ -2113,6 +2193,8 @@ int io_apic_set_pci_routing (int ioapic,
 {
 	struct IO_APIC_route_entry entry;
 	unsigned long flags;
+	int vector;
+	cpumask_t mask;
 
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -2120,6 +2202,21 @@ int io_apic_set_pci_routing (int ioapic,
 		return -EINVAL;
 	}
 
+	irq = gsi_irq_sharing(irq);
+	/*
+	 * IRQs < 16 are already in the irq_2_pin[] map
+	 */
+	if (irq >= 16)
+		add_pin_to_irq(irq, ioapic, pin);
+
+
+	vector = assign_irq_vector(irq, TARGET_CPUS);
+	if (vector < 0)
+		return vector;
+
+	cpus_clear(mask);
+	cpu_set(vector >> 8, mask);
+
 	/*
 	 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
 	 * Note that we mask (disable) IRQs now -- these get enabled when the
@@ -2130,19 +2227,11 @@ int io_apic_set_pci_routing (int ioapic,
 
 	entry.delivery_mode = INT_DELIVERY_MODE;
 	entry.dest_mode = INT_DEST_MODE;
-	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
 	entry.trigger = triggering;
 	entry.polarity = polarity;
 	entry.mask = 1;					 /* Disabled (masked) */
-
-	irq = gsi_irq_sharing(irq);
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
-
-	entry.vector = assign_irq_vector(irq);
+	entry.vector = vector & 0xff;
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
 		"IRQ %d Mode:%i Active:%i)\n", ioapic, 
diff -puN arch/x86_64/kernel/irq.c~genirq-x86_64-irq-make-vector_irq-per-cpu arch/x86_64/kernel/irq.c
--- a/arch/x86_64/kernel/irq.c~genirq-x86_64-irq-make-vector_irq-per-cpu
+++ a/arch/x86_64/kernel/irq.c
@@ -99,7 +99,7 @@ asmlinkage unsigned int do_IRQ(struct pt
 	exit_idle();
 	irq_enter();
 
-	irq = vector_irq[vector];
+	irq = __get_cpu_var(vector_irq)[vector];
 	if (likely(irq < NR_IRQS))
 		generic_handle_irq(irq, regs);
 
diff -puN include/asm-x86_64/hw_irq.h~genirq-x86_64-irq-make-vector_irq-per-cpu include/asm-x86_64/hw_irq.h
--- a/include/asm-x86_64/hw_irq.h~genirq-x86_64-irq-make-vector_irq-per-cpu
+++ a/include/asm-x86_64/hw_irq.h
@@ -73,8 +73,9 @@
 
 
 #ifndef __ASSEMBLY__
-extern u8 irq_vector[NR_IRQ_VECTORS];
-extern int vector_irq[NR_VECTORS];
+extern unsigned int irq_vector[NR_IRQ_VECTORS];
+typedef int vector_irq_t[NR_VECTORS];
+DECLARE_PER_CPU(vector_irq_t, vector_irq);
 #define IO_APIC_VECTOR(irq)	(irq_vector[irq])
 
 /*
diff -puN include/asm-x86_64/irq.h~genirq-x86_64-irq-make-vector_irq-per-cpu include/asm-x86_64/irq.h
--- a/include/asm-x86_64/irq.h~genirq-x86_64-irq-make-vector_irq-per-cpu
+++ a/include/asm-x86_64/irq.h
@@ -31,8 +31,8 @@
 
 #define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in hw_irq.h */
 
-#define NR_IRQS 224
-#define NR_IRQ_VECTORS (32 * NR_CPUS)
+#define NR_IRQS (NR_VECTORS + (32 *NR_CPUS))
+#define NR_IRQ_VECTORS NR_IRQS
 
 static __inline__ int irq_canonicalize(int irq)
 {
_

Patches currently in -mm which might be from ebiederm@xxxxxxxxxxxx are

add-a-sysfs-file-to-determine-if-a-kexec-kernel-is-loaded.patch
avoid-tasklist_lock-at-getrusage-for-multithreaded-case-too.patch
ptrace-document-the-locking-rules.patch
proc-fix-the-inode-number-on-proc-pid-fd.patch
proc-remove-useless-bkl-in-proc_pid_readlink.patch
proc-remove-unnecessary-and-misleading-assignments.patch
proc-simplify-the-ownership-rules-for-proc.patch
proc-replace-proc_inodetype-with-proc_inodefd.patch
proc-remove-bogus-proc_task_permission.patch
proc-kill-proc_mem_inode_operations.patch
proc-properly-filter-out-files-that-are-not-visible.patch
proc-fix-the-link-count-for-proc-pid-task.patch
proc-move-proc_maps_operations-into-task_mmuc.patch
proc-rewrite-the-proc-dentry-flush-on-exit.patch
proc-close-the-race-of-a-process-dying-durning.patch
proc-refactor-reading-directories-of-tasks.patch
proc-remove-tasklist_lock-from-proc_pid_readdir.patch
proc-remove-tasklist_lock-from-proc_pid_lookup-and.patch
proc-remove-tasklist_lock-from-proc_pid_readdir-simply-fix-first_tgid.patch
proc-make-proc_numbuf-the-buffer-size-for-holding-a.patch
proc-dont-lock-task_structs-indefinitely.patch
proc-dont-lock-task_structs-indefinitely-task_mmu-small-fixes.patch
proc-use-struct-pid-not-struct-task_ref.patch
proc-optimize-proc_check_dentry_visible.patch
proc-use-sane-permission-checks-on-the-proc-pid-fd.patch
proc-cleanup-proc_fd_access_allowed.patch
proc-remove-tasklist_lock-from-proc_task_readdir.patch
simplify-fix-first_tid.patch
cleanup-next_tid.patch
de_thread-fix-lockless-do_each_thread.patch
coredump-optimize-mm-users-traversal.patch
coredump-speedup-sigkill-sending.patch
coredump-kill-ptrace-related-stuff.patch
coredump-kill-ptrace-related-stuff-fix.patch
coredump-dont-take-tasklist_lock.patch
proc-sysctl-add-_proc_do_string-helper.patch
namespaces-add-nsproxy.patch
namespaces-add-nsproxy-dont-include-compileh.patch
namespaces-incorporate-fs-namespace-into-nsproxy.patch
namespaces-utsname-introduce-temporary-helpers.patch
namespaces-utsname-switch-to-using-uts-namespaces.patch
namespaces-utsname-switch-to-using-uts-namespaces-alpha-fix.patch
namespaces-utsname-switch-to-using-uts-namespaces-cleanup.patch
namespaces-utsname-use-init_utsname-when-appropriate.patch
namespaces-utsname-use-init_utsname-when-appropriate-cifs-update.patch
namespaces-utsname-implement-utsname-namespaces.patch
namespaces-utsname-implement-utsname-namespaces-export.patch
namespaces-utsname-implement-utsname-namespaces-dont-include-compileh.patch
namespaces-utsname-sysctl-hack.patch
namespaces-utsname-sysctl-hack-cleanup.patch
namespaces-utsname-sysctl-hack-cleanup-2.patch
namespaces-utsname-sysctl-hack-cleanup-2-fix.patch
namespaces-utsname-remove-system_utsname.patch
namespaces-utsname-implement-clone_newuts-flag.patch
uts-copy-nsproxy-only-when-needed.patch
ipc-namespace-core-fix.patch
ipc-namespace-core-unshare-fix.patch
ipc-namespace-utils-compilation-fix.patch
genirq-irq-convert-the-move_irq-flag-from-a-32bit-word-to-a-single-bit.patch
genirq-irq-add-moved_masked_irq.patch
genirq-x86_64-irq-reenable-migrating-irqs-to-other-cpus.patch
genirq-msi-simplify-msi-enable-and-disable.patch
genirq-msi-simplify-msi-enable-and-disable-fix.patch
genirq-msi-make-the-msi-boolean-tests-return-either-0-or-1.patch
genirq-msi-implement-helper-functions-read_msi_msg-and-write_msi_msg.patch
genirq-msi-refactor-the-msi_ops.patch
genirq-msi-simplify-the-msi-irq-limit-policy.patch
genirq-irq-add-a-dynamic-irq-creation-api.patch
genirq-ia64-irq-dynamic-irq-support.patch
genirq-i386-irq-dynamic-irq-support.patch
genirq-i386-irq-dynamic-irq-support-fix.patch
genirq-x86_64-irq-dynamic-irq-support.patch
genirq-msi-make-the-msi-code-irq-based-and-not-vector-based.patch
genirq-x86_64-irq-move-msi-message-composition-into-io_apicc.patch
genirq-i386-irq-move-msi-message-composition-into-io_apicc.patch
genirq-msi-only-build-msi-apicc-on-ia64.patch
genirq-x86_64-irq-remove-the-msi-assumption-that-irq-==-vector.patch
genirq-i386-irq-remove-the-msi-assumption-that-irq-==-vector.patch
genirq-i386-irq-remove-the-msi-assumption-that-irq-==-vector-fix.patch
genirq-irq-remove-msi-hacks.patch
genirq-irq-generalize-the-check-for-hardirq_bits.patch
genirq-x86_64-irq-make-the-external-irq-handlers-report-their-vector-not-the-irq-number.patch
genirq-x86_64-irq-make-vector_irq-per-cpu.patch
genirq-x86_64-irq-kill-gsi_irq_sharing.patch
genirq-x86_64-irq-kill-irq-compression.patch
genirq-irq-document-what-an-irq-is.patch
pidhash-temporary-debug-checks.patch
genirq-i386-irq-remove-the-msi-assumption-that-irq-==-vector-fix-tidies.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux