[RFC 5/6] LVHPT - setup infrastructure

LVHPT setup

The following patch sets up the long format VHPT (LVHPT) at boot time.

For the boot processor, the VHPT is allocated in ia64_mmu_init().
Each secondary CPU has its LVHPT allocated from do_boot_cpu() before
it is woken up.

Sizing is done per CPU: we try to choose a reasonable size that can be
pinned in the TLB with a single translation register, and the
lvhpt_bits_clamp= boot parameter can clamp it to a specific size.
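
As an illustration (not part of the patch), here is a rough user-space
sketch of that sizing policy: honour the clamp if given, otherwise aim
to cover roughly three times the node's memory, rounded down to a
TLB-pinnable size with a 4MB floor (as in the non-NUMA path).  The
pinnable_bits table and pick_lvhpt_bits() below are made-up stand-ins
for the kernel's find_largest_page_size(), and the arithmetic is done
in bytes rather than following the patch's exact calculation.

/*
 * Illustrative only -- pinnable_bits and pick_lvhpt_bits() are made-up
 * stand-ins, not the kernel's find_largest_page_size().
 */
#include <stdio.h>

static const int pinnable_bits[] = { 16, 18, 20, 22, 24, 26, 28 };

static int pick_lvhpt_bits(unsigned long node_bytes, int clamp_bits)
{
	unsigned long target = 3UL * node_bytes;	/* cover the node's memory three times over */
	int i, bits = 22;				/* 4MB lower bound, as in the non-NUMA path */

	if (clamp_bits)					/* lvhpt_bits_clamp= overrides the heuristic */
		return clamp_bits;

	/* pick the largest pinnable size that still fits under the target */
	for (i = 0; i < (int)(sizeof(pinnable_bits) / sizeof(pinnable_bits[0])); i++)
		if (pinnable_bits[i] > bits && (1UL << pinnable_bits[i]) <= target)
			bits = pinnable_bits[i];
	return bits;
}

int main(void)
{
	printf("8GB node, no clamp -> %d bits\n", pick_lvhpt_bits(8UL << 30, 0));
	printf("lvhpt_bits_clamp=22 -> %d bits (4MB)\n", pick_lvhpt_bits(8UL << 30, 22));
	return 0;
}

So booting with lvhpt_bits_clamp=22 always yields a 4MB table, which
is then pinned with a single translation register.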

Signed-off-by: Ian Wienand <ianw@xxxxxxxxxxxxxxxxxx>

---

 Documentation/kernel-parameters.txt |   14 +++
 arch/ia64/kernel/setup.c            |   31 +++++++
 arch/ia64/kernel/smpboot.c          |   12 ++
 arch/ia64/mm/Makefile               |    1 
 arch/ia64/mm/init.c                 |   55 ++++++++++---
 arch/ia64/mm/lvhpt.c                |  150 ++++++++++++++++++++++++++++++++++++
 include/asm-ia64/kregs.h            |    1 
 include/asm-ia64/lvhpt.h            |   24 +++++
 include/asm-ia64/pgtable.h          |   10 ++
 9 files changed, 286 insertions(+), 12 deletions(-)

Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/setup.c
===================================================================
--- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/kernel/setup.c	2006-05-10 10:00:50.000000000 +1000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/setup.c	2006-05-10 10:01:49.000000000 +1000
@@ -62,6 +62,10 @@
 #include <asm/unistd.h>
 #include <asm/system.h>
 
+#if defined(CONFIG_IA64_LONG_FORMAT_VHPT)
+#include <asm/lvhpt.h>
+#endif
+
 #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
 # error "struct cpuinfo_ia64 too big!"
 #endif
@@ -284,6 +288,18 @@
 #endif
 }
 
+static void __init parse_cmdline_early (char **cmdline_p)
+{
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+	extern int lvhpt_bits_clamp_setup(char *s);
+	char *p;
+
+	if ((p = strstr(*cmdline_p, "lvhpt_bits_clamp=")))
+		lvhpt_bits_clamp_setup(p + 17);
+#endif
+	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+}
+
 static void __init
 io_port_init (void)
 {
@@ -400,12 +416,14 @@
 void __init
 setup_arch (char **cmdline_p)
 {
+	extern void __devinit ia64_tlb_early_init(void);
+
 	unw_init();
 
 	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
 
 	*cmdline_p = __va(ia64_boot_param->command_line);
-	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+	parse_cmdline_early(cmdline_p);
 
 	efi_init();
 	io_port_init();
@@ -438,6 +456,17 @@
 
 	ia64_setup_printk_clock();
 
+	/* Set up some information about the TLBs */
+	ia64_tlb_early_init();
+
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+	/*
+	 * Put this after all the ACPI walking so that we know how
+	 * much memory each node has.
+	 */
+	compute_vhpt_size();
+#endif
+
 #ifdef CONFIG_SMP
 	cpu_physical_id(0) = hard_smp_processor_id();
 
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/smpboot.c
===================================================================
--- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/kernel/smpboot.c	2006-05-10 10:00:50.000000000 +1000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/smpboot.c	2006-05-10 10:01:49.000000000 +1000
@@ -61,6 +61,11 @@
 #include <asm/tlbflush.h>
 #include <asm/unistd.h>
 
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+#include <asm/lvhpt.h>
+#endif
+
+
 #define SMP_DEBUG 0
 
 #if SMP_DEBUG
@@ -512,6 +517,13 @@
 do_rest:
 	task_for_booting_cpu = c_idle.idle;
 
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+	if (alloc_vhpt(cpu)) {
+		panic("Couldn't allocate VHPT on CPU %d\n", cpu);
+	}
+	Dprintk("Allocated long format VHPT for CPU %d at: 0x%lx, size: 0x%lx\n", cpu, vhpt_base[cpu], long_vhpt_size(cpu));
+#endif /* CONFIG_IA64_LONG_FORMAT_VHPT */
+
 	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
 
 	set_brendez_area(cpu);
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/mm/init.c	2006-05-10 10:00:50.000000000 +1000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/init.c	2006-05-10 10:01:49.000000000 +1000
@@ -37,6 +37,10 @@
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+#include <asm/lvhpt.h>
+#endif
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
@@ -338,7 +342,7 @@
 void __devinit
 ia64_mmu_init (void *my_cpu_data)
 {
-	unsigned long psr, pta, impl_va_bits;
+	unsigned long psr, pta;
 	extern void __devinit tlb_init (void);
 
 #ifdef CONFIG_DISABLE_VHPT
@@ -347,16 +351,47 @@
 #	define VHPT_ENABLE_BIT	1
 #endif
 
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+	int cpu = smp_processor_id();
+
+	/* Allocate the VHPT for the boot processor.  The VHPT for the
+	 * other processors is allocated in smpboot.c:do_boot_cpu() as
+	 * they are brought online.
+	 */
+	if (cpu == 0)
+	{
+		unsigned long size = lvhpt_size(cpu);
+		lvhpt_per_cpu_info[0].base = (unsigned long)__alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
+		if (lvhpt_per_cpu_info[0].base == 0)
+			panic("Couldn't allocate VHPT on CPU %d, size: 0x%lx!\n",
+			      cpu, size);
+		printk(KERN_INFO "Allocated long format VHPT for boot processor (CPU %d) at: 0x%lx, size: 0x%lx\n",
+		       cpu, lvhpt_per_cpu_info[0].base, size);
+	}
+#else /* !CONFIG_IA64_LONG_FORMAT_VHPT */
+	unsigned long impl_va_bits;
+#endif
 	/* Pin mapping for percpu area into TLB */
 	psr = ia64_clear_ic();
 	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
 		 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
 		 PERCPU_PAGE_SHIFT);
 
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+	/* Insert the permanent translation for the VHPT */
+	ia64_itr(0x2, IA64_TR_LONG_VHPT, LONG_VHPT_BASE,
+		 pte_val(pfn_pte(__pa(lvhpt_per_cpu_info[cpu].base) >> PAGE_SHIFT, PAGE_KERNEL)), lvhpt_per_cpu_info[cpu].bits);
+#endif
 	ia64_set_psr(psr);
 	ia64_srlz_i();
-
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+#	define VHPT_FORMAT_BIT		1
+#	define vhpt_bits		lvhpt_per_cpu_info[cpu].bits
+	pta = LONG_VHPT_BASE;
+#else
 	/*
+	 * SHORT FORMAT VHPT (virtually mapped linear pagetable)
+	 *
 	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
 	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
 	 * virtual address space are implemented but if we pick a large enough page size
@@ -367,6 +402,7 @@
 	 * address space to not permit mappings that would overlap with the VMLPT.
 	 * --davidm 00/12/06
 	 */
+#	define VHPT_FORMAT_BIT		0
 #	define pte_bits			3
 #	define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
 	/*
@@ -376,28 +412,27 @@
 	 * non-speculative accesses to the virtual page table, so the address range of the
 	 * virtual page table itself needs to be covered by virtual page table.
 	 */
-#	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
+#	define vhpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
 #	define POW2(n)			(1ULL << (n))
-
 	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
 
 	if (impl_va_bits < 51 || impl_va_bits > 61)
 		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
 	/*
 	 * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need,
-	 * which must fit into "vmlpt_bits - pte_bits" slots. Second half of
+	 * which must fit into "vhpt_bits - pte_bits" slots. Second half of
 	 * the test makes sure that our mapped space doesn't overlap the
 	 * unimplemented hole in the middle of the region.
 	 */
-	if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
+	if ((mapped_space_bits - PAGE_SHIFT > vhpt_bits - pte_bits) ||
 	    (mapped_space_bits > impl_va_bits - 1))
 		panic("Cannot build a big enough virtual-linear page table"
 		      " to cover mapped address space.\n"
 		      " Try using a smaller page size.\n");
 
-
 	/* place the VMLPT at the end of each page-table mapped region: */
-	pta = POW2(61) - POW2(vmlpt_bits);
+	pta = POW2(61) - POW2(vhpt_bits);
+#endif
 
 	/*
 	 * Set the (virtually mapped linear) page table address.  Bit
@@ -405,10 +440,8 @@
 	 * size of the table, and bit 0 whether the VHPT walker is
 	 * enabled.
 	 */
-	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
-
+	ia64_set_pta(pta | (VHPT_FORMAT_BIT << 8) | (vhpt_bits << 2) | VHPT_ENABLE_BIT);
 	ia64_tlb_init();
-
 #ifdef	CONFIG_HUGETLB_PAGE
 	ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
 	ia64_srlz_d();
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/pgtable.h
===================================================================
--- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/include/asm-ia64/pgtable.h	2006-05-10 10:00:50.000000000 +1000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/pgtable.h	2006-05-10 10:08:46.000000000 +1000
@@ -156,6 +156,10 @@
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
 
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+#include <asm/lvhpt.h>
+#endif
+
 /*
  * Next come the mappings that determine how mmap() protection bits
  * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented.  The
@@ -556,6 +560,7 @@
     extern void memmap_init (unsigned long size, int nid, unsigned long zone,
 			     unsigned long start_pfn);
 #  endif /* CONFIG_VIRTUAL_MEM_MAP */
+
 # endif /* !__ASSEMBLY__ */
 
 /*
@@ -576,6 +581,11 @@
 #define KERNEL_TR_PAGE_SIZE	(1 << KERNEL_TR_PAGE_SHIFT)
 
 /*
+ * Long format VHPT
+ */
+#define LONG_VHPT_BASE		(0xc000000000000000UL - (1UL << lvhpt_per_cpu_info[smp_processor_id()].bits))
+
+/*
  * No page table caches to initialise
  */
 #define pgtable_cache_init()	do { } while (0)
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/Documentation/kernel-parameters.txt	2006-05-10 10:00:50.000000000 +1000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/Documentation/kernel-parameters.txt	2006-05-10 10:01:49.000000000 +1000
@@ -50,6 +50,7 @@
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
 	LP	Printer support is enabled.
+	LONG_FORMAT_VHPT	Long Format VHPT is enabled.
 	LOOP	Loopback device support is enabled.
 	M68k	M68k architecture is enabled.
 			These options have more detailed description inside of
@@ -805,6 +806,19 @@
 	ltpc=		[NET]
 			Format: <io>,<irq>,<dma>
 
+	lvhpt_bits_clamp=	[IA64,LONG_FORMAT_VHPT]
+				Format: <1-39>
+
+				Clamp the size of the LVHPT (on all
+				nodes of a NUMA system) to 2^n bytes,
+				e.g. n=22 gives a 4MB LVHPT.  A TLB
+				entry of this size is pinned, so the
+				size must be a page size supported
+				by the architecture, otherwise the
+				kernel will not boot.  By default a
+				reasonable size is chosen
+				automatically based on the amount of
+				memory present.
 	mac5380=	[HW,SCSI] Format:
 			<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
 
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/Makefile
===================================================================
--- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/mm/Makefile	2006-05-10 10:00:50.000000000 +1000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/Makefile	2006-05-10 10:01:49.000000000 +1000
@@ -9,3 +9,4 @@
 obj-$(CONFIG_DISCONTIGMEM) += discontig.o
 obj-$(CONFIG_SPARSEMEM)	   += discontig.o
 obj-$(CONFIG_FLATMEM)	   += contig.o
+obj-$(CONFIG_IA64_LONG_FORMAT_VHPT) += lvhpt.o
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/lvhpt.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/lvhpt.c	2006-05-10 13:05:53.000000000 +1000
@@ -0,0 +1,150 @@
+/* Long Format VHPT support functions */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+
+#include <asm/tlb.h>
+#include <asm/lvhpt.h>
+
+/*
+ * This allows you to clamp the number of bits used for the long
+ * format VHPT.  TODO: check for invalid values here.
+ */
+static int lvhpt_bits_clamp;
+
+int __init
+lvhpt_bits_clamp_setup(char *s)
+{
+	if (sscanf(s, "%d", &lvhpt_bits_clamp) <= 0)
+		lvhpt_bits_clamp = 0;
+	return 1;
+}
+
+__setup("lvhpt_bits_clamp=", lvhpt_bits_clamp_setup);
+
+
+/* We try to size the LVHPT to cover the node-local memory for the
+ * CPU.  At initialisation we stash the location and size of each
+ * CPU's LVHPT table here.  */
+struct lvhpt_per_cpu_info_struct lvhpt_per_cpu_info[NR_CPUS];
+
+/*
+ * This code must be called on a CPU which has its MMU
+ * initialized. The page allocator seems to depend on it.
+ *
+ * Returns 0 on success.
+ */
+unsigned int
+alloc_vhpt(int cpu)
+{
+	int lvhpt_bits = lvhpt_per_cpu_info[cpu].bits;
+
+#ifdef CONFIG_NUMA
+	struct page *page;
+
+	page = alloc_pages_node(cpu_to_node(cpu), __GFP_HIGHMEM|GFP_ATOMIC, lvhpt_bits - PAGE_SHIFT);
+	if (!page)
+		return -1;
+	lvhpt_per_cpu_info[cpu].base = (unsigned long) page_address(page);
+#else
+	lvhpt_per_cpu_info[cpu].base = (unsigned long)__get_free_pages(__GFP_HIGHMEM|GFP_ATOMIC,
+							 lvhpt_bits - PAGE_SHIFT);
+#endif
+	return (lvhpt_per_cpu_info[cpu].base == 0UL);
+}
+
+/*
+ * Passed to efi_memmap_walk to simply add up how much memory we have.
+ * This is used to size the LVHPT.
+ */
+static int
+get_total_ram(unsigned long start, unsigned long end, void *arg)
+{
+	unsigned long *s = arg;
+	*s += (end - start);
+	return 0;
+}
+
+/* We use this for sizing the LVHPT */
+static unsigned long lvhpt_node_addressable_memory[MAX_NUMNODES];
+
+#ifdef CONFIG_NUMA
+static void compute_vhpt_size_numa(void)
+{
+	int i;
+
+	if (lvhpt_bits_clamp)
+	{
+		printk(KERN_INFO "Clamping LVHPT to %d bits\n", lvhpt_bits_clamp);
+		for (i = 0; i < NR_CPUS; i++)
+			lvhpt_per_cpu_info[i].bits = lvhpt_bits_clamp;
+		return;
+	}
+
+	/* In the NUMA case, we evaluate how much memory each node has
+	 * and then try to size the LVHPT to three times the physical
+	 * memory of the node (as this gives us the best coverage).
+	 * Since we pin this with a TLB entry, we also need to make
+	 * sure the size we choose is suitable for the architecture.
+	 */
+	for (i = 0; i < num_node_memblks; i++) {
+		printk(KERN_DEBUG "vhpt_addr_mem[%d] = 0x%lx\n", node_memblk[i].nid, node_memblk[i].size);
+		lvhpt_node_addressable_memory[node_memblk[i].nid] +=
+			node_memblk[i].size;
+	}
+
+	for (i = 0; i < NR_CPUS; i++)
+	{
+		unsigned long size =
+			3 * (lvhpt_node_addressable_memory[cpu_to_node(i)] >> PAGE_SHIFT);
+		lvhpt_per_cpu_info[i].bits = find_largest_page_size(size);
+	}
+}
+#endif
+
+/* This version is built for both NUMA and non-NUMA kernels, since it can be used in either case. */
+static void compute_vhpt_size_non_numa(void)
+{
+	int i;
+	int bits;
+
+	if (lvhpt_bits_clamp)
+	{
+		printk(KERN_INFO "Clamping LVHPT to %d bits\n", lvhpt_bits_clamp);
+		for (i=0; i < NR_CPUS; i++)
+			lvhpt_per_cpu_info[i].bits = lvhpt_bits_clamp;
+		return;
+	}
+
+	/* Otherwise, find the total memory in the system; we will
+	 * then size the LVHPT table to cover it three times
+	 * over.
+	 */
+	efi_memmap_walk(get_total_ram, &lvhpt_node_addressable_memory[0]);
+
+	/* Apply a lower bound of a fairly reasonable 4 megabytes (22 bits) */
+	bits = max(22UL, find_largest_page_size(3 * (lvhpt_node_addressable_memory[0] >> PAGE_SHIFT)));
+
+	for (i=0; i < NR_CPUS; i++)
+		lvhpt_per_cpu_info[i].bits = bits;
+}
+
+void __init
+compute_vhpt_size(void)
+{
+#ifdef CONFIG_NUMA
+	/* Machines like the ZX1 don't set up all the node info we
+	 * require, but someone might still try to boot a NUMA kernel
+	 * on one.  In this case, fall back to the non-NUMA path. */
+	if (num_node_memblks == 0)
+		compute_vhpt_size_non_numa();
+	else
+		compute_vhpt_size_numa();
+#else
+	compute_vhpt_size_non_numa();
+#endif
+}
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/lvhpt.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/lvhpt.h	2006-05-10 10:01:49.000000000 +1000
@@ -0,0 +1,24 @@
+/* Long Format VHPT includes */
+#ifndef _ASM_IA64_LVHPT_H
+#define _ASM_IA64_LVHPT_H
+
+/* We keep a per-CPU record of each processor's VHPT base address
+ * and size. */
+struct lvhpt_per_cpu_info_struct {
+	unsigned long base;
+	int bits;
+};
+
+extern struct lvhpt_per_cpu_info_struct lvhpt_per_cpu_info[NR_CPUS];
+
+/* Initialise and compute the VHPT size for each CPU */
+void __init compute_vhpt_size(void);
+/* Allocate the VHPT for a CPU */
+unsigned int alloc_vhpt(int cpu);
+
+static inline unsigned long lvhpt_size(int cpu)
+{
+	return (1UL << lvhpt_per_cpu_info[cpu].bits);
+}
+
+#endif /* _ASM_IA64_LVHPT_H */
Index: linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/kregs.h
===================================================================
--- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/include/asm-ia64/kregs.h	2006-05-10 10:00:50.000000000 +1000
+++ linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/kregs.h	2006-05-10 10:01:49.000000000 +1000
@@ -31,6 +31,7 @@
 #define IA64_TR_PALCODE		1	/* itr1: maps PALcode as required by EFI */
 #define IA64_TR_PERCPU_DATA	1	/* dtr1: percpu data */
 #define IA64_TR_CURRENT_STACK	2	/* dtr2: maps kernel's memory- & register-stacks */
+#define IA64_TR_LONG_VHPT	3	/* dtr3: maps long format VHPT */
 
 /* Processor status register bits: */
 #define IA64_PSR_BE_BIT		1