[PATCH, experimental] i386 Allow the fixmap to be relocated at boot time

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This most curious patch allows the fixmap on i386 to be unfixed.  The 
result is that we can create a dynamically sizable hole at the top of 
kernel linear address space.  I know at least some virtualization 
developers are interested in being able to achieve this to achieve 
run-time sizing of a hole in which a hypervisor can live, or at least to 
test out the performance characteristics of different sized holes.

I have not run any performance numbers yet to see how much the cost of 
making this dynamic affects native performance, but again I would stress 
this is a highly experimental patch and I am looking for feedback and 
any performance data from other systems that people are kind enough to 
share.  I'm not advocating that this get pushed into the mainline Linux 
tree at this point by any means!

I believe at least the Xen folks would be interested in playing around 
with this for experimenting with different MPT and frame table sizes for 
PAE support in a way that doesn't require recompiling the Linux guest 
each time - if the performance impact proves to be negligble, this gives 
a lot of flexibility to any virtual machine which runs a hypervisor 
aware kernel.

Although I did as much as possible to make the vsyscall relocation 
appear clean to userspace, I can't guarantee this patch won't set fire 
to your chair and electrocute your cat.  Please move all pets to a safe 
location before attempting to use this.

Zachary Amsden <zach@xxxxxxxxxx>
-------------- next part --------------
Allow creation of an compile time hole at the top of linear address space.

Extended to allow a dynamic hole in linear address space, 7/2005.  This
required some serious hacking to get everything perfect, but the end result
appears to function quite nicely.  Everyone can now share the appreciation
of pseudo-undocumented ELF OS fields, which means core dumps, debuggers
and even broken or obsolete linkers may continue to work.

Signed-off-by: Zachary Amsden <zach@xxxxxxxxxx>
Index: linux-2.6.13/arch/i386/Kconfig
===================================================================
--- linux-2.6.13.orig/arch/i386/Kconfig	2005-08-04 14:14:24.000000000 -0700
+++ linux-2.6.13/arch/i386/Kconfig	2005-08-05 15:28:42.000000000 -0700
@@ -127,6 +127,20 @@
 
 endchoice
 
+config RELOCATABLE_FIXMAP
+	bool "Allow the fixmap to be placed dynamically at runtime"
+	depends on EXPERIMENTAL
+	help
+	  Crazy hackers only.
+
+config MEMORY_HOLE
+	int "Create hole at top of memory (0-512 MB)"
+	range 0 512
+	default "0"
+	help
+	  Useful for creating a hole in the top of memory when running
+	  inside of a virtual machine monitor.
+
 config ACPI_SRAT
 	bool
 	default y
Index: linux-2.6.13/arch/i386/kernel/sysenter.c
===================================================================
--- linux-2.6.13.orig/arch/i386/kernel/sysenter.c	2005-08-02 17:04:12.000000000 -0700
+++ linux-2.6.13/arch/i386/kernel/sysenter.c	2005-08-05 15:47:53.000000000 -0700
@@ -46,22 +46,90 @@
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
 
+#ifdef CONFIG_RELOCATABLE_FIXMAP
+extern const char SYSENTER_RETURN;
+const char *SYSENTER_RETURN_ADDR;
+
+static void fixup_vsyscall_elf(char *page)
+{
+	Elf32_Ehdr *hdr;
+	Elf32_Shdr *sechdrs;
+	Elf32_Phdr *phdr;
+	char *secstrings;
+	int i, j, n;
+
+	hdr = (Elf32_Ehdr *)page;
+
+	/* Sanity checks against insmoding binaries or wrong arch,
+           weird elf version */
+	if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 ||
+		!elf_check_arch(hdr) ||
+		hdr->e_type != ET_DYN)
+		panic("Bogus ELF in vsyscall DSO\n");
+
+	hdr->e_entry += VSYSCALL_RELOCATION;
+
+	sechdrs = (void *)hdr + hdr->e_shoff;
+	secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+			continue;
+
+		sechdrs[i].sh_addr += VSYSCALL_RELOCATION;
+		if (strcmp(secstrings+sechdrs[i].sh_name, ".dynsym") == 0) {
+			Elf32_Sym  *sym =  (void *)hdr + sechdrs[i].sh_offset;
+			n = sechdrs[i].sh_size / sizeof(*sym);
+			for (j = 1; j < n;  j++) {
+				int ndx = sym[j].st_shndx;
+				if (ndx == SHN_UNDEF || ndx == SHN_ABS)
+					continue;
+				sym[j].st_value += VSYSCALL_RELOCATION;
+			}
+		} else if (strcmp(secstrings+sechdrs[i].sh_name, ".dynamic") == 0) {
+			Elf32_Dyn *dyn = (void *)hdr + sechdrs[i].sh_offset;
+			int tag;
+			while ((tag = (++dyn)->d_tag) != DT_NULL) {
+				if (tag == DT_PLTGOT || tag == DT_HASH ||
+				    tag == DT_STRTAB || tag == DT_SYMTAB ||
+				    tag == DT_RELA || tag == DT_INIT ||
+				    tag == DT_FINI || tag == DT_REL ||
+				    tag == DT_JMPREL || tag == DT_VERSYM ||
+				    tag == DT_VERDEF || tag == DT_VERNEED)
+					dyn->d_un.d_val += VSYSCALL_RELOCATION;
+			}
+		} else if (strcmp(secstrings+sechdrs[i].sh_name, ".useless") == 0) {
+			uint32_t *got = (void *)hdr + sechdrs[i].sh_offset;
+			*got += VSYSCALL_RELOCATION;
+		}
+	}
+	phdr = (void *)hdr + hdr->e_phoff;
+	for (i = 0; i < hdr->e_phnum; i++) {
+		phdr[i].p_vaddr += VSYSCALL_RELOCATION;
+		phdr[i].p_paddr += VSYSCALL_RELOCATION;
+	}
+	SYSENTER_RETURN_ADDR = (char *)&SYSENTER_RETURN + VSYSCALL_RELOCATION;
+}
+#endif
+
 int __init sysenter_setup(void)
 {
 	void *page = (void *)get_zeroed_page(GFP_ATOMIC);
 
-	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
-
-	if (!boot_cpu_has(X86_FEATURE_SEP)) {
+	if (!boot_cpu_has(X86_FEATURE_SEP))
 		memcpy(page,
 		       &vsyscall_int80_start,
 		       &vsyscall_int80_end - &vsyscall_int80_start);
-		return 0;
-	}
+	else
+		memcpy(page,
+			&vsyscall_sysenter_start,
+			&vsyscall_sysenter_end - &vsyscall_sysenter_start);
 
-	memcpy(page,
-	       &vsyscall_sysenter_start,
-	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);
+#ifdef CONFIG_RELOCATABLE_FIXMAP
+	fixup_vsyscall_elf((char *)page);
+#endif
+
+	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
 
 	return 0;
 }
Index: linux-2.6.13/arch/i386/kernel/asm-offsets.c
===================================================================
--- linux-2.6.13.orig/arch/i386/kernel/asm-offsets.c	2005-08-04 14:28:35.000000000 -0700
+++ linux-2.6.13/arch/i386/kernel/asm-offsets.c	2005-08-05 15:11:45.000000000 -0700
@@ -68,5 +68,9 @@
 		 sizeof(struct tss_struct));
 
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+#ifdef CONFIG_RELOCATABLE_FIXMAP
+	DEFINE(VSYSCALL_BASE, 0);
+#else
 	DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+#endif
 }
Index: linux-2.6.13/arch/i386/kernel/signal.c
===================================================================
--- linux-2.6.13.orig/arch/i386/kernel/signal.c	2005-08-03 23:36:46.000000000 -0700
+++ linux-2.6.13/arch/i386/kernel/signal.c	2005-08-05 15:11:33.000000000 -0700
@@ -345,6 +345,8 @@
    See vsyscall-sigreturn.S.  */
 extern void __user __kernel_sigreturn;
 extern void __user __kernel_rt_sigreturn;
+#define kernel_sigreturn  (VSYSCALL_RELOCATION + (void __user *)&__kernel_sigreturn)
+#define kernel_rt_sigreturn  (VSYSCALL_RELOCATION + (void __user *)&__kernel_rt_sigreturn)
 
 static int setup_frame(int sig, struct k_sigaction *ka,
 		       sigset_t *set, struct pt_regs * regs)
@@ -380,7 +382,7 @@
 			goto give_sigsegv;
 	}
 
-	restorer = &__kernel_sigreturn;
+	restorer = kernel_sigreturn;
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
@@ -476,7 +478,7 @@
 		goto give_sigsegv;
 
 	/* Set up to return from userspace.  */
-	restorer = &__kernel_rt_sigreturn;
+	restorer = kernel_rt_sigreturn;
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 	err |= __put_user(restorer, &frame->pretcode);
Index: linux-2.6.13/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.13.orig/arch/i386/kernel/entry.S	2005-08-04 14:17:15.000000000 -0700
+++ linux-2.6.13/arch/i386/kernel/entry.S	2005-08-05 14:09:15.000000000 -0700
@@ -200,7 +200,11 @@
 	pushl %ebp
 	pushfl
 	pushl $(__USER_CS)
+#ifdef CONFIG_RELOCATABLE_FIXMAP
+	pushl %ss:SYSENTER_RETURN_ADDR
+#else
 	pushl $SYSENTER_RETURN
+#endif
 
 /*
  * Load the potential sixth argument from user stack.
Index: linux-2.6.13/arch/i386/mm/init.c
===================================================================
--- linux-2.6.13.orig/arch/i386/mm/init.c	2005-08-04 14:39:17.000000000 -0700
+++ linux-2.6.13/arch/i386/mm/init.c	2005-08-05 15:20:04.000000000 -0700
@@ -42,6 +42,10 @@
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
+#ifdef CONFIG_RELOCATABLE_FIXMAP
+unsigned long __FIXADDR_TOP = 0;
+#endif
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
@@ -478,6 +482,12 @@
 		printk("NX (Execute Disable) protection: active\n");
 #endif
 
+#ifdef CONFIG_RELOCATABLE_FIXMAP
+	if (!__FIXADDR_TOP) 
+		__FIXADDR_TOP =  0xfffff000UL-(CONFIG_MEMORY_HOLE << 20);
+	printk(KERN_INFO "Fixmap top relocated to %lxh\n", __FIXADDR_TOP);
+#endif
+
 	pagetable_init();
 
 	load_cr3(swapper_pg_dir);
Index: linux-2.6.13/include/asm-i386/fixmap.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/fixmap.h	2005-08-04 14:14:24.000000000 -0700
+++ linux-2.6.13/include/asm-i386/fixmap.h	2005-08-05 15:36:13.000000000 -0700
@@ -20,7 +20,13 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP	0xfffff000
+#ifdef CONFIG_RELOCATABLE_FIXMAP
+extern unsigned long __FIXADDR_TOP;
+#define VSYSCALL_RELOCATION __fix_to_virt(FIX_VSYSCALL)
+#else
+#define __FIXADDR_TOP	(0xfffff000-(CONFIG_MEMORY_HOLE << 20))
+#define VSYSCALL_RELOCATION 0
+#endif
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
Index: linux-2.6.13/include/asm-i386/elf.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/elf.h	2005-08-02 17:06:23.000000000 -0700
+++ linux-2.6.13/include/asm-i386/elf.h	2005-08-05 15:31:32.000000000 -0700
@@ -129,7 +129,7 @@
 
 #define VSYSCALL_BASE	(__fix_to_virt(FIX_VSYSCALL))
 #define VSYSCALL_EHDR	((const struct elfhdr *) VSYSCALL_BASE)
-#define VSYSCALL_ENTRY	((unsigned long) &__kernel_vsyscall)
+#define VSYSCALL_ENTRY	((unsigned long) (VSYSCALL_RELOCATION+&__kernel_vsyscall))
 extern void __kernel_vsyscall;
 
 #define ARCH_DLINFO						\
Index: linux-2.6.13/include/linux/elf.h
===================================================================
--- linux-2.6.13.orig/include/linux/elf.h	2005-08-02 17:06:24.000000000 -0700
+++ linux-2.6.13/include/linux/elf.h	2005-08-05 12:06:17.000000000 -0700
@@ -138,6 +138,9 @@
 #define DT_DEBUG	21
 #define DT_TEXTREL	22
 #define DT_JMPREL	23
+#define DT_VERSYM	0x6ffffff0
+#define DT_VERDEF	0x6ffffffc
+#define DT_VERNEED	0x6ffffffe
 #define DT_LOPROC	0x70000000
 #define DT_HIPROC	0x7fffffff
 

[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux