+ x86-espfix-cleanup.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled

     x86: espfix cleanup

has been added to the -mm tree.  Its filename is

     x86-espfix-cleanup.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: x86: espfix cleanup
From: Stas Sergeev <stsp@xxxxxxxx>

- Introduced PER_CPU() macro to be used from asm

- Introduced GET_DESC_BASE() macro to be used from asm

- Rewrote the fixup code in asm, as calling a C code with the altered %ss
  appeared to be unsafe

- No longer altering the stack from a .fixup section

- 16bit per-cpu stack is no longer used, instead the 32bit stack is used,
  and the base is patched the way so that the high word of the kernel and
  user %esp are the same.

- I think that the limit of regs->esp+THREAD_SIZE*2 is very permissive
  too (and I don't want to mess with granularity), so I left the default 4G
  limit, unless Zach will really complain on that.  :)

Signed-off-by: Stas Sergeev <stsp@xxxxxxxx>
Acked-by: Zachary Amsden <zach@xxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 arch/i386/kernel/asm-offsets.c |    5 ++
 arch/i386/kernel/cpu/common.c  |   11 ----
 arch/i386/kernel/entry.S       |   73 ++++++++++++++-----------------
 arch/i386/kernel/head.S        |    2 
 arch/i386/kernel/traps.c       |   53 +++++-----------------
 include/asm-i386/desc.h        |   27 +++++++++--
 include/asm-i386/percpu.h      |   21 ++++++++
 7 files changed, 95 insertions(+), 97 deletions(-)

diff -puN arch/i386/kernel/asm-offsets.c~x86-espfix-cleanup arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c~x86-espfix-cleanup
+++ a/arch/i386/kernel/asm-offsets.c
@@ -58,6 +58,11 @@ void foo(void)
 	OFFSET(TI_sysenter_return, thread_info, sysenter_return);
 	BLANK();
 
+	OFFSET(GDS_size, Xgt_desc_struct, size);
+	OFFSET(GDS_address, Xgt_desc_struct, address);
+	OFFSET(GDS_pad, Xgt_desc_struct, pad);
+	BLANK();
+
 	OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
 	OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
 	BLANK();
diff -puN arch/i386/kernel/cpu/common.c~x86-espfix-cleanup arch/i386/kernel/cpu/common.c
--- a/arch/i386/kernel/cpu/common.c~x86-espfix-cleanup
+++ a/arch/i386/kernel/cpu/common.c
@@ -24,9 +24,6 @@
 DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
 
-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
-
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_fxsr __cpuinitdata;
 static int disable_x86_serial_nr __cpuinitdata = 1;
@@ -594,7 +591,6 @@ void __cpuinit cpu_init(void)
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
 	struct desc_struct *gdt;
-	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
@@ -642,13 +638,6 @@ old_gdt:
 	 * and set up the GDT descriptor:
 	 */
  	memcpy(gdt, cpu_gdt_table, GDT_SIZE);
-
-	/* Set up GDT entry for 16bit stack */
- 	*(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
-		((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
-		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
-		(CPU_16BIT_STACK_SIZE - 1);
-
 	cpu_gdt_descr->size = GDT_SIZE - 1;
  	cpu_gdt_descr->address = (unsigned long)gdt;
 
diff -puN arch/i386/kernel/entry.S~x86-espfix-cleanup arch/i386/kernel/entry.S
--- a/arch/i386/kernel/entry.S~x86-espfix-cleanup
+++ a/arch/i386/kernel/entry.S
@@ -48,6 +48,7 @@
 #include <asm/smp.h>
 #include <asm/page.h>
 #include <asm/desc.h>
+#include <asm/percpu.h>
 #include <asm/dwarf2.h>
 #include "irq_vectors.h"
 
@@ -403,23 +404,18 @@ ldt_ss:
 	 * This is an "official" bug of all the x86-compatible
 	 * CPUs, which we can try to work around to make
 	 * dosemu and wine happy. */
-	subl $8, %esp		# reserve space for switch16 pointer
-	CFI_ADJUST_CFA_OFFSET 8
+	movl OLDESP(%esp), %eax
+	movl %esp, %edx
+	call patch_espfix_base
+	pushl $__ESPFIX_SS
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
 	cli
 	TRACE_IRQS_OFF
-	movl %esp, %eax
-	/* Set up the 16bit stack frame with switch32 pointer on top,
-	 * and a switch16 pointer on top of the current frame. */
-	call setup_x86_bogus_stack
-	CFI_ADJUST_CFA_OFFSET -8	# frame has moved
-	TRACE_IRQS_IRET
-	RESTORE_REGS
-	lss 20+4(%esp), %esp	# switch to 16bit stack
-1:	iret
-.section __ex_table,"a"
-	.align 4
-	.long 1b,iret_exc
-.previous
+	lss (%esp), %esp
+	CFI_ADJUST_CFA_OFFSET -8
+	jmp restore_nocheck
 	CFI_ENDPROC
 
 	# perform work that needs to be done immediately before resumption
@@ -509,30 +505,30 @@ syscall_badsys:
 	CFI_ENDPROC
 
 #define FIXUP_ESPFIX_STACK \
-	movl %esp, %eax; \
-	/* switch to 32bit stack using the pointer on top of 16bit stack */ \
-	lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
-	/* copy data from 16bit stack to 32bit stack */ \
-	call fixup_x86_bogus_stack; \
-	/* put ESP to the proper location */ \
-	movl %eax, %esp;
-#define UNWIND_ESPFIX_STACK \
+	/* since we are on a wrong stack, we cant make it a C code :( */ \
+	GET_THREAD_INFO(%ebp); \
+	movl TI_cpu(%ebp), %ebx; \
+	PER_CPU(cpu_gdt_descr, %ebx); \
+	movl GDS_address(%ebx), %ebx; \
+	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
+	addl %esp, %eax; \
+	pushl $__KERNEL_DS; \
+	CFI_ADJUST_CFA_OFFSET 4; \
 	pushl %eax; \
 	CFI_ADJUST_CFA_OFFSET 4; \
+	lss (%esp), %esp; \
+	CFI_ADJUST_CFA_OFFSET -8;
+#define UNWIND_ESPFIX_STACK \
 	movl %ss, %eax; \
-	/* see if on 16bit stack */ \
+	/* see if on espfix stack */ \
 	cmpw $__ESPFIX_SS, %ax; \
-	je 28f; \
-27:	popl %eax; \
-	CFI_ADJUST_CFA_OFFSET -4; \
-.section .fixup,"ax"; \
-28:	movl $__KERNEL_DS, %eax; \
+	jne 27f; \
+	movl $__KERNEL_DS, %eax; \
 	movl %eax, %ds; \
 	movl %eax, %es; \
-	/* switch to 32bit stack */ \
+	/* switch to normal stack */ \
 	FIXUP_ESPFIX_STACK; \
-	jmp 27b; \
-.previous
+27:;
 
 /*
  * Build the entry stubs and pointer table with
@@ -601,7 +597,6 @@ error_code:
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eax, 0
-	xorl %eax, %eax
 	pushl %ebp
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ebp, 0
@@ -614,7 +609,6 @@ error_code:
 	pushl %edx
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET edx, 0
-	decl %eax			# eax = -1
 	pushl %ecx
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ecx, 0
@@ -631,7 +625,7 @@ error_code:
 	/*CFI_REGISTER es, ecx*/
 	movl ES(%esp), %edi		# get the function address
 	movl ORIG_EAX(%esp), %edx	# get the error code
-	movl %eax, ORIG_EAX(%esp)
+	movl $-1, ORIG_EAX(%esp)
 	movl %ecx, ES(%esp)
 	/*CFI_REL_OFFSET es, ES*/
 	movl $(__USER_DS), %ecx
@@ -733,7 +727,7 @@ ENTRY(nmi)
 	cmpw $__ESPFIX_SS, %ax
 	popl %eax
 	CFI_ADJUST_CFA_OFFSET -4
-	je nmi_16bit_stack
+	je nmi_espfix_stack
 	cmpl $sysenter_entry,(%esp)
 	je nmi_stack_fixup
 	pushl %eax
@@ -772,14 +766,13 @@ nmi_debug_stack_check:
 	FIX_STACK(24,nmi_stack_correct, 1)
 	jmp nmi_stack_correct
 
-nmi_16bit_stack:
+nmi_espfix_stack:
 	RING0_INT_FRAME
 	/* create the pointer to lss back */
 	pushl %ss
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %esp
 	CFI_ADJUST_CFA_OFFSET 4
-	movzwl %sp, %esp
 	addw $4, (%esp)
 	/* copy the iret frame of 12 bytes */
 	.rept 3
@@ -790,11 +783,11 @@ nmi_16bit_stack:
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	FIXUP_ESPFIX_STACK		# %eax == %esp
-	CFI_ADJUST_CFA_OFFSET -20	# the frame has now moved
 	xorl %edx,%edx			# zero error code
 	call do_nmi
 	RESTORE_REGS
-	lss 12+4(%esp), %esp		# back to 16bit stack
+	lss 12+4(%esp), %esp		# back to espfix stack
+	CFI_ADJUST_CFA_OFFSET -24
 1:	iret
 	CFI_ENDPROC
 .section __ex_table,"a"
diff -puN arch/i386/kernel/head.S~x86-espfix-cleanup arch/i386/kernel/head.S
--- a/arch/i386/kernel/head.S~x86-espfix-cleanup
+++ a/arch/i386/kernel/head.S
@@ -590,7 +590,7 @@ ENTRY(cpu_gdt_table)
 	.quad 0x00009a000000ffff	/* 0xc0 APM CS 16 code (16 bit) */
 	.quad 0x004092000000ffff	/* 0xc8 APM DS    data */
 
-	.quad 0x0000920000000000	/* 0xd0 - ESPFIX 16-bit SS */
+	.quad 0x00cf92000000ffff	/* 0xd0 - ESPFIX SS */
 	.quad 0x0000000000000000	/* 0xd8 - unused */
 	.quad 0x0000000000000000	/* 0xe0 - unused */
 	.quad 0x0000000000000000	/* 0xe8 - unused */
diff -puN arch/i386/kernel/traps.c~x86-espfix-cleanup arch/i386/kernel/traps.c
--- a/arch/i386/kernel/traps.c~x86-espfix-cleanup
+++ a/arch/i386/kernel/traps.c
@@ -1017,49 +1017,20 @@ fastcall void do_spurious_interrupt_bug(
 #endif
 }
 
-fastcall void setup_x86_bogus_stack(unsigned char * stk)
+fastcall unsigned long patch_espfix_base(unsigned long uesp,
+					  unsigned long kesp)
 {
-	unsigned long *switch16_ptr, *switch32_ptr;
-	struct pt_regs *regs;
-	unsigned long stack_top, stack_bot;
-	unsigned short iret_frame16_off;
 	int cpu = smp_processor_id();
-	/* reserve the space on 32bit stack for the magic switch16 pointer */
-	memmove(stk, stk + 8, sizeof(struct pt_regs));
-	switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
-	regs = (struct pt_regs *)stk;
-	/* now the switch32 on 16bit stack */
-	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
-	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
-	switch32_ptr = (unsigned long *)(stack_top - 8);
-	iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
-	/* copy iret frame on 16bit stack */
-	memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
-	/* fill in the switch pointers */
-	switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
-	switch16_ptr[1] = __ESPFIX_SS;
-	switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
-		8 - CPU_16BIT_STACK_SIZE;
-	switch32_ptr[1] = __KERNEL_DS;
-}
-
-fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
-{
-	unsigned long *switch32_ptr;
-	unsigned char *stack16, *stack32;
-	unsigned long stack_top, stack_bot;
-	int len;
-	int cpu = smp_processor_id();
-	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
-	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
-	switch32_ptr = (unsigned long *)(stack_top - 8);
-	/* copy the data from 16bit stack to 32bit stack */
-	len = CPU_16BIT_STACK_SIZE - 8 - sp;
-	stack16 = (unsigned char *)(stack_bot + sp);
-	stack32 = (unsigned char *)
-		(switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
-	memcpy(stack32, stack16, len);
-	return stack32;
+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+	struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
+	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
+	__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
+	/* Set up base for espfix segment */
+ 	desc &= 0x00ffff000000ffffULL;
+ 	desc |=	((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
+		((((__u64)base) << 32) & 0xff00000000000000ULL);
+	*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
+	return kesp - base;
 }
 
 /*
diff -puN include/asm-i386/desc.h~x86-espfix-cleanup include/asm-i386/desc.h
--- a/include/asm-i386/desc.h~x86-espfix-cleanup
+++ a/include/asm-i386/desc.h
@@ -4,8 +4,6 @@
 #include <asm/ldt.h>
 #include <asm/segment.h>
 
-#define CPU_16BIT_STACK_SIZE 1024
-
 #ifndef __ASSEMBLY__
 
 #include <linux/preempt.h>
@@ -16,8 +14,6 @@
 
 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
 
-DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-
 struct Xgt_desc_struct {
 	unsigned short size;
 	unsigned long address __attribute__((packed));
@@ -162,6 +158,29 @@ static inline unsigned long get_desc_bas
 	return base;
 }
 
+#else /* __ASSEMBLY__ */
+
+/*
+ * GET_DESC_BASE reads the descriptor base of the specified segment.
+ *
+ * Args:
+ *    idx - descriptor index
+ *    gdt - GDT pointer
+ *    base - 32bit register to which the base will be written
+ *    lo_w - lo word of the "base" register
+ *    lo_b - lo byte of the "base" register
+ *    hi_b - hi byte of the low word of the "base" register
+ *
+ * Example:
+ *    GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
+ *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
+ */
+#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
+	movb idx*8+4(gdt), lo_b; \
+	movb idx*8+7(gdt), hi_b; \
+	shll $16, base; \
+	movw idx*8+2(gdt), lo_w;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff -puN include/asm-i386/percpu.h~x86-espfix-cleanup include/asm-i386/percpu.h
--- a/include/asm-i386/percpu.h~x86-espfix-cleanup
+++ a/include/asm-i386/percpu.h
@@ -1,6 +1,27 @@
 #ifndef __ARCH_I386_PERCPU__
 #define __ARCH_I386_PERCPU__
 
+#ifndef __ASSEMBLY__
 #include <asm-generic/percpu.h>
+#else
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ *    var - variable name
+ *    cpu - 32bit register containing the current CPU number
+ *
+ * The resulting address is stored in the "cpu" argument.
+ *
+ * Example:
+ *    PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#define PER_CPU(var, cpu) \
+	shll $2, cpu; \
+	movl __per_cpu_offset(cpu), cpu; \
+	addl $per_cpu__/**/var, cpu;
+
+#endif /* !__ASSEMBLY__ */
 
 #endif /* __ARCH_I386_PERCPU__ */
_

Patches currently in -mm which might be from stsp@xxxxxxxx are

x86-espfix-cleanup.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux