[PATCH 1/3] Paravirtualization: Kernel Ring Cleanups

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,

	I've been looking at finding common ground between the VMI, Xen and
other paravirtualization approaches, and after some discussion, we're
getting somewhere.

	These first two patches are the fundamentals, stolen mainly from the
VMI patches: removing assumptions about the kernel running in ring 0,
and wrapping in macros all the obvious instructions that need
paravirtualizing.  The third patch is more ambitious: it introduces a
"paravirt_ops" structure (a la PPC's ppc_md) through which all these
ops are indirected.  This should allow Xen, VMI and other variants to
build on a common base.

	These patches also live at
http://kernel.org/pub/linux/kernel/people/rusty/Paravirt

Feedback welcome!
Rusty.

Name: Kernel Ring Cleanups
Status: Booted on 2.6.17-rc2-git7
Signed-off-by: Rusty Russell <rusty@xxxxxxxxxxxxxxx>

This is Zach's patch to clean up assumptions about the kernel running
in ring 0 (which it doesn't when running paravirtualized).

1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3
2) Add a get_kernel_rpl() function
3) Create COMPARE_SEGMENT_STACK and COMPARE_SEGMENT_REG macros which
   can mask out the bottom two bits (RPL) when comparing for
   paravirtualization.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc2-git7/arch/i386/kernel/entry.S tmp/arch/i386/kernel/entry.S
--- linux-2.6.17-rc2-git7/arch/i386/kernel/entry.S	2006-04-21 12:05:02.000000000 +1000
+++ tmp/arch/i386/kernel/entry.S	2006-05-02 16:00:17.000000000 +1000
@@ -144,9 +144,11 @@ ret_from_exception:
 ret_from_intr:
 	GET_THREAD_INFO(%ebp)
 	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
+	andl $VM_MASK, %eax
 	movb CS(%esp), %al
-	testl $(VM_MASK | 3), %eax
-	jz resume_kernel
+	andb $SEGMENT_RPL_MASK, %al
+	cmpl $SEGMENT_RPL_MASK, %eax
+	jb resume_kernel		# not returning to vm86 or userspace
 ENTRY(resume_userspace)
  	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
@@ -386,17 +388,14 @@ syscall_badsys:
 	/* put ESP to the proper location */ \
 	movl %eax, %esp;
 #define UNWIND_ESPFIX_STACK \
-	pushl %eax; \
-	movl %ss, %eax; \
-	/* see if on 16bit stack */ \
-	cmpw $__ESPFIX_SS, %ax; \
+	COMPARE_SEGMENT_REG(__ESPFIX_SS, %ss); \
 	jne 28f; \
-	movl $__KERNEL_DS, %edx; \
+	movl $__USER_DS, %edx; \
 	movl %edx, %ds; \
 	movl %edx, %es; \
 	/* switch to 32bit stack */ \
 	FIXUP_ESPFIX_STACK \
-28:	popl %eax;
+28:;
 
 /*
  * Build the entry stubs and pointer table with
@@ -455,6 +454,7 @@ error_code:
 	pushl %es
 	UNWIND_ESPFIX_STACK
 	popl %ecx
+	movl EAX(%esp), %eax
 	movl ES(%esp), %edi		# get the function address
 	movl ORIG_EAX(%esp), %edx	# get the error code
 	movl %eax, ORIG_EAX(%esp)
@@ -505,12 +505,12 @@ device_not_available_emulate:
  * the instruction that would have done it for sysenter.
  */
 #define FIX_STACK(offset, ok, label)		\
-	cmpw $__KERNEL_CS,4(%esp);		\
+	COMPARE_SEGMENT_STACK(__KERNEL_CS, 4);	\
 	jne ok;					\
 label:						\
 	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
 	pushfl;					\
-	pushl $__KERNEL_CS;			\
+	push  %cs;				\
 	pushl $sysenter_past_esp
 
 KPROBE_ENTRY(debug)
@@ -534,10 +534,7 @@ debug_stack_correct:
  * fault happened on the sysenter path.
  */
 ENTRY(nmi)
-	pushl %eax
-	movl %ss, %eax
-	cmpw $__ESPFIX_SS, %ax
-	popl %eax
+	COMPARE_SEGMENT_REG(__ESPFIX_SS, %ss)
 	je nmi_16bit_stack
 	cmpl $sysenter_entry,(%esp)
 	je nmi_stack_fixup
@@ -564,7 +561,7 @@ nmi_stack_fixup:
 	FIX_STACK(12,nmi_stack_correct, 1)
 	jmp nmi_stack_correct
 nmi_debug_stack_check:
-	cmpw $__KERNEL_CS,16(%esp)
+	COMPARE_SEGMENT_STACK(__KERNEL_CS, 16)
 	jne nmi_stack_correct
 	cmpl $debug,(%esp)
 	jb nmi_stack_correct
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc2-git7/arch/i386/kernel/process.c tmp/arch/i386/kernel/process.c
--- linux-2.6.17-rc2-git7/arch/i386/kernel/process.c	2006-04-21 12:05:02.000000000 +1000
+++ tmp/arch/i386/kernel/process.c	2006-05-02 15:57:41.000000000 +1000
@@ -347,7 +347,7 @@ int kernel_thread(int (*fn)(void *), voi
 	regs.xes = __USER_DS;
 	regs.orig_eax = -1;
 	regs.eip = (unsigned long) kernel_thread_helper;
-	regs.xcs = __KERNEL_CS;
+	regs.xcs = __KERNEL_CS | get_kernel_rpl();
 	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
 
 	/* Ok, create the new process.. */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc2-git7/arch/i386/kernel/traps.c tmp/arch/i386/kernel/traps.c
--- linux-2.6.17-rc2-git7/arch/i386/kernel/traps.c	2006-04-21 12:05:02.000000000 +1000
+++ tmp/arch/i386/kernel/traps.c	2006-05-02 15:57:41.000000000 +1000
@@ -1013,10 +1013,10 @@ fastcall void setup_x86_bogus_stack(unsi
 	memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
 	/* fill in the switch pointers */
 	switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
-	switch16_ptr[1] = __ESPFIX_SS;
+	switch16_ptr[1] = __ESPFIX_SS | get_kernel_rpl();
 	switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
 		8 - CPU_16BIT_STACK_SIZE;
-	switch32_ptr[1] = __KERNEL_DS;
+	switch32_ptr[1] = __KERNEL_DS | get_kernel_rpl();
 }
 
 fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc2-git7/include/asm-i386/ptrace.h tmp/include/asm-i386/ptrace.h
--- linux-2.6.17-rc2-git7/include/asm-i386/ptrace.h	2006-03-23 12:44:59.000000000 +1100
+++ tmp/include/asm-i386/ptrace.h	2006-05-02 15:57:41.000000000 +1000
@@ -60,6 +60,7 @@ struct pt_regs {
 #ifdef __KERNEL__
 
 #include <asm/vm86.h>
+#include <asm/segment.h>
 
 struct task_struct;
 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
@@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_str
  */
 static inline int user_mode(struct pt_regs *regs)
 {
-	return (regs->xcs & 3) != 0;
+	return (regs->xcs & SEGMENT_RPL_MASK) == 3;
 }
 static inline int user_mode_vm(struct pt_regs *regs)
 {
-	return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0;
+	return (((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= 3);
 }
 #define instruction_pointer(regs) ((regs)->eip)
 #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc2-git7/include/asm-i386/segment.h tmp/include/asm-i386/segment.h
--- linux-2.6.17-rc2-git7/include/asm-i386/segment.h	2006-03-23 12:44:59.000000000 +1100
+++ tmp/include/asm-i386/segment.h	2006-05-02 15:57:41.000000000 +1000
@@ -112,4 +112,18 @@
  */
 #define IDT_ENTRIES 256
 
+/* Bottom two bits of xcs give the ring privilege level */
+#define SEGMENT_RPL_MASK 0x3
+
+#define get_kernel_rpl()  0
+
+#define COMPARE_SEGMENT_STACK(segment, offset)	\
+	cmpw $segment, offset(%esp);
+
+#define COMPARE_SEGMENT_REG(segment, reg)	\
+	pushl %eax;				\
+	mov   reg, %eax;			\
+	cmpw  $segment,%ax;			\
+	popl  %eax;
+
 #endif

-- 
 ccontrol: http://ccontrol.ozlabs.org


[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux