On 02/09/2018 04:25 AM, Joerg Roedel wrote: > From: Joerg Roedel <jroedel@xxxxxxx> > > Add unconditional cr3 switches between user and kernel cr3 > to all non-NMI entry and exit points. > > Signed-off-by: Joerg Roedel <jroedel@xxxxxxx> > --- > arch/x86/entry/entry_32.S | 59 ++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 58 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S > index 9693485..b5ef003 100644 > --- a/arch/x86/entry/entry_32.S > +++ b/arch/x86/entry/entry_32.S > @@ -328,6 +328,25 @@ > #endif /* CONFIG_X86_ESPFIX32 */ > .endm > > +/* Unconditionally switch to user cr3 */ > +.macro SWITCH_TO_USER_CR3 scratch_reg:req > + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI > + > + movl %cr3, \scratch_reg > + orl $PTI_SWITCH_MASK, \scratch_reg > + movl \scratch_reg, %cr3 > +.Lend_\@: > +.endm > + > +/* Unconditionally switch to kernel cr3 */ > +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req > + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI > + movl %cr3, \scratch_reg > + andl $(~PTI_SWITCH_MASK), \scratch_reg > + movl \scratch_reg, %cr3 > +.Lend_\@: > +.endm > + > > /* > * Called with pt_regs fully populated and kernel segments loaded, > @@ -343,6 +362,8 @@ > > ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV > > + SWITCH_TO_KERNEL_CR3 scratch_reg=%eax > + > /* Are we on the entry stack? Bail out if not! */ > movl PER_CPU_VAR(cpu_entry_area), %edi > addl $CPU_ENTRY_AREA_entry_stack, %edi > @@ -637,6 +658,18 @@ ENTRY(xen_sysenter_target) > * 0(%ebp) arg6 > */ > ENTRY(entry_SYSENTER_32) > + /* > + * On entry-stack with all userspace-regs live - save and > + * restore eflags and %eax to use it as scratch-reg for the cr3 > + * switch. 
> + */ > + pushfl > + pushl %eax > + SWITCH_TO_KERNEL_CR3 scratch_reg=%eax > + popl %eax > + popfl > + > + /* Stack empty again, switch to task stack */ > movl TSS_entry_stack(%esp), %esp > > .Lsysenter_past_esp: > @@ -691,6 +724,10 @@ ENTRY(entry_SYSENTER_32) > movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */ > 1: mov PT_FS(%esp), %fs > PTGS_TO_GS > + > + /* Segments are restored - switch to user cr3 */ > + SWITCH_TO_USER_CR3 scratch_reg=%eax > + > popl %ebx /* pt_regs->bx */ > addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */ > popl %esi /* pt_regs->si */ > @@ -778,7 +815,23 @@ restore_all: > .Lrestore_all_notrace: > CHECK_AND_APPLY_ESPFIX > .Lrestore_nocheck: > - RESTORE_REGS 4 # skip orig_eax/error_code > + /* > + * First restore user segments. This can cause exceptions, so we > + * run it with kernel cr3. > + */ > + RESTORE_SEGMENTS > + > + /* > + * Segments are restored - no more exceptions from here on except on > + * iret, but that handled safely. > + */ > + SWITCH_TO_USER_CR3 scratch_reg=%eax > + > + /* Restore rest */ > + RESTORE_INT_REGS > + > + /* Unwind stack to the iret frame */ > + RESTORE_SKIP_SEGMENTS 4 # skip orig_eax/error_code > .Lirq_return: > INTERRUPT_RETURN > > @@ -1139,6 +1192,10 @@ ENTRY(debug) > > SAVE_ALL > ENCODE_FRAME_POINTER > + > + /* Make sure we are running on kernel cr3 */ > + SWITCH_TO_KERNEL_CR3 scratch_reg=%eax > + > xorl %edx, %edx # error code 0 > movl %esp, %eax # pt_regs pointer > The debug exception calls ret_from_exception on exit. If coming from userspace, the C function prepare_exit_to_usermode() will be called. With the PTI-32 code, this means that the function will be called on the entry stack instead of the task stack. This can be problematic, as macros like current won't work anymore. -Longman -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>