On Fri, Mar 27, 2015 at 10:54 PM, Andy Lutomirski <luto@xxxxxxxxxx> wrote: > --- a/arch/x86/ia32/ia32entry.S > +++ b/arch/x86/ia32/ia32entry.S > @@ -180,28 +180,34 @@ sysenter_dispatch: > testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) > jnz sysexit_audit > sysexit_from_sys_call: > + /* > + * NB: sysexit is not obviously safe for 64-bit kernels -- an > + * NMI between sti and sysexit has poorly specified behavior, > + * and and NMI followed by an IRQ with usergs is fatal. So > + * we just pretend we're using sysexit but we really use > + * sysretl instead. > + * > + * This code path is still called sysexit because it pairs > + * with sysenter and it uses the sysenter calling convention. > + */ > andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) > - /* clear IF, that popfq doesn't enable interrupts early */ > - andl $~0x200,EFLAGS(%rsp) > - movl RIP(%rsp),%edx /* User %eip */ > - CFI_REGISTER rip,rdx > + movl RIP(%rsp),%ecx /* User %eip */ > + CFI_REGISTER rip,rcx > RESTORE_RSI_RDI I think you need to replace RESTORE_RSI_RDI with RESTORE_RSI_RDI_RDX > - /* pop everything except ss,rsp,rflags slots */ > - REMOVE_PT_GPREGS_FROM_STACK 3*8 > + xorl %edx,%edx Why do you clear %edx? > xorq %r8,%r8 > xorq %r9,%r9 > xorq %r10,%r10 > - xorq %r11,%r11 > - popfq_cfi > + movl EFLAGS(%rsp),%r11d /* User eflags */ > /*CFI_RESTORE rflags*/ > - popq_cfi %rcx /* User %esp */ > - CFI_REGISTER rsp,rcx > TRACE_IRQS_ON > /* > - * 32bit SYSEXIT restores eip from edx, esp from ecx. > - * cs and ss are loaded from MSRs. > + * Sysretl works even on Intel CPUs. Use it in preference to sysexit, > + * since it avoids a dicey window with interrupts enabled. > + * CS and SS are loaded from MSRs. Please do not remove the mini-doc which says what is restored from where. These instructions are not that obvious. I propose: * 64bit->32bit SYSRET restores eip from ecx, * eflags from r11 (but RF and VM bits are forced to 0), * CS and SS are loaded from MSRs. 
Or maybe just ...

sysexit_from_sys_call:
        /*
         * Sysretl works even on Intel CPUs. Use it in preference to sysexit,
         * since it avoids a dicey window with interrupts enabled.
         */
        jmp to sysretl_from_sys_call

and remove the entire "sysexit tail" code path?
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html