> On Sep 12, 2018, at 7:18 AM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote: > >> On 12/09/2018 15:33, Sebastian Andrzej Siewior wrote: >> From: Rik van Riel <riel@xxxxxxxxxxx> >> >> While most of a task's FPU state is only needed in user space, >> the protection keys need to be in place immediately after a >> context switch. >> >> The reason is that any accesses to userspace memory while running >> in kernel mode also need to abide by the memory permissions >> specified in the protection keys. >> >> The pkru info is put in its own cache line in the fpu struct because >> that cache line is accessed anyway at context switch time, and the >> location of the pkru info in the xsave buffer changes depending on >> what other FPU registers are in use if the CPU uses compressed xsave >> state (on by default). >> >> The initial state of pkru is zeroed out automatically by fpstate_init. >> >> Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx> >> [bigeasy: load PKRU state only if we also load FPU content] >> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> >> --- >> arch/x86/include/asm/fpu/internal.h | 11 +++++++++-- >> arch/x86/include/asm/fpu/types.h | 10 ++++++++++ >> arch/x86/include/asm/pgtable.h | 6 +----- >> arch/x86/mm/pkeys.c | 14 ++++++++++++++ >> 4 files changed, 34 insertions(+), 7 deletions(-) >> >> diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h >> index 16c4077ffc945..57bd1576e033d 100644 >> --- a/arch/x86/include/asm/fpu/internal.h >> +++ b/arch/x86/include/asm/fpu/internal.h >> @@ -573,8 +573,15 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) >> bool preload = static_cpu_has(X86_FEATURE_FPU) && >> new_fpu->initialized; >> >> - if (preload) >> - __fpregs_load_activate(new_fpu, cpu); >> + if (!preload) >> + return; >> + >> + __fpregs_load_activate(new_fpu, cpu); >> + /* Protection keys need to be in place right at context switch time. */ >> + if (boot_cpu_has(X86_FEATURE_OSPKE)) { >> + if (new_fpu->pkru != __read_pkru()) >> + __write_pkru(new_fpu->pkru); >> + } >> } >> >> /* >> diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h >> index 202c53918ecfa..6fa58d37938d2 100644 >> --- a/arch/x86/include/asm/fpu/types.h >> +++ b/arch/x86/include/asm/fpu/types.h >> @@ -293,6 +293,16 @@ struct fpu { >> */ >> unsigned int last_cpu; >> >> + /* >> + * Protection key bits. These also live inside fpu.state.xsave, >> + * but the location varies if the CPU uses the compressed format >> + * for XSAVE(OPT). >> + * >> + * The protection key needs to be switched out immediately at context >> + * switch time, so it is in place for things like copy_to_user. >> + */ >> + unsigned int pkru; >> + >> /* >> * @initialized: >> * >> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h >> index 690c0307afed0..cc36f91011ad7 100644 >> --- a/arch/x86/include/asm/pgtable.h >> +++ b/arch/x86/include/asm/pgtable.h >> @@ -132,11 +132,7 @@ static inline u32 read_pkru(void) >> return 0; >> } >> >> -static inline void write_pkru(u32 pkru) >> -{ >> - if (boot_cpu_has(X86_FEATURE_OSPKE)) >> - __write_pkru(pkru); >> -} >> +extern void write_pkru(u32 pkru); >> >> static inline int pte_young(pte_t pte) >> { >> diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c >> index 6e98e0a7c9231..c7a7b6bd64009 100644 >> --- a/arch/x86/mm/pkeys.c >> +++ b/arch/x86/mm/pkeys.c >> @@ -18,6 +18,20 @@ >> >> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ >> #include <asm/mmu_context.h> /* vma_pkey() */ >> +#include <asm/fpu/internal.h> >> + >> +void write_pkru(u32 pkru) >> +{ >> + if (!boot_cpu_has(X86_FEATURE_OSPKE)) >> + return; >> + >> + current->thread.fpu.pkru = pkru; >> + >> + __fpregs_changes_begin(); >> + __fpregs_load_activate(¤t->thread.fpu, smp_processor_id()); >> + __write_pkru(pkru); >> + __fpregs_changes_end(); >> +} >> >> int __execute_only_pkey(struct mm_struct *mm) >> { >> > > I think you can go a step further and exclude PKRU state from > copy_kernel_to_fpregs altogether; you just use RDPKRU/WRPKRU. This also > means you don't need to call __fpregs_* functions in write_pkru. > > Except that the signal ABI has PKRU in the xstate. So we’d need to fake it or do something special for signals.