On Wed, May 24, 2023 at 02:28:51AM +0300, kirill.shutemov@xxxxxxxxxxxxxxx wrote:
> On Tue, May 23, 2023 at 03:43:15PM -0700, Dave Hansen wrote:
> > On 5/23/23 15:37, kirill.shutemov@xxxxxxxxxxxxxxx wrote:
> > >> How does this work with load_unaligned_zeropad()?  Couldn't it be
> > >> running around poking at one of these vmalloc()'d pages via the direct
> > >> map during a shared->private conversion before the page has been accepted?
> > > Alias processing in __change_page_attr_set_clr() will change direct
> > > mapping if you call it on vmalloc()ed memory. I think we are safe wrt
> > > load_unaligned_zeropad() here.
> > 
> > We're *eventually* OK:
> > 
> > >	/* Notify hypervisor that we are about to set/clr encryption attribute. */
> > >	x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
> > >
> > >	ret = __change_page_attr_set_clr(&cpa, 1);
> > 
> > But what about in the middle between enc_status_change_prepare() and
> > __change_page_attr_set_clr()?  Don't the direct map and the
> > shared/private status of the page diverge in there?
> 
> Hmm. Maybe we would need to go through making the range in direct mapping
> non-present before notifying VMM about the change.
> 
> I need to look at this again in the morning.

Okay, I've got around to it finally.

Private->Shared conversion is safe: we first set the shared bit in the
direct mapping and in all aliases, and only then call MapGPA from
enc_status_change_finish(). So we never have privately mapped pages that
have already been converted to shared with MapGPA.

Shared->Private is not safe. As with Private->Shared, we adjust the direct
mapping before notifying the VMM and accepting the memory, so there is a
short window when we have privately mapped memory that is neither mapped
into the SEPT nor accepted. This is a problem because it can race with
load_unaligned_zeropad().
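To make the overrun concrete, here is a toy user-space sketch (mine, not
the kernel implementation; the real load_unaligned_zeropad() lives in
arch/x86/include/asm/word-at-a-time.h and relies on an exception fixup to
zero-pad the bytes from the faulting page). A word-sized read of a short
string near the end of a page unavoidably touches the following page,
which is exactly how a direct-map read can land on a page that is in the
middle of a conversion:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	/* Two adjacent pages; the string ends just before the boundary. */
	char *buf = aligned_alloc(page, 2 * page);
	char *s = buf + page - 3;

	strcpy(s, "ab");	/* "ab\0" occupies the last 3 bytes of page 0 */

	/*
	 * A word-at-a-time reader loads 8 bytes starting at 's', so 5 of
	 * them come from page 1 even though the string never enters it.
	 * Here both pages are mapped, so this is harmless; in the guest,
	 * page 1 may be halfway through a Shared->Private conversion.
	 */
	uint64_t word;
	memcpy(&word, s, sizeof(word));
	printf("loaded: %016" PRIx64 "\n", word);

	free(buf);
	return 0;
}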
Shared->Private conversion is rare. I only see one call total during boot
in my setup. Worth fixing anyway.

The patch below fixes the issue by hooking up enc_status_change_prepare()
and doing the Shared->Private conversion there. enc_status_change_finish()
then only covers the Private->Shared conversion.
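In other words, the ordering becomes the following (a condensed sketch of
how I read __set_memory_enc_pgtable() with this patch applied, not the
literal set_memory.c code; change_page_attr() is a stand-in name for
__change_page_attr_set_clr()):

static int conversion_order_sketch(unsigned long addr, int numpages, bool enc)
{
	/*
	 * Shared->Private work happens here, while everything still maps
	 * the range shared: MapGPA + accept, so a stray
	 * load_unaligned_zeropad() cannot hit a private, unaccepted page.
	 */
	if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
		return -EIO;

	/* Now flip the shared bit in the direct mapping and all aliases. */
	if (change_page_attr(addr, numpages, enc))
		return -EIO;

	/*
	 * Private->Shared work happens here, after the page tables already
	 * map the range shared: MapGPA is only called once no private
	 * mapping of the range remains.
	 */
	if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
		return -EIO;

	return 0;
}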
The patch is on top of the unaccepted memory patchset, which is a more
convenient base. I will rebase to Linus' tree if the approach looks sane
to you.

Any comments?

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 32501277ef84..b73ec2449c64 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -713,16 +713,32 @@ static bool tdx_cache_flush_required(void)
 	return true;
 }
 
+static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
+					  bool enc)
+{
+	phys_addr_t start = __pa(vaddr);
+	phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
+
+	if (!enc)
+		return true;
+
+	return tdx_enc_status_changed_phys(start, end, enc);
+}
+
 /*
  * Inform the VMM of the guest's intent for this physical page: shared with
  * the VMM or private to the guest. The VMM is expected to change its mapping
  * of the page in response.
  */
-static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+					 bool enc)
 {
 	phys_addr_t start = __pa(vaddr);
 	phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
 
+	if (enc)
+		return true;
+
 	return tdx_enc_status_changed_phys(start, end, enc);
 }
 
@@ -753,9 +769,10 @@ void __init tdx_early_init(void)
 	 */
 	physical_mask &= cc_mask - 1;
 
-	x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
-	x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
-	x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+	x86_platform.guest.enc_cache_flush_required  = tdx_cache_flush_required;
+	x86_platform.guest.enc_tlb_flush_required    = tdx_tlb_flush_required;
+	x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
+	x86_platform.guest.enc_status_change_finish  = tdx_enc_status_change_finish;
 
 	pr_info("Guest detected\n");
 }
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 88085f369ff6..1ca9701917c5 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -150,7 +150,7 @@ struct x86_init_acpi {
  * @enc_cache_flush_required	Returns true if a cache flush is needed before changing page encryption status
  */
 struct x86_guest {
-	void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+	bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
 	bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
 	bool (*enc_tlb_flush_required)(bool enc);
 	bool (*enc_cache_flush_required)(void);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d82f4fa2f1bf..64664311ac2b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -130,8 +130,8 @@ struct x86_cpuinit_ops x86_cpuinit = {
 
 static void default_nmi_init(void) { };
 
-static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
-static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
+static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
+static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
 static bool enc_tlb_flush_required_noop(bool enc) { return false; }
 static bool enc_cache_flush_required_noop(void) { return false; }
 static bool is_private_mmio_noop(u64 addr) {return false; }
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index e0b51c09109f..4f95c449a406 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 #endif
 }
 
-static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
 {
 	/*
 	 * To maintain the security guarantees of SEV-SNP guests, make sure
@@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
 	 */
 	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
 		snp_set_memory_shared(vaddr, npages);
+
+	return true;
 }
 
 /* Return true unconditionally: return value doesn't matter for the SEV side */
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 7159cf787613..b8f48ebe753c 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2151,7 +2151,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 	cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
 
 	/* Notify hypervisor that we are about to set/clr encryption attribute. */
-	x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
+	if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
+		return -EIO;
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 
-- 
  Kiryl Shutsemau / Kirill A. Shutemov