On Mon, 20 Mar 2023 13:05:43 -0500 Michael Roth <michael.roth@xxxxxxx> wrote: > On Fri, Mar 17, 2023 at 09:56:11PM -0700, Isaku Yamahata wrote: > > On Mon, Feb 20, 2023 at 12:37:53PM -0600, > > Michael Roth <michael.roth@xxxxxxx> wrote: > > > > > This callback will do any platform-specific handling needed for > > > converting pages between shared/private. > > > > > > Signed-off-by: Michael Roth <michael.roth@xxxxxxx> > > > --- > > > arch/x86/include/asm/kvm-x86-ops.h | 1 + > > > arch/x86/include/asm/kvm_host.h | 2 ++ > > > arch/x86/kvm/mmu/mmu.c | 13 +++++++++++++ > > > include/linux/kvm_host.h | 4 ++++ > > > virt/kvm/kvm_main.c | 29 +++++++++++++++++++++++++++++ > > > 5 files changed, 49 insertions(+) > > > > > > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h > > > index 72183da010b8..a8aaf532c2ab 100644 > > > --- a/arch/x86/include/asm/kvm-x86-ops.h > > > +++ b/arch/x86/include/asm/kvm-x86-ops.h > > > @@ -132,6 +132,7 @@ KVM_X86_OP(complete_emulated_msr) > > > KVM_X86_OP(vcpu_deliver_sipi_vector) > > > KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); > > > KVM_X86_OP_OPTIONAL_RET0(fault_is_private); > > > +KVM_X86_OP_OPTIONAL_RET0(update_mem_attr) > > > > > > #undef KVM_X86_OP > > > #undef KVM_X86_OP_OPTIONAL > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > > index f856d689dda0..2da3fb2d5d1b 100644 > > > --- a/arch/x86/include/asm/kvm_host.h > > > +++ b/arch/x86/include/asm/kvm_host.h > > > @@ -1644,6 +1644,8 @@ struct kvm_x86_ops { > > > void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, > > > int root_level); > > > bool (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault); > > > + int (*update_mem_attr)(struct kvm_memory_slot *slot, unsigned int attr, > > > + gfn_t start, gfn_t end); > > > > > > bool (*has_wbinvd_exit)(void); > > > > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > > > index fb3f34b7391c..053bd77bbf52 
100644 > > > --- a/arch/x86/kvm/mmu/mmu.c > > > +++ b/arch/x86/kvm/mmu/mmu.c > > > @@ -7251,4 +7251,17 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm, > > > linfo_update_mixed(gfn, slot, level, mixed); > > > } > > > } > > > + > > > +void kvm_arch_post_set_memory_attributes(struct kvm *kvm, > > > + struct kvm_memory_slot *slot, > > > + unsigned long attrs, > > > + gfn_t start, gfn_t end) > > > +{ > > > + int ret; > > > + > > > + ret = static_call(kvm_x86_update_mem_attr)(slot, attrs, start, end); > > > + if (ret) > > > + pr_warn_ratelimited("Failed to update GFN range 0x%llx-0x%llx with attributes 0x%lx. Ret: %d\n", > > > + start, end, attrs, ret); > > > +} > > > #endif > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > > index fdc59479b3e2..d200b8f45583 100644 > > > --- a/include/linux/kvm_host.h > > > +++ b/include/linux/kvm_host.h > > > @@ -2330,6 +2330,10 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm, > > > struct kvm_memory_slot *slot, > > > unsigned long attrs, > > > gfn_t start, gfn_t end); > > > +void kvm_arch_post_set_memory_attributes(struct kvm *kvm, > > > + struct kvm_memory_slot *slot, > > > + unsigned long attrs, > > > + gfn_t start, gfn_t end); > > > > > > static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > > > { > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > > > index b68574ff6c30..8ec985f1c57d 100644 > > > --- a/virt/kvm/kvm_main.c > > > +++ b/virt/kvm/kvm_main.c > > > @@ -2561,6 +2561,32 @@ static void kvm_mem_attrs_changed(struct kvm *kvm, unsigned long attrs, > > > kvm_flush_remote_tlbs(kvm); > > > } > > > > > > +static void kvm_post_mem_attrs_changed(struct kvm *kvm, unsigned long attrs, > > > + gfn_t start_orig, gfn_t end_orig) > > > +{ > > > + struct kvm_memory_slot *slot; > > > + struct kvm_memslots *slots; > > > + struct kvm_memslot_iter iter; > > > + int i; > > > + > > > + for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { > > > + slots = __kvm_memslots(kvm, i); > 
> > + > > > + kvm_for_each_memslot_in_gfn_range(&iter, slots, start_orig, end_orig) { > > > + gfn_t start, end; > > > + > > > + slot = iter.slot; > > > + start = max(start_orig, slot->base_gfn); > > > + end = min(end_orig, slot->base_gfn + slot->npages); > > > + > > > + if (start >= end) > > > + continue; > > > + > > > + kvm_arch_post_set_memory_attributes(kvm, slot, attrs, start, end); > > > + } > > > + } > > > +} > > > + > > > static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm, > > > struct kvm_memory_attributes *attrs) > > > { > > > @@ -2602,6 +2628,9 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm, > > > kvm_mmu_invalidate_end(kvm); > > > KVM_MMU_UNLOCK(kvm); > > > > > > + if (i > start) > > > + kvm_post_mem_attrs_changed(kvm, attrs->attributes, start, i); > > > + > > > > Doesn't kvm_arch_set_memory_attributes() work for you? i.e the following patch. > > The error check and pr_warn_ratelimited() can be pushed down into the callback. > > This is originally how I had but when CONFIG_PREEMPT_COUNT is set this > will generate warnings for this callback as well as the invalidation > callback as reported in v7 here: > > https://lore.kernel.org/lkml/Y80vhKwQyw8hS%2F22@notebook/ > > The main issue is that kvm_mem_attrs_changed() is called while holding > the KVM MMU lock, which disables preemption. But when updating > attributes for SNP, we also need to remove private pages from kernel > directmap, which involves acquiring a mutex which results in > "BUG: scheduling while atomic" warnings. > > So that's why we ended up somewhat duplicating some of the logic and > using a separate callback chain that happens out of KVM MMU lock. 
Let's break down the steps involved in changing memory attributes: 1) Update the memory attributes in the xarray (both TDX and SNP) 2) Zap the EPT/NPT mappings (required by TDX) 3) Update the RMP table (required by SNP) 4) Update the kernel directmap (SNP, but I guess TDX needs it as well) Does SNP really need to zap the NPT mappings when changing the memory attributes? (The new mappings will be created later in the fault path.) I can't find this requirement in the APM. If yes, can we postpone the RMP table update to the later fault, like TDX? That way we can drop the update_mem_attr x86 op, as things will be handled in the SNP-specific fault handler. If no, I guess we need an x86 op to tell whether zapping is required. Back to the lock: updating the RMP table doesn't require a mutex. The mutex is needed when updating the directmap. Both TDX and SNP require updating the directmap when changing memory attributes. Wouldn't it be better to factor out the part that touches the kernel directmap? Then you can call the x86 op update_mem_attr() in kvm_mem_attrs_changed(), and update the kernel direct mapping for both TDX and SNP in kvm_post_mem_attrs_changed(). > > -Mike > > > > > From 7c618c1f3c236c382e64680efcbe7d8a672aa870 Mon Sep 17 00:00:00 2001 > > Message-Id: <7c618c1f3c236c382e64680efcbe7d8a672aa870.1679114841.git.isaku.yamahata@xxxxxxxxx> > > In-Reply-To: <428a676face7a06a90e59dca1c32941c9b6ee001.1679114841.git.isaku.yamahata@xxxxxxxxx> > > References: <428a676face7a06a90e59dca1c32941c9b6ee001.1679114841.git.isaku.yamahata@xxxxxxxxx> > > From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > > Date: Fri, 17 Mar 2023 12:00:09 -0700 > > Subject: [PATCH 4/4] KVM: x86: Add 'set_mem_attr' x86 op > > > > This callback will do any platform-specific handling needed for > > converting pages between shared/private. 
> > > > Originally-by: Michael Roth <michael.roth@xxxxxxx> > > Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > > --- > > arch/x86/include/asm/kvm-x86-ops.h | 1 + > > arch/x86/include/asm/kvm_host.h | 2 ++ > > arch/x86/kvm/mmu/mmu.c | 1 + > > 3 files changed, 4 insertions(+) > > > > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h > > index dc5f18ac0bd5..956db2ee25a5 100644 > > --- a/arch/x86/include/asm/kvm-x86-ops.h > > +++ b/arch/x86/include/asm/kvm-x86-ops.h > > @@ -100,6 +100,7 @@ KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr) > > KVM_X86_OP_OPTIONAL_RET0(get_mt_mask) > > KVM_X86_OP(load_mmu_pgd) > > KVM_X86_OP(fault_is_private) > > +KVM_X86_OP_OPTIONAL(set_mem_attr) > > KVM_X86_OP_OPTIONAL(link_private_spt) > > KVM_X86_OP_OPTIONAL(free_private_spt) > > KVM_X86_OP_OPTIONAL(split_private_spt) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 0382d236fbf4..88e11dd3afde 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -1731,6 +1731,8 @@ struct kvm_x86_ops { > > void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, > > int root_level); > > bool (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code); > > + void (*set_mem_attr)(struct kvm *kvm, struct kvm_memory_slot *slot, > > + unsigned int attr, gfn_t start, gfn_t end); > > > > int (*link_private_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, > > void *private_spt); > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > > index 0ec94c72895c..329333486e64 100644 > > --- a/arch/x86/kvm/mmu/mmu.c > > +++ b/arch/x86/kvm/mmu/mmu.c > > @@ -7908,6 +7908,7 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm, > > gfn_t start, gfn_t end) > > { > > kvm_update_lpage_mixed_flag(kvm, slot, true, attrs, start, end); > > + static_call(kvm_x86_set_mem_attr)(kvm, slot, attrs, start, end); > > } > > > > void kvm_memory_attributes_create_memslot(struct kvm 
*kvm, > > -- > > 2.25.1 > > > > -- > > Isaku Yamahata <isaku.yamahata@xxxxxxxxx>