On Thu, Dec 23, 2021, Chao Peng wrote: > Extend the memslot definition to provide fd-based private memory support > by adding two new fields(fd/ofs). The memslot then can maintain memory > for both shared and private pages in a single memslot. Shared pages are > provided in the existing way by using userspace_addr(hva) field and > get_user_pages() while private pages are provided through the new > fields(fd/ofs). Since there is no 'hva' concept anymore for private > memory we cannot call get_user_pages() to get a pfn, instead we rely on > the newly introduced MEMFD_OPS callbacks to do the same job. > > This new extension is indicated by a new flag KVM_MEM_PRIVATE. > > Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx> > Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx> > --- > include/linux/kvm_host.h | 10 ++++++++++ > include/uapi/linux/kvm.h | 12 ++++++++++++ > 2 files changed, 22 insertions(+) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index f8ed799e8674..2cd35560c44b 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -460,8 +460,18 @@ struct kvm_memory_slot { > u32 flags; > short id; > u16 as_id; > + u32 fd; There should be no need to store the fd in the memslot, the fd should be unneeded outside of __kvm_set_memory_region(), e.g. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1caebded52c4..4e43262887a3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2029,10 +2029,10 @@ int __kvm_set_memory_region(struct kvm *kvm, new->npages = npages; new->flags = mem->flags; new->userspace_addr = mem->userspace_addr; - new->fd = mem->fd; - new->file = NULL; - new->ofs = mem->ofs; - + if (mem->flags & KVM_MEM_PRIVATE) { + new->private_file = fget(mem->private_fd); + new->private_offset = mem->private_offset; + } r = kvm_set_memslot(kvm, old, new, change); if (r) kfree(new); > + struct file *file; Please use more descriptive names, shaving characters is not at all a priority. 
> + u64 ofs; I believe this should be loff_t. struct file *private_file; loff_t private_offset; > }; > > +static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot) > +{ > + if (slot && (slot->flags & KVM_MEM_PRIVATE)) > + return true; > + return false; return slot && (slot->flags & KVM_MEM_PRIVATE); > +} > + > static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) > { > return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 1daa45268de2..41434322fa23 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -103,6 +103,17 @@ struct kvm_userspace_memory_region { > __u64 userspace_addr; /* start of the userspace allocated memory */ > }; > > +struct kvm_userspace_memory_region_ext { > + __u32 slot; > + __u32 flags; > + __u64 guest_phys_addr; > + __u64 memory_size; /* bytes */ > + __u64 userspace_addr; /* hva */ Would it make sense to embed "struct kvm_userspace_memory_region"? > + __u64 ofs; /* offset into fd */ > + __u32 fd; Again, use descriptive names, then comments like "offset into fd" are unnecessary. __u64 private_offset; __u32 private_fd; > + __u32 padding[5]; > +}; > + > /* > * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, > * other bits are reserved for kvm internal use which are defined in > @@ -110,6 +121,7 @@ struct kvm_userspace_memory_region { > */ > #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) > #define KVM_MEM_READONLY (1UL << 1) > +#define KVM_MEM_PRIVATE (1UL << 2) > > /* for KVM_IRQ_LINE */ > struct kvm_irq_level { > -- > 2.17.1 >