Thanks for reviewing, Gavin! I'll also adopt these when I respin. Gavin Shan <gshan@xxxxxxxxxx> writes: > Hi Fuad, > > On 1/18/25 2:29 AM, Fuad Tabba wrote: >> From: Ackerley Tng <ackerleytng@xxxxxxxxxx> >> >> Using guest mem inodes allows us to store metadata for the backing >> memory on the inode. Metadata will be added in a later patch to >> support HugeTLB pages. >> >> Metadata about backing memory should not be stored on the file, since >> the file represents a guest_memfd's binding with a struct kvm, and >> metadata about backing memory is not unique to a specific binding and >> struct kvm. >> >> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> >> Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx> >> --- >> include/uapi/linux/magic.h | 1 + >> virt/kvm/guest_memfd.c | 119 ++++++++++++++++++++++++++++++------- >> 2 files changed, 100 insertions(+), 20 deletions(-) >> >> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h >> index bb575f3ab45e..169dba2a6920 100644 >> --- a/include/uapi/linux/magic.h >> +++ b/include/uapi/linux/magic.h >> @@ -103,5 +103,6 @@ >> #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ >> #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ >> #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ >> +#define GUEST_MEMORY_MAGIC 0x474d454d /* "GMEM" */ >> >> #endif /* __LINUX_MAGIC_H__ */ >> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c >> index 47a9f68f7b24..198554b1f0b5 100644 >> --- a/virt/kvm/guest_memfd.c >> +++ b/virt/kvm/guest_memfd.c >> @@ -1,12 +1,17 @@ >> // SPDX-License-Identifier: GPL-2.0 >> +#include <linux/fs.h> >> +#include <linux/mount.h> > > This can be dropped since "linux/mount.h" is already included by "linux/fs.h". 
> >> #include <linux/backing-dev.h> >> #include <linux/falloc.h> >> #include <linux/kvm_host.h> >> +#include <linux/pseudo_fs.h> >> #include <linux/pagemap.h> >> #include <linux/anon_inodes.h> >> >> #include "kvm_mm.h" >> >> +static struct vfsmount *kvm_gmem_mnt; >> + >> struct kvm_gmem { >> struct kvm *kvm; >> struct xarray bindings; >> @@ -307,6 +312,38 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) >> return gfn - slot->base_gfn + slot->gmem.pgoff; >> } >> >> +static const struct super_operations kvm_gmem_super_operations = { >> + .statfs = simple_statfs, >> +}; >> + >> +static int kvm_gmem_init_fs_context(struct fs_context *fc) >> +{ >> + struct pseudo_fs_context *ctx; >> + >> + if (!init_pseudo(fc, GUEST_MEMORY_MAGIC)) >> + return -ENOMEM; >> + >> + ctx = fc->fs_private; >> + ctx->ops = &kvm_gmem_super_operations; >> + >> + return 0; >> +} >> + >> +static struct file_system_type kvm_gmem_fs = { >> + .name = "kvm_guest_memory", >> + .init_fs_context = kvm_gmem_init_fs_context, >> + .kill_sb = kill_anon_super, >> +}; >> + >> +static void kvm_gmem_init_mount(void) >> +{ >> + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs); >> + BUG_ON(IS_ERR(kvm_gmem_mnt)); >> + >> + /* For giggles. Userspace can never map this anyways. 
*/ >> + kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC; >> +} >> + >> static struct file_operations kvm_gmem_fops = { >> .open = generic_file_open, >> .release = kvm_gmem_release, >> @@ -316,6 +353,8 @@ static struct file_operations kvm_gmem_fops = { >> void kvm_gmem_init(struct module *module) >> { >> kvm_gmem_fops.owner = module; >> + >> + kvm_gmem_init_mount(); >> } >> >> static int kvm_gmem_migrate_folio(struct address_space *mapping, >> @@ -397,11 +436,67 @@ static const struct inode_operations kvm_gmem_iops = { >> .setattr = kvm_gmem_setattr, >> }; >> >> +static struct inode *kvm_gmem_inode_make_secure_inode(const char *name, >> + loff_t size, u64 flags) >> +{ >> + const struct qstr qname = QSTR_INIT(name, strlen(name)); >> + struct inode *inode; >> + int err; >> + >> + inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb); >> + if (IS_ERR(inode)) >> + return inode; >> + >> + err = security_inode_init_security_anon(inode, &qname, NULL); >> + if (err) { >> + iput(inode); >> + return ERR_PTR(err); >> + } >> + >> + inode->i_private = (void *)(unsigned long)flags; >> + inode->i_op = &kvm_gmem_iops; >> + inode->i_mapping->a_ops = &kvm_gmem_aops; >> + inode->i_mode |= S_IFREG; >> + inode->i_size = size; >> + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); >> + mapping_set_inaccessible(inode->i_mapping); >> + /* Unmovable mappings are supposed to be marked unevictable as well. 
*/ >> + WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); >> + >> + return inode; >> +} >> + >> +static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size, >> + u64 flags) >> +{ >> + static const char *name = "[kvm-gmem]"; >> + struct inode *inode; >> + struct file *file; >> + >> + if (kvm_gmem_fops.owner && !try_module_get(kvm_gmem_fops.owner)) >> + return ERR_PTR(-ENOENT); >> + > > The validation on 'kvm_gmem_fops.owner' can be removed since try_module_get() > and module_put() are friendly to a NULL parameter, even when CONFIG_MODULE_UNLOAD == N > > A module_put(kvm_gmem_fops.owner) is needed in the various error paths of > this function. Otherwise, the reference count of the owner (module) will become > imbalanced on any error. > Thanks for catching this! Will add module_put() for error paths. > >> + inode = kvm_gmem_inode_make_secure_inode(name, size, flags); >> + if (IS_ERR(inode)) >> + return ERR_CAST(inode); >> + > > ERR_CAST may be dropped since there is nothing to be cast or converted? > This cast is necessary as it casts from a struct inode * to a struct file *. >> + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, >> + &kvm_gmem_fops); >> + if (IS_ERR(file)) { >> + iput(inode); >> + return file; >> + } >> + >> + file->f_mapping = inode->i_mapping; >> + file->f_flags |= O_LARGEFILE; >> + file->private_data = priv; >> + > > 'file->f_mapping = inode->i_mapping' may be dropped since it's already correctly > set by alloc_file_pseudo(). > > alloc_file_pseudo > alloc_path_pseudo > alloc_file > alloc_empty_file > file_init_path // Set by this function > Thanks! 
> >> + return file; >> +} >> + >> static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) >> { >> - const char *anon_name = "[kvm-gmem]"; >> struct kvm_gmem *gmem; >> - struct inode *inode; >> struct file *file; >> int fd, err; >> >> @@ -415,32 +510,16 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) >> goto err_fd; >> } >> >> - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem, >> - O_RDWR, NULL); >> + file = kvm_gmem_inode_create_getfile(gmem, size, flags); >> if (IS_ERR(file)) { >> err = PTR_ERR(file); >> goto err_gmem; >> } >> >> - file->f_flags |= O_LARGEFILE; >> - >> - inode = file->f_inode; >> - WARN_ON(file->f_mapping != inode->i_mapping); >> - >> - inode->i_private = (void *)(unsigned long)flags; >> - inode->i_op = &kvm_gmem_iops; >> - inode->i_mapping->a_ops = &kvm_gmem_aops; >> - inode->i_mode |= S_IFREG; >> - inode->i_size = size; >> - mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); >> - mapping_set_inaccessible(inode->i_mapping); >> - /* Unmovable mappings are supposed to be marked unevictable as well. */ >> - WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); >> - >> kvm_get_kvm(kvm); >> gmem->kvm = kvm; >> xa_init(&gmem->bindings); >> - list_add(&gmem->entry, &inode->i_mapping->i_private_list); >> + list_add(&gmem->entry, &file_inode(file)->i_mapping->i_private_list); >> >> fd_install(fd, file); >> return fd; > > Thanks, > Gavin