Re: [PATCH 1/3] KVM: gmem: allocate private data for the gmem inode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



+Ackerley, who's also working on resurrecting the guest_memfd file system[*].
At a glance, there appear to be non-trivial differences between the two
versions, e.g. Ackerley's version has a call to
security_inode_init_security_anon().  I've paged out much of the inode stuff,
so I trust Ackerley's judgment far, far more than my own :-)

[*] https://lore.kernel.org/all/d1940d466fc69472c8b6dda95df2e0522b2d8744.1726009989.git.ackerleytng@xxxxxxxxxx

On Fri, Nov 08, 2024, Paolo Bonzini wrote:
> In preparation for removing the usage of the uptodate flag,
> reintroduce the gmem filesystem type.  We need it in order to
> free the private inode information.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  include/uapi/linux/magic.h |   1 +
>  virt/kvm/guest_memfd.c     | 117 +++++++++++++++++++++++++++++++++----
>  virt/kvm/kvm_main.c        |   7 ++-
>  virt/kvm/kvm_mm.h          |   8 ++-
>  4 files changed, 119 insertions(+), 14 deletions(-)
> 
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index bb575f3ab45e..d856dd6a7ed9 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -103,5 +103,6 @@
>  #define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
>  #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
>  #define PID_FS_MAGIC		0x50494446	/* "PIDF" */
> +#define KVM_GUEST_MEM_MAGIC	0x474d454d	/* "GMEM" */
>  
>  #endif /* __LINUX_MAGIC_H__ */
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 8f079a61a56d..3ea5a7597fd4 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,9 +4,74 @@
>  #include <linux/kvm_host.h>
>  #include <linux/pagemap.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/pseudo_fs.h>
>  
>  #include "kvm_mm.h"
>  
> +/* Do all the filesystem crap just for evict_inode... */
> +
> +static struct vfsmount *kvm_gmem_mnt __read_mostly;
> +
> +static void gmem_evict_inode(struct inode *inode)
> +{
> +	kvfree(inode->i_private);
> +	truncate_inode_pages_final(&inode->i_data);
> +	clear_inode(inode);
> +}
> +
> +static const struct super_operations gmem_super_operations = {
> +	.drop_inode	= generic_delete_inode,
> +	.evict_inode    = gmem_evict_inode,
> +	.statfs         = simple_statfs,
> +};
> +
> +static int gmem_init_fs_context(struct fs_context *fc)
> +{
> +	struct pseudo_fs_context *ctx = init_pseudo(fc, KVM_GUEST_MEM_MAGIC);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->ops = &gmem_super_operations;
> +	return 0;
> +}
> +
> +static struct file_system_type kvm_gmem_fs_type = {
> +	.name           = "kvm_gmemfs",
> +	.init_fs_context = gmem_init_fs_context,
> +	.kill_sb        = kill_anon_super,
> +};
> +
> +static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops)
> +{
> +	struct inode *inode;
> +	struct file *file;
> +
> +	if (fops->owner && !try_module_get(fops->owner))
> +		return ERR_PTR(-ENOENT);
> +
> +	inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb);
> +	if (IS_ERR(inode)) {
> +		file = ERR_CAST(inode);
> +		goto err;
> +	}
> +	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, fops);
> +	if (IS_ERR(file))
> +		goto err_iput;
> +
> +	return file;
> +
> +err_iput:
> +	iput(inode);
> +err:
> +	module_put(fops->owner);
> +	return file;
> +}
> +
> +
> +struct kvm_gmem_inode {
> +	unsigned long flags;
> +};
> +
>  struct kvm_gmem {
>  	struct kvm *kvm;
>  	struct xarray bindings;
> @@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = {
>  	.fallocate	= kvm_gmem_fallocate,
>  };
>  
> -void kvm_gmem_init(struct module *module)
> +int kvm_gmem_init(struct module *module)
>  {
> +	int ret;
> +
> +	ret = register_filesystem(&kvm_gmem_fs_type);
> +	if (ret) {
> +		pr_err("kvm-gmem: cannot register file system (%d)\n", ret);
> +		return ret;
> +	}
> +
> +	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type);
> +	if (IS_ERR(kvm_gmem_mnt)) {
> +		pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt));
> +		return PTR_ERR(kvm_gmem_mnt);
> +	}
> +
>  	kvm_gmem_fops.owner = module;
> +
> +	return 0;
> +}
> +
> +void kvm_gmem_exit(void)
> +{
> +	kern_unmount(kvm_gmem_mnt);
> +	unregister_filesystem(&kvm_gmem_fs_type);
>  }
>  
>  static int kvm_gmem_migrate_folio(struct address_space *mapping,
> @@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = {
>  
>  static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  {
> -	const char *anon_name = "[kvm-gmem]";
> +	const char *gmem_name = "[kvm-gmem]";
> +	struct kvm_gmem_inode *i_gmem;
>  	struct kvm_gmem *gmem;
>  	struct inode *inode;
>  	struct file *file;
>  	int fd, err;
>  
> +	i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL);
> +	if (!i_gmem)
> +		return -ENOMEM;
> +	i_gmem->flags = flags;
> +
>  	fd = get_unused_fd_flags(0);
> -	if (fd < 0)
> -		return fd;
> +	if (fd < 0) {
> +		err = fd;
> +		goto err_i_gmem;
> +	}
>  
>  	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
>  	if (!gmem) {
> @@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  		goto err_fd;
>  	}
>  
> -	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
> -					 O_RDWR, NULL);
> +	file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops);
>  	if (IS_ERR(file)) {
>  		err = PTR_ERR(file);
>  		goto err_gmem;
>  	}
>  
> +	inode = file->f_inode;
> +
> +	file->f_mapping = inode->i_mapping;
> +	file->private_data = gmem;
>  	file->f_flags |= O_LARGEFILE;
>  
> -	inode = file->f_inode;
> -	WARN_ON(file->f_mapping != inode->i_mapping);
> -
> -	inode->i_private = (void *)(unsigned long)flags;
> +	inode->i_private = i_gmem;
>  	inode->i_op = &kvm_gmem_iops;
>  	inode->i_mapping->a_ops = &kvm_gmem_aops;
>  	inode->i_mode |= S_IFREG;
> @@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  	kfree(gmem);
>  err_fd:
>  	put_unused_fd(fd);
> +err_i_gmem:
> +	kvfree(i_gmem);
>  	return err;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 279e03029ce1..8b7b4e0eb639 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  	if (WARN_ON_ONCE(r))
>  		goto err_vfio;
>  
> -	kvm_gmem_init(module);
> +	r = kvm_gmem_init(module);
> +	if (r)
> +		goto err_gmem;
>  
>  	r = kvm_init_virtualization();
>  	if (r)
> @@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  err_register:
>  	kvm_uninit_virtualization();
>  err_virt:
> +	kvm_gmem_exit();
> +err_gmem:
>  	kvm_vfio_ops_exit();
>  err_vfio:
>  	kvm_async_pf_deinit();
> @@ -6556,6 +6560,7 @@ void kvm_exit(void)
>  	for_each_possible_cpu(cpu)
>  		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
>  	kmem_cache_destroy(kvm_vcpu_cache);
> +	kvm_gmem_exit();
>  	kvm_vfio_ops_exit();
>  	kvm_async_pf_deinit();
>  	kvm_irqfd_exit();
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 715f19669d01..91e4202574a8 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
>  #endif /* HAVE_KVM_PFNCACHE */
>  
>  #ifdef CONFIG_KVM_PRIVATE_MEM
> -void kvm_gmem_init(struct module *module);
> +int kvm_gmem_init(struct module *module);
> +void kvm_gmem_exit(void);
>  int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
>  int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
>  		  unsigned int fd, loff_t offset);
>  void kvm_gmem_unbind(struct kvm_memory_slot *slot);
>  #else
> -static inline void kvm_gmem_init(struct module *module)
> +static inline void kvm_gmem_exit(void) {}
> +static inline int kvm_gmem_init(struct module *module)
>  {
> -
> +	return 0;
>  }
>  
>  static inline int kvm_gmem_bind(struct kvm *kvm,
> -- 
> 2.43.5
> 
> 




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux