Re: [PATCH] drivers/virt: vmgenid: add vm generation id driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Oct 16, 2020 at 02:33:15PM +0000, Catangiu, Adrian Costin wrote:
> +config VMGENID
> +	tristate "Virtual Machine Generation ID driver"
> +	depends on ACPI
> +	default M

Unless this is required to boot a machine, this should be removed.

> +	help
> +	  This is a Virtual Machine Generation ID driver which provides
> +	  a virtual machine unique identifier. The provided UUID can be
> +	  watched through the FS interface exposed by this driver, and
> +	  thus can provide notifications for VM snapshot or cloning events.

Where is the uuid exposed in the filesystem?

> +	  This enables applications and libraries that store or cache
> +	  sensitive information, to know that they need to regenerate it
> +	  after process memory has been exposed to potential copying.

Module name?

> +
>  config FSL_HV_MANAGER
>  	tristate "Freescale hypervisor management driver"
>  	depends on FSL_SOC
> diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
> index fd33124..a1f8dcc 100644
> --- a/drivers/virt/Makefile
> +++ b/drivers/virt/Makefile
> @@ -4,4 +4,5 @@
>  #
>  
>  obj-$(CONFIG_FSL_HV_MANAGER)	+= fsl_hypervisor.o
> +obj-$(CONFIG_VMGENID)		+= vmgenid.o
>  obj-y				+= vboxguest/
> diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
> new file mode 100644
> index 0000000..d314c72
> --- /dev/null
> +++ b/drivers/virt/vmgenid.c
> @@ -0,0 +1,419 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Virtual Machine Generation ID driver
> + *
> + * Copyright (C) 2018 Red Hat Inc, Copyright (C) 2020 Amazon.com Inc

That's not how you write copyright lines, please fix up.

> + * All rights reserved.
> + *	Authors:
> + *	  Adrian Catangiu <acatan@xxxxxxxxxx>
> + *	  Or Idgar <oridgar@xxxxxxxxx>
> + *	  Gal Hammer <ghammer@xxxxxxxxxx>
> + *
> + */
> +#include <linux/acpi.h>
> +#include <linux/cdev.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/module.h>
> +#include <linux/poll.h>
> +#include <linux/uuid.h>
> +#include <linux/vmgenid.h>
> +
> +#define DEV_NAME "vmgenid"
> +ACPI_MODULE_NAME(DEV_NAME);
> +
> +struct dev_data {
> +	struct cdev       cdev;
> +	dev_t             dev_id;
> +	unsigned long     map_buf;
> +
> +	void              *uuid_iomap;
> +	uuid_t            uuid;
> +	wait_queue_head_t read_wait;
> +
> +	atomic_t          watchers;
> +	atomic_t          outdated_watchers;
> +	wait_queue_head_t outdated_wait;
> +};
> +
> +struct file_data {
> +	struct dev_data  *dev_data;
> +	uuid_t           acked_uuid;
> +};
> +
> +static bool vmgenid_uuid_matches(struct dev_data *priv, uuid_t *uuid)
> +{
> +	return !memcmp(uuid, &priv->uuid, sizeof(uuid_t));
> +}
> +
> +static void vmgenid_put_outdated_watchers(struct dev_data *priv)
> +{
> +	if (atomic_dec_and_test(&priv->outdated_watchers))
> +		wake_up_interruptible(&priv->outdated_wait);
> +}
> +
> +static int vmgenid_open(struct inode *inode, struct file *file)
> +{
> +	struct dev_data *priv =
> +		container_of(inode->i_cdev, struct dev_data, cdev);
> +	struct file_data *file_data =
> +		kzalloc(sizeof(struct file_data), GFP_KERNEL);
> +
> +	if (!file_data)
> +		return -ENOMEM;
> +
> +	file_data->acked_uuid = priv->uuid;
> +	file_data->dev_data = priv;
> +
> +	file->private_data = file_data;
> +	atomic_inc(&priv->watchers);
> +
> +	return 0;
> +}
> +
> +static int vmgenid_close(struct inode *inode, struct file *file)
> +{
> +	struct file_data *file_data = (struct file_data *) file->private_data;
> +	struct dev_data *priv = file_data->dev_data;
> +
> +	if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid))
> +		vmgenid_put_outdated_watchers(priv);
> +	atomic_dec(&priv->watchers);
> +	kfree(file->private_data);
> +
> +	return 0;
> +}
> +
> +static ssize_t
> +vmgenid_read(struct file *file, char __user *ubuf, size_t nbytes, loff_t *ppos)
> +{
> +	struct file_data *file_data =
> +		(struct file_data *) file->private_data;
> +	struct dev_data *priv = file_data->dev_data;
> +	ssize_t ret;
> +
> +	if (nbytes == 0)
> +		return 0;
> +	/* disallow partial UUID reads */
> +	if (nbytes < sizeof(uuid_t))
> +		return -EINVAL;
> +	nbytes = sizeof(uuid_t);
> +
> +	if (vmgenid_uuid_matches(priv, &file_data->acked_uuid)) {
> +		if (file->f_flags & O_NONBLOCK)
> +			return -EAGAIN;
> +		ret = wait_event_interruptible(
> +			priv->read_wait,
> +			!vmgenid_uuid_matches(priv, &file_data->acked_uuid)
> +		);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = copy_to_user(ubuf, &priv->uuid, nbytes);
> +	if (ret)
> +		return -EFAULT;
> +
> +	return nbytes;
> +}
> +
> +static ssize_t vmgenid_write(struct file *file, const char __user *ubuf,
> +				size_t count, loff_t *ppos)
> +{
> +	struct file_data *file_data = (struct file_data *) file->private_data;
> +	struct dev_data *priv = file_data->dev_data;
> +	uuid_t ack_uuid;
> +
> +	/* disallow partial UUID writes */
> +	if (count != sizeof(uuid_t))
> +		return -EINVAL;
> +	if (copy_from_user(&ack_uuid, ubuf, count))
> +		return -EFAULT;
> +	/* wrong UUID acknowledged */
> +	if (!vmgenid_uuid_matches(priv, &ack_uuid))
> +		return -EINVAL;
> +
> +	if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid)) {
> +		/* update local view of UUID */
> +		file_data->acked_uuid = ack_uuid;
> +		vmgenid_put_outdated_watchers(priv);
> +	}
> +
> +	return (ssize_t)count;
> +}
> +
> +static __poll_t
> +vmgenid_poll(struct file *file, poll_table *wait)
> +{
> +	__poll_t mask = 0;
> +	struct file_data *file_data =
> +		(struct file_data *) file->private_data;
> +	struct dev_data *priv = file_data->dev_data;
> +
> +	if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid))
> +		return EPOLLIN | EPOLLRDNORM;
> +
> +	poll_wait(file, &priv->read_wait, wait);
> +
> +	if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid))
> +		mask = EPOLLIN | EPOLLRDNORM;
> +
> +	return mask;
> +}
> +
> +static long vmgenid_ioctl(struct file *file,
> +		unsigned int cmd, unsigned long arg)
> +{
> +	struct file_data *file_data =
> +		(struct file_data *) file->private_data;
> +	struct dev_data *priv = file_data->dev_data;
> +	struct timespec __user *timeout = (void *) arg;
> +	struct timespec kts;
> +	ktime_t until;
> +	int ret;
> +
> +	switch (cmd) {
> +	case VMGENID_GET_OUTDATED_WATCHERS:
> +		ret = atomic_read(&priv->outdated_watchers);
> +		break;
> +	case VMGENID_WAIT_WATCHERS:
> +		if (timeout) {
> +			ret = copy_from_user(&kts, timeout, sizeof(kts));
> +			if (ret)
> +				return -EFAULT;
> +			until = timespec_to_ktime(kts);

No validation of structure fields?

And you are exposing a kernel structure to userspace?

I don't see this function in Linus's tree right now, are you sure this
builds?


> +		} else {
> +			until = KTIME_MAX;
> +		}
> +
> +		ret = wait_event_interruptible_hrtimeout(
> +			priv->outdated_wait,
> +			!atomic_read(&priv->outdated_watchers),
> +			until
> +		);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +		break;
> +	}
> +	return ret;
> +}
> +
> +static vm_fault_t vmgenid_vm_fault(struct vm_fault *vmf)
> +{
> +	struct page *page;
> +	struct file_data *file_data =
> +			(struct file_data *) vmf->vma->vm_private_data;

You do know you don't need to cast stuff like this, right?

Also run checkpatch.pl so maintainers are not grumpy and tell you to run
checkpatch.pl.

> +	struct dev_data *priv = file_data->dev_data;
> +
> +	if (priv->map_buf) {
> +		page = virt_to_page(priv->map_buf);
> +		get_page(page);
> +		vmf->page = page;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct vm_operations_struct vmgenid_vm_ops = {
> +	.fault = vmgenid_vm_fault,
> +};
> +
> +static int vmgenid_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	if (vma->vm_pgoff != 0 || vma_pages(vma) > 1)
> +		return -EINVAL;
> +
> +	if ((vma->vm_flags & VM_WRITE) != 0)
> +		return -EPERM;
> +
> +	vma->vm_ops = &vmgenid_vm_ops;
> +	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
> +	vma->vm_private_data = file->private_data;
> +
> +	return 0;
> +}
> +
> +static const struct file_operations fops = {
> +	.owner          = THIS_MODULE,
> +	.mmap           = vmgenid_mmap,
> +	.open           = vmgenid_open,
> +	.release        = vmgenid_close,
> +	.read           = vmgenid_read,
> +	.write          = vmgenid_write,
> +	.poll           = vmgenid_poll,
> +	.compat_ioctl   = vmgenid_ioctl,
> +	.unlocked_ioctl = vmgenid_ioctl,
> +};
> +
> +static int vmgenid_acpi_map(struct dev_data *priv, acpi_handle handle)
> +{
> +	int i;
> +	phys_addr_t phys_addr;
> +	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
> +	acpi_status status;
> +	union acpi_object *pss;
> +	union acpi_object *element;
> +
> +	status = acpi_evaluate_object(handle, "ADDR", NULL, &buffer);
> +	if (ACPI_FAILURE(status)) {
> +		ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
> +		return -ENODEV;
> +	}
> +	pss = buffer.pointer;
> +	if (!pss || pss->type != ACPI_TYPE_PACKAGE || pss->package.count != 2)
> +		return -EINVAL;
> +
> +	phys_addr = 0;
> +	for (i = 0; i < pss->package.count; i++) {
> +		element = &(pss->package.elements[i]);
> +		if (element->type != ACPI_TYPE_INTEGER)
> +			return -EINVAL;
> +		phys_addr |= element->integer.value << i * 32;
> +	}
> +
> +	priv->uuid_iomap = acpi_os_map_memory(phys_addr, sizeof(uuid_t));
> +	if (!priv->uuid_iomap) {
> +		pr_err("Could not map memory at 0x%llx, size %u",
> +			   phys_addr,
> +			   (u32)sizeof(uuid_t));
> +		return -ENOMEM;
> +	}
> +
> +	memcpy_fromio(&priv->uuid, priv->uuid_iomap, sizeof(uuid_t));
> +	memcpy((void *) priv->map_buf, &priv->uuid, sizeof(uuid_t));
> +
> +	return 0;
> +}
> +
> +static int vmgenid_acpi_add(struct acpi_device *device)
> +{
> +	int ret;
> +	struct dev_data *priv;
> +
> +	if (!device)
> +		return -EINVAL;
> +
> +	priv = kzalloc(sizeof(struct dev_data), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	priv->map_buf = get_zeroed_page(GFP_KERNEL);
> +	if (!priv->map_buf) {
> +		ret = -ENOMEM;
> +		goto free;
> +	}
> +
> +	device->driver_data = priv;
> +
> +	init_waitqueue_head(&priv->read_wait);
> +	atomic_set(&priv->watchers, 0);
> +	atomic_set(&priv->outdated_watchers, 0);
> +	init_waitqueue_head(&priv->outdated_wait);
> +
> +	ret = vmgenid_acpi_map(priv, device->handle);
> +	if (ret < 0)
> +		goto err;
> +
> +	ret = alloc_chrdev_region(&priv->dev_id, 0, 1, DEV_NAME);

Why is this not a misc device driver instead of messing around with all
of this "by hand"?

By doing things this way, you are not creating a device node in /dev/
automatically, right?  How did you test this?

> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +/*
> + * Copyright (c) 2020, Amazon.com Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.

License boilerplate is not needed.

> + */
> +
> +#ifndef _UAPI_LINUX_VMGENID_H
> +#define _UAPI_LINUX_VMGENID_H
> +
> +#include <linux/ioctl.h>
> +#include <linux/time.h>
> +
> +#define VMGENID_IOCTL 0x2d
> +#define VMGENID_GET_OUTDATED_WATCHERS _IO(VMGENID_IOCTL, 1)
> +#define VMGENID_WAIT_WATCHERS         _IOW(VMGENID_IOCTL, 2, struct timespec)

Why do you need ioctls for this?  Why not just use read/write?

thanks,

greg k-h



[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux