Re: [RFC PATCH v4 02/18] add memory map/unmap support for VM introspection on the guest side

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2017-12-18 02:06 PM, Adalber Lazăr wrote:
From: Adalbert Lazar <alazar@xxxxxxxxxxxxxxx>

An introspection tool running in a dedicated VM can use the new device
(/dev/kvmmem) to map memory from other introspected VM-s.

Two ioctl operations are supported:
   - KVM_INTRO_MEM_MAP/struct kvmi_mem_map
   - KVM_INTRO_MEM_UNMAP/unsigned long

In order to map an introspected gpa to the local gva, the process using
this device needs to obtain a token from the host KVMI subsystem (see
Documentation/virtual/kvm/kvmi.rst - KVMI_GET_MAP_TOKEN).

Both operations use hypercalls (KVM_HC_MEM_MAP, KVM_INTRO_MEM_UNMAP)
to pass the requests to the host kernel/KVMi (see hypercalls.txt).

Signed-off-by: Mircea Cîrjaliu <mcirjaliu@xxxxxxxxxxxxxxx>
---
  arch/x86/Kconfig                  |   9 +
  arch/x86/include/asm/kvmi_guest.h |  10 +
  arch/x86/kernel/Makefile          |   1 +
  arch/x86/kernel/kvmi_mem_guest.c  |  26 +++
  virt/kvm/kvmi_mem_guest.c         | 379 ++++++++++++++++++++++++++++++++++++++
  5 files changed, 425 insertions(+)
  create mode 100644 arch/x86/include/asm/kvmi_guest.h
  create mode 100644 arch/x86/kernel/kvmi_mem_guest.c
  create mode 100644 virt/kvm/kvmi_mem_guest.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..6e2548f4d44c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -782,6 +782,15 @@ config KVM_DEBUG_FS
  	  Statistics are displayed in debugfs filesystem. Enabling this option
  	  may incur significant overhead.
+config KVMI_MEM_GUEST
+	bool "KVM Memory Introspection support on Guest"
+	depends on KVM_GUEST
+	default n
+	---help---
+	  This option enables functions and hypercalls for security applications
+	  running in a separate VM to control the execution of other VM-s, query
+	  the state of the vCPU-s (GPR-s, MSR-s etc.).
+
  config PARAVIRT_TIME_ACCOUNTING
  	bool "Paravirtual steal time accounting"
  	depends on PARAVIRT
diff --git a/arch/x86/include/asm/kvmi_guest.h b/arch/x86/include/asm/kvmi_guest.h
new file mode 100644
index 000000000000..c7ed53a938e0
--- /dev/null
+++ b/arch/x86/include/asm/kvmi_guest.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVMI_GUEST_H__
+#define __KVMI_GUEST_H__
+
+long kvmi_arch_map_hc(struct kvmi_map_mem_token *tknp,
+	gpa_t req_gpa, gpa_t map_gpa);
+long kvmi_arch_unmap_hc(gpa_t map_gpa);
+
+
+#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 81bb565f4497..fdb54b65e46e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
  obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
  obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
  obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
+obj-$(CONFIG_KVMI_MEM_GUEST)	+= kvmi_mem_guest.o ../../../virt/kvm/kvmi_mem_guest.o
obj-$(CONFIG_EISA) += eisa.o
  obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
diff --git a/arch/x86/kernel/kvmi_mem_guest.c b/arch/x86/kernel/kvmi_mem_guest.c
new file mode 100644
index 000000000000..c4e2613f90f3
--- /dev/null
+++ b/arch/x86/kernel/kvmi_mem_guest.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM introspection guest implementation
+ *
+ * Copyright (C) 2017 Bitdefender S.R.L.
+ *
+ * Author:
+ *   Mircea Cirjaliu <mcirjaliu@xxxxxxxxxxxxxxx>
+ */
+
+#include <uapi/linux/kvmi.h>
+#include <uapi/linux/kvm_para.h>
+#include <linux/kvm_types.h>
+#include <asm/kvm_para.h>
+
+long kvmi_arch_map_hc(struct kvmi_map_mem_token *tknp,
+		       gpa_t req_gpa, gpa_t map_gpa)
+{
+	return kvm_hypercall3(KVM_HC_MEM_MAP, (unsigned long)tknp,
+			      req_gpa, map_gpa);
+}
+
+long kvmi_arch_unmap_hc(gpa_t map_gpa)
+{
+	return kvm_hypercall1(KVM_HC_MEM_UNMAP, map_gpa);
+}
diff --git a/virt/kvm/kvmi_mem_guest.c b/virt/kvm/kvmi_mem_guest.c
new file mode 100644
index 000000000000..118c22ca47c5
--- /dev/null
+++ b/virt/kvm/kvmi_mem_guest.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM introspection guest implementation
+ *
+ * Copyright (C) 2017 Bitdefender S.R.L.
+ *
+ * Author:
+ *   Mircea Cirjaliu <mcirjaliu@xxxxxxxxxxxxxxx>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm_para.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/rmap.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <uapi/linux/kvmi.h>
+#include <asm/kvmi_guest.h>
+
+#define ASSERT(exp) BUG_ON(!(exp))
+
+
+static struct list_head file_list;
+static spinlock_t file_lock;
+
+struct file_map {
+	struct list_head file_list;
+	struct file *file;
+	struct list_head map_list;
+	struct mutex lock;
+	int active;	/* for tearing down */
+};
+
+struct page_map {
+	struct list_head map_list;
+	__u64 gpa;
+	unsigned long vaddr;
+	unsigned long paddr;
+};
+
+
+static int kvm_dev_open(struct inode *inodep, struct file *filp)
+{
+	struct file_map *fmp;
+
+	pr_debug("kvmi: file %016lx opened by process %s\n",
+		 (unsigned long) filp, current->comm);
+
+	/* link the file 1:1 with such a structure */
+	fmp = kmalloc(sizeof(struct file_map), GFP_KERNEL);

I think this is supposed to be "kmalloc(sizeof(*fmp), GFP_KERNEL)".

+	if (fmp == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&fmp->file_list);
+	fmp->file = filp;
+	filp->private_data = fmp;
+	INIT_LIST_HEAD(&fmp->map_list);
+	mutex_init(&fmp->lock);
+	fmp->active = 1;
+
+	/* add the entry to the global list */
+	spin_lock(&file_lock);
+	list_add_tail(&fmp->file_list, &file_list);
+	spin_unlock(&file_lock);
+
+	return 0;
+}
+
+/* actually does the mapping of a page */
+static long _do_mapping(struct kvmi_mem_map *map_req, struct page_map *pmap)

Here you have a "struct page_map" and call it pmap. However, in the rest of the code, whenever there's a "struct page_map" it's called pmp. It seems that it would be good to stay consistent with the naming, so perhaps rename it here to pmp as well?


+{
+	unsigned long paddr;
+	struct vm_area_struct *vma = NULL; > +	struct page *page;

Out of curiosity, why do you set "*vma = NULL" but not "*page = NULL"?

+	long result;
+
+	pr_debug("kvmi: mapping remote GPA %016llx into %016llx\n",
+		 map_req->gpa, map_req->gva);
+
+	/* check access to memory location */
+	if (!access_ok(VERIFY_READ, map_req->gva, PAGE_SIZE)) {
+		pr_err("kvmi: invalid virtual address for mapping\n");
+		return -EINVAL;
+	}
+
+	down_read(&current->mm->mmap_sem);
+
+	/* find the page to be replaced */
+	vma = find_vma(current->mm, map_req->gva);
+	if (IS_ERR_OR_NULL(vma)) {
+		result = PTR_ERR(vma);
+		pr_err("kvmi: find_vma() failed with result %ld\n", result);
+		goto out;
+	}
+
+	page = follow_page(vma, map_req->gva, 0);
+	if (IS_ERR_OR_NULL(page)) {
+		result = PTR_ERR(page);
+		pr_err("kvmi: follow_page() failed with result %ld\n", result);
+		goto out;
+	}
+
+	if (IS_ENABLED(CONFIG_DEBUG_VM))
+		dump_page(page, "page to map_req into");
+
+	WARN(is_zero_pfn(page_to_pfn(page)), "zero-page still mapped");
+
+	/* get the physical address and store it in page_map */
+	paddr = page_to_phys(page);
+	pr_debug("kvmi: page phys addr %016lx\n", paddr);
+	pmap->paddr = paddr;
+
+	/* last thing to do is host mapping */
+	result = kvmi_arch_map_hc(&map_req->token, map_req->gpa, paddr);
+	if (IS_ERR_VALUE(result)) {
+		pr_err("kvmi: HC failed with result %ld\n", result);
+		goto out;
+	}
+
+out:
+	up_read(&current->mm->mmap_sem);
+
+	return result;
+}
+
+/* actually does the unmapping of a page */
+static long _do_unmapping(unsigned long paddr)
+{
+	long result;
+
+	pr_debug("kvmi: unmapping request for phys addr %016lx\n", paddr);
+
+	/* local GPA uniquely identifies the mapping on the host */
+	result = kvmi_arch_unmap_hc(paddr);
+	if (IS_ERR_VALUE(result))
+		pr_warn("kvmi: HC failed with result %ld\n", result);
+
+	return result;
+}
+
+static long kvm_dev_ioctl_map(struct file_map *fmp, struct kvmi_mem_map *map)
+{
+	struct page_map *pmp;
+	long result = 0;

Out of curiosity again, why do you set "result = 0" here when it's always set before used (and, for e.g., _do_unmapping() doesn't do "result = 0")?

+
+	if (!access_ok(VERIFY_READ, map->gva, PAGE_SIZE))
+		return -EINVAL;
+	if (!access_ok(VERIFY_WRITE, map->gva, PAGE_SIZE))
+		return -EINVAL;
+
+	/* prepare list entry */
+	pmp = kmalloc(sizeof(struct page_map), GFP_KERNEL);

This should also probably be "kmalloc(sizeof(*pmp), GFP_KERNEL)".

+	if (pmp == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&pmp->map_list);
+	pmp->gpa = map->gpa;
+	pmp->vaddr = map->gva;
+
+	/* acquire the file mapping */
+	mutex_lock(&fmp->lock);
+
+	/* check if other thread is closing the file */
+	if (!fmp->active) {
+		result = -ENODEV;
+		pr_warn("kvmi: unable to map, file is being closed\n");
+		goto out_err;
+	}
+
+	/* do the actual mapping */
+	result = _do_mapping(map, pmp);
+	if (IS_ERR_VALUE(result))
+		goto out_err;
+
+	/* link to list */
+	list_add_tail(&pmp->map_list, &fmp->map_list);
+
+	/* all fine */
+	result = 0;
+	goto out_finalize;
+
+out_err:
+	kfree(pmp);
+
+out_finalize:
+	mutex_unlock(&fmp->lock);
+
+	return result;
+}
+
+static long kvm_dev_ioctl_unmap(struct file_map *fmp, unsigned long vaddr)
+{
+	struct list_head *cur;
+	struct page_map *pmp;
+	bool found = false;
+
+	/* acquire the file */
+	mutex_lock(&fmp->lock);
+
+	/* check if other thread is closing the file */
+	if (!fmp->active) {
+		mutex_unlock(&fmp->lock);

Wouldn't this be better replaced with a "goto out_err" like in kvm_dev_ioctl_map()?

+		pr_warn("kvmi: unable to unmap, file is being closed\n");
+		return -ENODEV;
+	}
+
+	/* check that this address belongs to us */
+	list_for_each(cur, &fmp->map_list) {
+		pmp = list_entry(cur, struct page_map, map_list);
+
+		/* found */
+		if (pmp->vaddr == vaddr) {
+			found = true;
+			break;
+		}
+	}
+
+	/* not found ? */
+	if (!found) {
+		mutex_unlock(&fmp->lock);

Here too: "goto out_err".

+		pr_err("kvmi: address %016lx not mapped\n", vaddr);
+		return -ENOENT;
+	}
+
+	/* decouple guest mapping */
+	list_del(&pmp->map_list);
+	mutex_unlock(&fmp->lock);

In kvm_dev_ioctl_map(), the fmp mutex is held across the _do_mapping() call. Is there any particular reason why here the mutex doesn't need to be held across the _do_unmapping() call? Or was that more an artifact of having a common "out_err" exit in kvm_dev_ioctl_map()?

+
+	/* unmap & ignore result */
+	_do_unmapping(pmp->paddr);
+
+	/* free guest mapping */
+	kfree(pmp);
+
+	return 0;
+}
+
+static long kvm_dev_ioctl(struct file *filp,
+			  unsigned int ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *) arg;
+	struct file_map *fmp;
+	long result;
+
+	/* minor check */
+	fmp = filp->private_data;
+	ASSERT(fmp->file == filp);
+
+	switch (ioctl) {
+	case KVM_INTRO_MEM_MAP: {
+		struct kvmi_mem_map map;
+
+		result = -EFAULT;
+		if (copy_from_user(&map, argp, sizeof(map)))
+			break;
+
+		result = kvm_dev_ioctl_map(fmp, &map);
+		if (IS_ERR_VALUE(result))
+			break;
+
+		result = 0;
+		break;
+	}

Since kvm_dev_ioctl_map() either returns an error or 0, couldn't this just be reduced to:
		result = kvm_dev_ioctl_map(fmap, &map);
		break;
	}

+	case KVM_INTRO_MEM_UNMAP: {
+		unsigned long vaddr = (unsigned long) arg;
+
+		result = kvm_dev_ioctl_unmap(fmp, vaddr);
+		if (IS_ERR_VALUE(result))
+			break;
+
+		result = 0;
+		break;
+	}

Ditto here.

+	default:
+		pr_err("kvmi: ioctl %d not implemented\n", ioctl);
+		result = -ENOTTY;
+	}
+
+	return result;
+}
+
+static int kvm_dev_release(struct inode *inodep, struct file *filp)
+{
+	int result = 0;

You set "result = 0" here, but result isn't used until the end, and just to return it.

+	struct file_map *fmp;
+	struct list_head *cur, *next;
+	struct page_map *pmp;
+
+	pr_debug("kvmi: file %016lx closed by process %s\n",
+		 (unsigned long) filp, current->comm);
+
+	/* acquire the file */
+	fmp = filp->private_data;
+	mutex_lock(&fmp->lock);
+
+	/* mark for teardown */
+	fmp->active = 0;
+
+	/* release mappings taken on this instance of the file */
+	list_for_each_safe(cur, next, &fmp->map_list) {
+		pmp = list_entry(cur, struct page_map, map_list);
+
+		/* unmap address */
+		_do_unmapping(pmp->paddr);
+
+		/* decouple & free guest mapping */
+		list_del(&pmp->map_list);
+		kfree(pmp);
+	}
+
+	/* done processing this file mapping */
+	mutex_unlock(&fmp->lock);
+
+	/* decouple file mapping */
+	spin_lock(&file_lock);
+	list_del(&fmp->file_list);
+	spin_unlock(&file_lock);
+
+	/* free it */
+	kfree(fmp);
+
+	return result;

This is the first time result is used. Couldn't this just be replaced with "return 0"?

+}
+
+
+static const struct file_operations kvmmem_ops = {
+	.open		= kvm_dev_open,
+	.unlocked_ioctl = kvm_dev_ioctl,
+	.compat_ioctl   = kvm_dev_ioctl,
+	.release	= kvm_dev_release,
+};

Here you have all the rvals aligned...

+
+static struct miscdevice kvm_mem_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "kvmmem",
+	.fops = &kvmmem_ops,
+};

...but here you don't. I'm not sure what the "proper" style is, but I think it should at least just be consistent.

+
+int __init kvm_intro_guest_init(void)
+{
+	int result;
+
+	if (!kvm_para_available()) {
+		pr_err("kvmi: paravirt not available\n");
+		return -EPERM;
+	}
+
+	result = misc_register(&kvm_mem_dev);
+	if (result) {
+		pr_err("kvmi: misc device register failed: %d\n", result);
+		return result;
+	}
+
+	INIT_LIST_HEAD(&file_list);
+	spin_lock_init(&file_lock);
+
+	pr_info("kvmi: guest introspection device created\n");
+
+	return 0;
+}
+
+void kvm_intro_guest_exit(void)
+{
+	misc_deregister(&kvm_mem_dev);
+}
+
+module_init(kvm_intro_guest_init)
+module_exit(kvm_intro_guest_exit)



Patrick

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]
  Powered by Linux