[PATCH v3 6/6] KVM: introduce readonly memslot

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In the current code, if we map a readonly memory space from the host to the
guest and the page is not currently mapped in the host, we will get a
fault-pfn and async is not allowed, so the VM will crash.

Following Avi's idea, we introduce a readonly memory region to map ROM/ROMD
to the guest.

Signed-off-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx>
---
 Documentation/virtual/kvm/api.txt |    9 ++++--
 arch/x86/kvm/x86.c                |    1 +
 include/linux/kvm.h               |    4 ++-
 virt/kvm/kvm_main.c               |   61 ++++++++++++++++++++++++++++--------
 4 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 310fe50..a97ee90 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -857,7 +857,8 @@ struct kvm_userspace_memory_region {
 };

 /* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+#define KVM_MEM_READONLY	(1UL << 1)

 This ioctl allows the user to create or modify a guest physical memory
 slot.  When changing an existing slot, it may be moved in the guest
@@ -873,9 +874,11 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical.  This allows large pages in the guest to be backed by large
 pages in the host.

-The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
+The flags field supports two flags, KVM_MEM_LOG_DIRTY_PAGES, which
 instructs kvm to keep track of writes to memory within the slot.  See
-the KVM_GET_DIRTY_LOG ioctl.
+the KVM_GET_DIRTY_LOG ioctl. The other flag is KVM_MEM_READONLY, which
+indicates that the guest memory is read-only, i.e. the guest is only allowed
+to read it. Writes will be posted to userspace as KVM_EXIT_MMIO exits.

 When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory
 region are automatically reflected into the guest.  For example, an mmap()
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a01a424..994f47b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2148,6 +2148,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_GET_TSC_KHZ:
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
+	case KVM_CAP_READONLY_MEM:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dc3aa2a..892d673 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -102,7 +102,8 @@ struct kvm_userspace_memory_region {
 };

 /* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+#define KVM_MEM_READONLY	(1UL << 1)

 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -617,6 +618,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_READONLY_MEM 81

 #ifdef KVM_CAP_IRQ_ROUTING

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4b96bc2..b551db1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -688,7 +688,7 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)

 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 {
-	if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES)
+	if (mem->flags & ~(KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_READONLY))
 		return -EINVAL;

 	return 0;
@@ -1033,10 +1033,11 @@ out:
 	return size;
 }

-static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
-				     gfn_t *nr_pages)
+static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+				     gfn_t *nr_pages, bool write)
 {
-	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
+	      ((slot->flags & KVM_MEM_READONLY) && write))
 		return bad_hva();

 	if (nr_pages)
@@ -1045,6 +1046,12 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
 	return gfn_to_hva_memslot(slot, gfn);
 }

+static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+				     gfn_t *nr_pages)
+{
+	return __gfn_to_hva_many(slot, gfn, nr_pages, true);
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
@@ -1057,7 +1064,7 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
  */
 static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
 {
-	return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
+	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }

 static int kvm_read_hva(void *data, void *hva, int len)
@@ -1098,6 +1105,34 @@ static inline int check_user_page_hwpoison(unsigned long addr)
 	return rc == -EHWPOISON;
 }

+static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
+{
+	if (unlikely(!(vma->vm_flags & VM_READ)))
+		return false;
+
+	if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
+		return false;
+
+	return true;
+}
+
+static int hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
+			bool slot_writable, bool *writable, struct page **page)
+{
+	int npages = 0;
+
+	if (!slot_writable)
+		return 0;
+
+	if (writable)
+		*writable = true;
+
+	if (atomic || async)
+		npages = __get_user_pages_fast(addr, 1, 1, page);
+
+	return npages;
+}
+
 static pfn_t hva_to_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 			unsigned long addr, bool atomic, bool *async,
 			bool write_fault, bool *writable)
@@ -1105,18 +1140,16 @@ static pfn_t hva_to_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 	struct page *page[1];
 	int npages = 0;
 	pfn_t pfn;
+	bool slot_writable = !(slot->flags & KVM_MEM_READONLY);

 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);

 	BUG_ON(!write_fault && !writable);
+	BUG_ON(write_fault && !slot_writable);

-	if (writable)
-		*writable = true;
-
-	if (atomic || async)
-		npages = __get_user_pages_fast(addr, 1, 1, page);
-
+	npages = hva_to_pfn_fast(addr, atomic, async, slot_writable,
+				 writable, page);
 	if (unlikely(npages != 1) && !atomic) {
 		might_sleep();

@@ -1133,7 +1166,7 @@ static pfn_t hva_to_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 						     page);

 		/* map read fault as writable if possible */
-		if (unlikely(!write_fault) && npages == 1) {
+		if (unlikely(!write_fault) && npages == 1 && slot_writable) {
 			struct page *wpage[1];

 			npages = __get_user_pages_fast(addr, 1, 1, wpage);
@@ -1169,7 +1202,7 @@ static pfn_t hva_to_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 				vma->vm_pgoff;
 			BUG_ON(!kvm_is_mmio_pfn(pfn));
 		} else {
-			if (async && (vma->vm_flags & VM_WRITE))
+			if (async && vma_is_valid(vma, write_fault))
 				*async = true;
 			pfn = get_fault_pfn();
 		}
@@ -1190,7 +1223,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 		*async = false;

 	slot = gfn_to_memslot(kvm, gfn);
-	addr = gfn_to_hva_many(slot, gfn, NULL);
+	addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);

 	if (kvm_is_error_hva(addr)) {
 		get_page(bad_page);
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux