[PATCH v1] vdpa: Do not count the pages that were already pinned in the vhost-vDPA

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We count pinned_vm as follows in vhost-vDPA:

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
                ret = -ENOMEM;
                goto unlock;
        }
This means that if two vDPA devices are attached to the same VM, the pages are counted twice.
So add a tree that records the pages already counted, so that they are not counted again.

Signed-off-by: Cindy Lu <lulu@xxxxxxxxxx>
---
 drivers/vhost/vdpa.c     | 79 ++++++++++++++++++++++++++++++++++++++--
 include/linux/mm_types.h |  2 +
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 05f5fd2af58f..48cb5c8264b5 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -24,6 +24,9 @@
 #include <linux/vhost.h>
 
 #include "vhost.h"
+#include <linux/rbtree.h>
+#include <linux/interval_tree.h>
+#include <linux/interval_tree_generic.h>
 
 enum {
 	VHOST_VDPA_BACKEND_FEATURES =
@@ -505,6 +508,50 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 	mutex_unlock(&d->mutex);
 	return r;
 }
+/* Track [start, last] in @root; returns 0, -EFAULT or -ENOMEM. */
+int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
+{
+	struct interval_tree_node *new_node;
+
+	if (last < start)
+		return -EFAULT;
+
+	/* Split [0, ULONG_MAX] in two nodes; stop if the lower half fails. */
+	if (start == 0 && last == ULONG_MAX) {
+		u64 mid = last / 2;
+		int err = vhost_vdpa_add_range_ctx(root, start, mid);
+
+		if (err)
+			return err;
+		start = mid + 1;
+	}
+
+	new_node = kmalloc(sizeof(*new_node), GFP_ATOMIC);
+	if (!new_node)
+		return -ENOMEM;
+
+	new_node->start = start;
+	new_node->last = last;
+	interval_tree_insert(new_node, root);
+
+	return 0;
+}
+
+/* Unlink and free every node the tree reports for [start, last]. */
+void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
+{
+	struct interval_tree_node *hit;
+
+	/* kfree() is the loop step, so it runs after the node is unlinked. */
+	for (; (hit = interval_tree_iter_first(root, start, last)); kfree(hit))
+		interval_tree_remove(hit, root);
+}
+
+struct interval_tree_node *
+vhost_vdpa_search_range(struct rb_root_cached *root, u64 start, u64 last)
+{
+	return interval_tree_iter_first(root, start, last); /* NULL: not tracked */
+}
 
 static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 {
@@ -513,6 +560,7 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 	struct vhost_iotlb_map *map;
 	struct page *page;
 	unsigned long pfn, pinned;
+	struct interval_tree_node *new_node = NULL;
 
 	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 		pinned = PFN_DOWN(map->size);
@@ -523,7 +571,18 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 				set_page_dirty_lock(page);
 			unpin_user_page(page);
 		}
-		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+
+		new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
+						   map->start,
+						   map->start + map->size - 1);
+
+		if (new_node) {
+			vhost_vdpa_del_range(&dev->mm->root_for_vdpa,
+					     map->start,
+					     map->start + map->size - 1);
+			atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+		}
+
 		vhost_iotlb_map_free(iotlb, map);
 	}
 }
@@ -591,6 +650,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
 	int r = 0;
+	struct interval_tree_node *new_node = NULL;
 
 	r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
 				      pa, perm, opaque);
@@ -611,9 +671,22 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
 		return r;
 	}
 
-	if (!vdpa->use_va)
-		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+	if (!vdpa->use_va) {
+		new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
+						   iova, iova + size - 1);
+
+		if (new_node == 0) {
+			r = vhost_vdpa_add_range_ctx(&dev->mm->root_for_vdpa,
+						     iova, iova + size - 1);
+			if (r) {
+				vhost_iotlb_del_range(dev->iotlb, iova,
+						      iova + size - 1);
+				return r;
+			}
 
+			atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+		}
+	}
 	return 0;
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5140e5feb486..46eaa6d0560b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -634,6 +634,8 @@ struct mm_struct {
 #ifdef CONFIG_IOMMU_SUPPORT
 		u32 pasid;
 #endif
+		struct rb_root_cached root_for_vdpa;
+
 	} __randomize_layout;
 
 	/*
-- 
2.34.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux