[RFC PATCH v2 4/5] hugetlb_cgroup: Add accounting for shared mappings

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



For shared mappings, the pointer to the hugetlb_cgroup to uncharge lives
in the resv_map entries, in file_region->reservation_counter.

When a file_region entry is added to the resv_map via region_add, we
also charge the appropriate hugetlb_cgroup and put the pointer to that
in file_region->reservation_counter. This is slightly delicate since we
need to not modify the resv_map until we know that charging the
reservation has succeeded. If charging doesn't succeed, we report the
error to the caller, so that the kernel fails the reservation.

On region_del, which is when the hugetlb memory is unreserved, we delete
the file_region entry in the resv_map, but also uncharge the
file_region->reservation_counter.

---
 mm/hugetlb.c | 208 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 170 insertions(+), 38 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 235996aef6618..d76e3137110ab 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -242,8 +242,72 @@ struct file_region {
 	struct list_head link;
 	long from;
 	long to;
+#ifdef CONFIG_CGROUP_HUGETLB
+	/*
+	 * On shared mappings, each reserved region appears as a struct
+	 * file_region in resv_map. These fields hold the info needed to
+	 * uncharge each reservation.
+	 */
+	struct page_counter *reservation_counter;
+	unsigned long pages_per_hpage;
+#endif
 };

+/* Must be called with resv->lock held. Calling this with dry_run == true will
+ * count the number of pages added but will not modify the linked list.
+ */
+static long consume_regions_we_overlap_with(struct file_region *rg,
+		struct list_head *head, long f, long *t,
+		struct hugetlb_cgroup *h_cg,
+		struct hstate *h,
+		bool dry_run)
+{
+	long add = 0;
+	struct file_region *trg = NULL, *nrg = NULL;
+
+	/* Consume any regions we now overlap with. */
+	nrg = rg;
+	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
+		if (&rg->link == head)
+			break;
+		if (rg->from > *t)
+			break;
+
+		/* If this area reaches higher then extend our area to
+		 * include it completely.  If this is not the first area
+		 * which we intend to reuse, free it.
+		 */
+		if (rg->to > *t)
+			*t = rg->to;
+		if (rg != nrg) {
+			/* Decrement return value by the deleted range.
+			 * Another range will span this area so that by
+			 * end of routine add will be >= zero
+			 */
+			add -= (rg->to - rg->from);
+			if (!dry_run) {
+				list_del(&rg->link);
+				kfree(rg);
+			}
+		}
+	}
+
+	add += (nrg->from - f);		/* Added to beginning of region */
+	add += *t - nrg->to;		/* Added to end of region */
+
+	if (!dry_run) {
+		nrg->from = f;
+		nrg->to = *t;
+#ifdef CONFIG_CGROUP_HUGETLB
+		nrg->reservation_counter =
+			&h_cg->reserved_hugepage[hstate_index(h)];
+		nrg->pages_per_hpage = pages_per_huge_page(h);
+#endif
+	}
+
+	return add;
+}
+
 /*
  * Add the huge page range represented by [f, t) to the reserve
  * map.  In the normal case, existing regions will be expanded
@@ -258,11 +322,13 @@ struct file_region {
  * Return the number of new huge pages added to the map.  This
  * number is greater than or equal to zero.
  */
-static long region_add(struct resv_map *resv, long f, long t)
+static long region_add(struct hstate *h, struct resv_map *resv, long f, long t)
 {
 	struct list_head *head = &resv->regions;
-	struct file_region *rg, *nrg, *trg;
-	long add = 0;
+	struct file_region *rg, *nrg;
+	long add = 0, newadd = 0;
+	struct hugetlb_cgroup *h_cg = NULL;
+	int ret = 0;

 	spin_lock(&resv->lock);
 	/* Locate the region we are either in or before. */
@@ -277,6 +343,23 @@ static long region_add(struct resv_map *resv, long f, long t)
 	 * from the cache and use it for this range.
 	 */
 	if (&rg->link == head || t < rg->from) {
+#ifdef CONFIG_CGROUP_HUGETLB
+		/*
+		 * If res->reservation_counter is NULL, then it means this is
+		 * a shared mapping, and hugetlb cgroup accounting should be
+		 * done on the file_region entries inside resv_map.
+		 */
+		if (!resv->reservation_counter) {
+			ret = hugetlb_cgroup_charge_cgroup(
+					hstate_index(h),
+					(t - f) * pages_per_huge_page(h),
+					&h_cg, true);
+		}
+
+		if (ret)
+			goto out_locked;
+#endif
+
 		VM_BUG_ON(resv->region_cache_count <= 0);

 		resv->region_cache_count--;
@@ -286,6 +369,15 @@ static long region_add(struct resv_map *resv, long f, long t)

 		nrg->from = f;
 		nrg->to = t;
+
+#ifdef CONFIG_CGROUP_HUGETLB
+		if (h_cg) {
+			nrg->reservation_counter =
+				&h_cg->reserved_hugepage[hstate_index(h)];
+			nrg->pages_per_hpage = pages_per_huge_page(h);
+		}
+#endif
+
 		list_add(&nrg->link, rg->link.prev);

 		add += t - f;
@@ -296,38 +388,37 @@ static long region_add(struct resv_map *resv, long f, long t)
 	if (f > rg->from)
 		f = rg->from;

-	/* Check for and consume any regions we now overlap with. */
-	nrg = rg;
-	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
-		if (&rg->link == head)
-			break;
-		if (rg->from > t)
-			break;
-
-		/* If this area reaches higher then extend our area to
-		 * include it completely.  If this is not the first area
-		 * which we intend to reuse, free it. */
-		if (rg->to > t)
-			t = rg->to;
-		if (rg != nrg) {
-			/* Decrement return value by the deleted range.
-			 * Another range will span this area so that by
-			 * end of routine add will be >= zero
-			 */
-			add -= (rg->to - rg->from);
-			list_del(&rg->link);
-			kfree(rg);
-		}
+#ifdef CONFIG_CGROUP_HUGETLB
+	/* Count any regions we now overlap with. */
+	add = consume_regions_we_overlap_with(rg, head, f, &t, NULL, NULL,
+			true);
+
+	if (!resv->reservation_counter) {
+		ret = hugetlb_cgroup_charge_cgroup(
+				hstate_index(h),
+				add * pages_per_huge_page(h),
+				&h_cg, true);
 	}

-	add += (nrg->from - f);		/* Added to beginning of region */
-	nrg->from = f;
-	add += t - nrg->to;		/* Added to end of region */
-	nrg->to = t;
+	if (ret)
+		goto out_locked;
+#endif
+
+	/* Check for and consume any regions we now overlap with. */
+	newadd = consume_regions_we_overlap_with(rg, head, f, &t, h_cg, h,
+			false);
+	/*
+	 * If these aren't equal, then there is a bug with
+	 * consume_regions_we_overlap_with, and we're charging the wrong amount
+	 * of memory.
+	 */
+	WARN_ON(add != newadd);

 out_locked:
 	resv->adds_in_progress--;
 	spin_unlock(&resv->lock);
+	if (ret)
+		return ret;
 	VM_BUG_ON(add < 0);
 	return add;
 }
@@ -487,6 +578,10 @@ static long region_del(struct resv_map *resv, long f, long t)
 	struct file_region *rg, *trg;
 	struct file_region *nrg = NULL;
 	long del = 0;
+#ifdef CONFIG_CGROUP_HUGETLB
+	struct page_counter *reservation_counter = NULL;
+	unsigned long pages_per_hpage = 0;
+#endif

 retry:
 	spin_lock(&resv->lock);
@@ -514,6 +609,14 @@ static long region_del(struct resv_map *resv, long f, long t)
 				nrg = list_first_entry(&resv->region_cache,
 							struct file_region,
 							link);
+#ifdef CONFIG_CGROUP_HUGETLB
+				/*
+				 * Save counter information from the deleted
+				 * node, in case we need to do an uncharge.
+				 */
+				reservation_counter = nrg->reservation_counter;
+				pages_per_hpage = nrg->pages_per_hpage;
+#endif
 				list_del(&nrg->link);
 				resv->region_cache_count--;
 			}
@@ -543,6 +646,14 @@ static long region_del(struct resv_map *resv, long f, long t)

 		if (f <= rg->from && t >= rg->to) { /* Remove entire region */
 			del += rg->to - rg->from;
+#ifdef CONFIG_CGROUP_HUGETLB
+			/*
+			 * Save counter information from the deleted node,
+			 * in case we need to do an uncharge.
+			 */
+			reservation_counter = rg->reservation_counter;
+			pages_per_hpage = rg->pages_per_hpage;
+#endif
 			list_del(&rg->link);
 			kfree(rg);
 			continue;
@@ -559,6 +670,19 @@ static long region_del(struct resv_map *resv, long f, long t)

 	spin_unlock(&resv->lock);
 	kfree(nrg);
+#ifdef CONFIG_CGROUP_HUGETLB
+	/*
+	 * If resv->reservation_counter is NULL, then this is shared
+	 * reservation, and the reserved memory is tracked in the file_struct
+	 * entries inside of resv_map. So we need to uncharge the memory here.
+	 */
+	if (reservation_counter && pages_per_hpage && del > 0 &&
+	    !resv->reservation_counter) {
+		hugetlb_cgroup_uncharge_counter(
+				reservation_counter,
+				del * pages_per_hpage);
+	}
+#endif
 	return del;
 }

@@ -1930,7 +2054,7 @@ static long __vma_reservation_common(struct hstate *h,
 		ret = region_chg(resv, idx, idx + 1);
 		break;
 	case VMA_COMMIT_RESV:
-		ret = region_add(resv, idx, idx + 1);
+		ret = region_add(h, resv, idx, idx + 1);
 		break;
 	case VMA_END_RESV:
 		region_abort(resv, idx, idx + 1);
@@ -1938,7 +2062,7 @@ static long __vma_reservation_common(struct hstate *h,
 		break;
 	case VMA_ADD_RESV:
 		if (vma->vm_flags & VM_MAYSHARE)
-			ret = region_add(resv, idx, idx + 1);
+			ret = region_add(h, resv, idx, idx + 1);
 		else {
 			region_abort(resv, idx, idx + 1);
 			ret = region_del(resv, idx, idx + 1);
@@ -4536,7 +4660,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 					struct vm_area_struct *vma,
 					vm_flags_t vm_flags)
 {
-	long ret, chg;
+	long ret, chg, add;
 	struct hstate *h = hstate_inode(inode);
 	struct hugepage_subpool *spool = subpool_inode(inode);
 	struct resv_map *resv_map;
@@ -4624,9 +4748,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 */
 	ret = hugetlb_acct_memory(h, gbl_reserve);
 	if (ret < 0) {
-		/* put back original number of pages, chg */
-		(void)hugepage_subpool_put_pages(spool, chg);
-		goto out_err;
+		goto out_put_pages;
 	}

 	/*
@@ -4641,7 +4763,12 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * else has to be done for private mappings here
 	 */
 	if (!vma || vma->vm_flags & VM_MAYSHARE) {
-		long add = region_add(resv_map, from, to);
+		add = region_add(h, resv_map, from, to);
+		if (add < 0) {
+			ret = -ENOMEM;
+			goto out_acct_memory;
+		}
+

 		if (unlikely(chg > add)) {
 			/*
@@ -4659,10 +4786,15 @@ int hugetlb_reserve_pages(struct inode *inode,
 		}
 	}
 	return 0;
+out_acct_memory:
+	hugetlb_acct_memory(h, -gbl_reserve);
+out_put_pages:
+	/* put back original number of pages, chg */
+	(void)hugepage_subpool_put_pages(spool, chg);
 out_err:
 	if (!vma || vma->vm_flags & VM_MAYSHARE)
-		/* Don't call region_abort if region_chg failed */
-		if (chg >= 0)
+		/* Don't call region_abort if region_chg or region_add failed */
+		if (chg >= 0 && add >= 0)
 			region_abort(resv_map, from, to);
 	if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
 		kref_put(&resv_map->refs, resv_map_release);
--
2.23.0.rc1.153.gdeed80330f-goog




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux