The patch titled
     Subject: mm/hugetlb: Fix file_region entry allocations
has been added to the -mm tree.  Its filename is
     hugetlb-disable-region_add-file_region-coalescing-fix.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/hugetlb-disable-region_add-file_region-coalescing-fix.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/hugetlb-disable-region_add-file_region-coalescing-fix.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Mina Almasry <almasrymina@xxxxxxxxxx>
Subject: mm/hugetlb: Fix file_region entry allocations

Commit a9e443086489e ("hugetlb: disable region_add file_region coalescing")
introduced a bug in the allocation of file_region entries, which is fixed
here:

1. Refactor the file_region entry allocation logic into one function,
   called from both region_add and region_chg, since the code is now
   identical.

2. region_chg now modifies resv->adds_in_progress only after the regions
   have been allocated.  Previously it incremented adds_in_progress and
   then dropped the lock, which could confuse racing region_add calls into
   thinking they needed to allocate entries when they were not allowed to
   (a simplified sketch of this ordering follows the sign-off block below).

3. In region_add, only try to allocate regions when actual_regions_needed >
   in_regions_needed.  This does not fix a bug, but it makes the code
   cleaner and easier to reason about.

Tested using the ltp hugemmap0* tests and the libhugetlbfs tests.

Link: http://lkml.kernel.org/r/20200219012736.20363-1-almasrymina@xxxxxxxxxx
Fixes: a9e443086489e ("hugetlb: disable region_add file_region coalescing")
Reported-by: Qian Cai <cai@xxxxxx>
Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Greg Thelen <gthelen@xxxxxxxxxx>
Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Sandipan Das <sandipan@xxxxxxxxxxxxx>
Cc: Shakeel Butt <shakeelb@xxxxxxxxxx>
Cc: Shuah Khan <shuah@xxxxxxxxxx>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---
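
Illustration of the ordering in point 2 above (a simplified, userspace-only
sketch; the toy_* names are stand-ins invented for this note and are not
kernel code).  The invariant the fix restores is that adds_in_progress is
published only once the cache holds enough entries to back it, so a racing
region_add that reads adds_in_progress under the lock can rely on the
matching cache entries already existing:

	#include <assert.h>
	#include <stdio.h>

	/* Toy model of the two resv_map counters touched by this patch. */
	struct toy_resv {
		int region_cache_count;	/* file_region entries in the cache */
		int adds_in_progress;	/* entries promised to callers */
	};

	/* Mirrors allocate_file_region_entries(): top up the cache so it
	 * covers the already-promised adds plus the new demand.
	 */
	static int toy_allocate_entries(struct toy_resv *resv, int needed)
	{
		while (resv->region_cache_count <
		       resv->adds_in_progress + needed)
			resv->region_cache_count++;	/* stands in for kmalloc */
		return 0;
	}

	/* Mirrors the fixed region_chg(): allocate first, publish second. */
	static int toy_region_chg(struct toy_resv *resv, int regions_needed)
	{
		if (toy_allocate_entries(resv, regions_needed))
			return -1;
		/* Only now do racing callers see the extra demand. */
		resv->adds_in_progress += regions_needed;
		return 0;
	}

	/* Mirrors region_add(): consume what region_chg promised. */
	static void toy_region_add(struct toy_resv *resv, int regions_needed)
	{
		/* The invariant that the old ordering could violate. */
		assert(resv->region_cache_count >= resv->adds_in_progress);
		resv->region_cache_count -= regions_needed;
		resv->adds_in_progress -= regions_needed;
	}

	int main(void)
	{
		struct toy_resv resv = { 0, 0 };

		toy_region_chg(&resv, 2);
		toy_region_add(&resv, 2);
		printf("cache=%d, in progress=%d\n",
		       resv.region_cache_count, resv.adds_in_progress);
		return 0;
	}

With the old ordering (incrementing adds_in_progress before topping up the
cache), a racing caller could observe the bumped counter while the cache was
still short, which is exactly the state the assert above rejects.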

 mm/hugetlb.c |  150 ++++++++++++++++++++++++-------------------------
 1 file changed, 75 insertions(+), 75 deletions(-)

--- a/mm/hugetlb.c~hugetlb-disable-region_add-file_region-coalescing-fix
+++ a/mm/hugetlb.c
@@ -337,6 +337,67 @@ static long add_reservation_in_range(str
 	return add;
 }
 
+/* Must be called with resv->lock acquired. Will drop lock to allocate entries.
+ */
+static int allocate_file_region_entries(struct resv_map *resv,
+					int regions_needed)
+	__must_hold(&resv->lock)
+{
+	struct list_head allocated_regions;
+	int to_allocate = 0, i = 0;
+	struct file_region *trg = NULL, *rg = NULL;
+
+	VM_BUG_ON(regions_needed < 0);
+
+	INIT_LIST_HEAD(&allocated_regions);
+
+	/*
+	 * Check for sufficient descriptors in the cache to accommodate
+	 * the number of in progress add operations plus regions_needed.
+	 *
+	 * This is a while loop because when we drop the lock, some other call
+	 * to region_add or region_del may have consumed some region_entries,
+	 * so we keep looping here until we finally have enough entries for
+	 * (adds_in_progress + regions_needed).
+	 */
+	while (resv->region_cache_count <
+	       (resv->adds_in_progress + regions_needed)) {
+		to_allocate = resv->adds_in_progress + regions_needed -
+			      resv->region_cache_count;
+
+		/* At this point, we should have enough entries in the cache
+		 * for all the existing adds_in_progress. We should only be
+		 * needing to allocate for regions_needed.
+		 */
+		VM_BUG_ON(resv->region_cache_count < resv->adds_in_progress);
+
+		spin_unlock(&resv->lock);
+		for (i = 0; i < to_allocate; i++) {
+			trg = kmalloc(sizeof(*trg), GFP_KERNEL);
+			if (!trg)
+				goto out_of_memory;
+			list_add(&trg->link, &allocated_regions);
+		}
+
+		spin_lock(&resv->lock);
+
+		list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
+			list_del(&rg->link);
+			list_add(&rg->link, &resv->region_cache);
+			resv->region_cache_count++;
+		}
+	}
+
+	return 0;
+
+out_of_memory:
+	list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
+		list_del(&rg->link);
+		kfree(rg);
+	}
+	return -ENOMEM;
+}
+
 /*
  * Add the huge page range represented by [f, t) to the reserve
  * map. Regions will be taken from the cache to fill in this range.
@@ -357,11 +418,7 @@ static long add_reservation_in_range(str
 static long region_add(struct resv_map *resv, long f, long t,
 		       long in_regions_needed)
 {
-	long add = 0, actual_regions_needed = 0, i = 0;
-	struct file_region *trg = NULL, *rg = NULL;
-	struct list_head allocated_regions;
-
-	INIT_LIST_HEAD(&allocated_regions);
+	long add = 0, actual_regions_needed = 0;
 
 	spin_lock(&resv->lock);
 retry:
@@ -372,34 +429,24 @@ retry:
 	/*
 	 * Check for sufficient descriptors in the cache to accommodate
 	 * this add operation. Note that actual_regions_needed may be greater
-	 * than in_regions_needed. In this case, we need to make sure that we
+	 * than in_regions_needed, as the resv_map may have been modified since
+	 * the region_chg call. In this case, we need to make sure that we
	 * allocate extra entries, such that we have enough for all the
 	 * existing adds_in_progress, plus the excess needed for this
 	 * operation.
 	 */
-	if (resv->region_cache_count <
-	    resv->adds_in_progress +
-	    (actual_regions_needed - in_regions_needed)) {
+	if (actual_regions_needed > in_regions_needed &&
+	    resv->region_cache_count <
+		    resv->adds_in_progress +
+			    (actual_regions_needed - in_regions_needed)) {
 		/* region_add operation of range 1 should never need to
 		 * allocate file_region entries.
 		 */
 		VM_BUG_ON(t - f <= 1);
 
-		/* Must drop lock to allocate a new descriptor. */
-		spin_unlock(&resv->lock);
-		for (i = 0; i < (actual_regions_needed - in_regions_needed);
-		     i++) {
-			trg = kmalloc(sizeof(*trg), GFP_KERNEL);
-			if (!trg)
-				goto out_of_memory;
-			list_add(&trg->link, &allocated_regions);
-		}
-		spin_lock(&resv->lock);
-
-		list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
-			list_del(&rg->link);
-			list_add(&rg->link, &resv->region_cache);
-			resv->region_cache_count++;
+		if (allocate_file_region_entries(
+			    resv, actual_regions_needed - in_regions_needed)) {
+			return -ENOMEM;
 		}
 
 		goto retry;
@@ -412,13 +459,6 @@ retry:
 	spin_unlock(&resv->lock);
 	VM_BUG_ON(add < 0);
 	return add;
-
-out_of_memory:
-	list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
-		list_del(&rg->link);
-		kfree(rg);
-	}
-	return -ENOMEM;
 }
 
 /*
@@ -444,11 +484,7 @@ out_of_memory:
 static long region_chg(struct resv_map *resv, long f, long t,
 		       long *out_regions_needed)
 {
-	struct file_region *trg = NULL, *rg = NULL;
-	long chg = 0, i = 0, to_allocate = 0;
-	struct list_head allocated_regions;
-
-	INIT_LIST_HEAD(&allocated_regions);
+	long chg = 0;
 
 	spin_lock(&resv->lock);
 
@@ -458,49 +494,13 @@ static long region_chg(struct resv_map *
 	if (*out_regions_needed == 0)
 		*out_regions_needed = 1;
 
-	resv->adds_in_progress += *out_regions_needed;
-
-	/*
-	 * Check for sufficient descriptors in the cache to accommodate
-	 * the number of in progress add operations.
-	 */
-	while (resv->region_cache_count < resv->adds_in_progress) {
-		to_allocate = resv->adds_in_progress - resv->region_cache_count;
-
-		/* Must drop lock to allocate a new descriptor. Note that even
-		 * though we drop the lock here, we do not make another call to
-		 * add_reservation_in_range after re-acquiring the lock.
-		 * Essentially this branch makes sure that we have enough
-		 * descriptors in the cache as suggested by the first call to
-		 * add_reservation_in_range. If more regions turn out to be
-		 * required, region_add will deal with it.
-		 */
-		spin_unlock(&resv->lock);
-		for (i = 0; i < to_allocate; i++) {
-			trg = kmalloc(sizeof(*trg), GFP_KERNEL);
-			if (!trg)
-				goto out_of_memory;
-			list_add(&trg->link, &allocated_regions);
-		}
-
-		spin_lock(&resv->lock);
+	if (allocate_file_region_entries(resv, *out_regions_needed))
+		return -ENOMEM;
 
-		list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
-			list_del(&rg->link);
-			list_add(&rg->link, &resv->region_cache);
-			resv->region_cache_count++;
-		}
-	}
+	resv->adds_in_progress += *out_regions_needed;
 
 	spin_unlock(&resv->lock);
 	return chg;
-
-out_of_memory:
-	list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
-		list_del(&rg->link);
-		kfree(rg);
-	}
-	return -ENOMEM;
 }
 
 /*
_

Patches currently in -mm which might be from almasrymina@xxxxxxxxxx are

hugetlb_cgroup-add-hugetlb_cgroup-reservation-counter.patch
hugetlb_cgroup-add-interface-for-charge-uncharge-hugetlb-reservations.patch
hugetlb_cgroup-add-reservation-accounting-for-private-mappings.patch
hugetlb-disable-region_add-file_region-coalescing.patch
hugetlb-disable-region_add-file_region-coalescing-fix.patch
hugetlb_cgroup-add-accounting-for-shared-mappings.patch
hugetlb_cgroup-support-noreserve-mappings.patch
hugetlb-support-file_region-coalescing-again.patch
hugetlb_cgroup-add-hugetlb_cgroup-reservation-tests.patch
hugetlb_cgroup-add-hugetlb_cgroup-reservation-docs.patch