This is found by code observation only. Firstly, the worst case scenario should assume the whole range was covered by pmd sharing. The old algorithm might not work as expected for ranges like (1g-2m, 1g+2m), where the adjusted range should be (0, 1g+2m) but the expected range should be (0, 2g). Since at it, remove the loop since it should not be required. With that, the new code should be faster too when the invalidating range is huge. CC: Andrea Arcangeli <aarcange@xxxxxxxxxx> CC: Mike Kravetz <mike.kravetz@xxxxxxxxxx> CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> CC: linux-mm@xxxxxxxxx CC: linux-kernel@xxxxxxxxxxxxxxx Signed-off-by: Peter Xu <peterx@xxxxxxxxxx> --- mm/hugetlb.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4645f1441d32..0e5a0512c13c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -43,6 +43,9 @@ #include <linux/page_owner.h> #include "internal.h" +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define MIN(a,b) (((a)<(b))?(a):(b)) + int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; @@ -5321,25 +5324,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { - unsigned long check_addr; + unsigned long a_start, a_end; if (!(vma->vm_flags & VM_MAYSHARE)) return; - for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { - unsigned long a_start = check_addr & PUD_MASK; - unsigned long a_end = a_start + PUD_SIZE; + /* Extend the range to be PUD aligned for a worst case scenario */ + a_start = ALIGN_DOWN(*start, PUD_SIZE); + a_end = ALIGN(*end, PUD_SIZE); - /* - * If sharing is possible, adjust start/end if necessary. - */ - if (range_in_vma(vma, a_start, a_end)) { - if (a_start < *start) - *start = a_start; - if (a_end > *end) - *end = a_end; - } - } + /* + * Intersect the range with the vma range, since pmd sharing won't be + * across vma after all + */ + *start = MAX(vma->vm_start, a_start); + *end = MIN(vma->vm_end, a_end); } /* -- 2.26.2