- If gbl_chg=1, the allocation cannot reuse an existing reservation - If gbl_chg=0, the allocation should reuse an existing reservation Firstly, map_chg is defined as following, to cover all cases of hugetlb reservation scenarios (mostly, via vma_needs_reservation(), but cow_from_owner is an outlier): CONDITION HAS RESERVATION? ========= ================ - SHARED: always check against per-inode resv_map (ignore NONRESERVE) - If resv exists ==> YES [1] - If not ==> NO [2] - PRIVATE: complicated... - Request came from a CoW from owner resv map ==> NO [3] (when cow_from_owner==true) - If does not own a resv_map at all.. ==> NO [4] (examples: VM_NORESERVE, private fork()) - If owns a resv_map, but resv donsn't exists ==> NO [5] - If owns a resv_map, and resv exists ==> YES [6] Further on, gbl_chg considered spool setup, so that is a decision based on all the context. If we look at vma_has_reserves(), it almost does check that has already been processed by map_chg accounting (I marked each return value to the case above): static bool vma_has_reserves(struct vm_area_struct *vma, long chg) { if (vma->vm_flags & VM_NORESERVE) { if (vma->vm_flags & VM_MAYSHARE && chg == 0) return true; ==> [1] else return false; ==> [2] or [4] } if (vma->vm_flags & VM_MAYSHARE) { if (chg) return false; ==> [2] else return true; ==> [1] } if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { if (chg) return false; ==> [5] else return true; ==> [6] } return false; ==> [4] } It didn't check [3], but [3] case was actually already covered now by the "chg" / "gbl_chg" / "map_chg" calculations. In short, vma_has_reserves() doesn't provide anything more than return "!chg".. so just simplify all the things. There're a lot of comments describing truncation races, IIUC there should have no race as long as map_chg is properly done. Signed-off-by: Peter Xu <peterx@xxxxxxxxxx> --- mm/hugetlb.c | 67 ++++++---------------------------------------------- 1 file changed, 7 insertions(+), 60 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 14cfe0bb01e4..b7e16b3c4e67 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1247,66 +1247,13 @@ void clear_vma_resv_huge_pages(struct vm_area_struct *vma) } /* Returns true if the VMA has associated reserve pages */ -static bool vma_has_reserves(struct vm_area_struct *vma, long chg) +static bool vma_has_reserves(long chg) { - if (vma->vm_flags & VM_NORESERVE) { - /* - * This address is already reserved by other process(chg == 0), - * so, we should decrement reserved count. Without decrementing, - * reserve count remains after releasing inode, because this - * allocated page will go into page cache and is regarded as - * coming from reserved pool in releasing step. Currently, we - * don't have any other solution to deal with this situation - * properly, so add work-around here. - */ - if (vma->vm_flags & VM_MAYSHARE && chg == 0) - return true; - else - return false; - } - - /* Shared mappings always use reserves */ - if (vma->vm_flags & VM_MAYSHARE) { - /* - * We know VM_NORESERVE is not set. Therefore, there SHOULD - * be a region map for all pages. The only situation where - * there is no region map is if a hole was punched via - * fallocate. In this case, there really are no reserves to - * use. This situation is indicated if chg != 0. - */ - if (chg) - return false; - else - return true; - } - /* - * Only the process that called mmap() has reserves for - * private mappings. + * Now "chg" has all the conditions considered for whether we + * should use an existing reservation. */ - if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { - /* - * Like the shared case above, a hole punch or truncate - * could have been performed on the private mapping. - * Examine the value of chg to determine if reserves - * actually exist or were previously consumed. - * Very Subtle - The value of chg comes from a previous - * call to vma_needs_reserves(). The reserve map for - * private mappings has different (opposite) semantics - * than that of shared mappings. vma_needs_reserves() - * has already taken this difference in semantics into - * account. Therefore, the meaning of chg is the same - * as in the shared case above. Code could easily be - * combined, but keeping it separate draws attention to - * subtle differences. - */ - if (chg) - return false; - else - return true; - } - - return false; + return chg == 0; } static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio) @@ -1407,7 +1354,7 @@ static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h, * have no page reserves. This check ensures that reservations are * not "stolen". The child may still get SIGKILLed */ - if (!vma_has_reserves(vma, chg) && !available_huge_pages(h)) + if (!vma_has_reserves(chg) && !available_huge_pages(h)) goto err; gfp_mask = htlb_alloc_mask(h); @@ -1425,7 +1372,7 @@ static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h, folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, nid, nodemask); - if (folio && vma_has_reserves(vma, chg)) { + if (folio && vma_has_reserves(chg)) { folio_set_hugetlb_restore_reserve(folio); h->resv_huge_pages--; } @@ -3076,7 +3023,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, if (!folio) goto out_uncharge_cgroup; spin_lock_irq(&hugetlb_lock); - if (vma_has_reserves(vma, gbl_chg)) { + if (vma_has_reserves(gbl_chg)) { folio_set_hugetlb_restore_reserve(folio); h->resv_huge_pages--; } -- 2.47.0