Hi:

On 2021/1/20 9:30, Mike Kravetz wrote:
> As hugetlbfs evolved, state information about hugetlb pages was added.
> One 'convenient' way of doing this was to use available fields in tail
> pages. Over time, it has become difficult to know the meaning or contents
> of fields simply by looking at a small bit of code. Sometimes, the
> naming is just confusing. For example: The PagePrivate flag indicates
> a huge page reservation was consumed and needs to be restored if an error
> is encountered and the page is freed before it is instantiated. The

This PagePrivate flag really confused me for a long time. :(

> page.private field contains the pointer to a subpool if the page is
> associated with one.
>
> In an effort to make the code more readable, use page.private to contain
> hugetlb specific page flags. These flags will have test, set and clear
> functions similar to those used for 'normal' page flags. More importantly,
> an enum of flag values will be created with names that actually reflect
> their purpose.

Thanks for doing this. This would make life easier.

>
> In this patch,
> - Create infrastructure for hugetlb specific page flag functions
> - Move subpool pointer to page[1].private to make way for flags
>   Create routines with meaningful names to modify subpool field
> - Use new HPageRestoreReserve flag instead of PagePrivate
>
> Conversion of other state information will happen in subsequent patches.
>
> Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
> ---
>  fs/hugetlbfs/inode.c    | 12 ++-----
>  include/linux/hugetlb.h | 72 +++++++++++++++++++++++++++++++++++++++++
>  mm/hugetlb.c            | 45 +++++++++++++-------------
>  3 files changed, 97 insertions(+), 32 deletions(-)
>
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 740693d7f255..b8a661780c4a 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -955,15 +955,9 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
>      if (rc != MIGRATEPAGE_SUCCESS)
>          return rc;
>
> -    /*
> -     * page_private is subpool pointer in hugetlb pages. Transfer to
> -     * new page. PagePrivate is not associated with page_private for
> -     * hugetlb pages and can not be set here as only page_huge_active
> -     * pages can be migrated.
> -     */
> -    if (page_private(page)) {
> -        set_page_private(newpage, page_private(page));
> -        set_page_private(page, 0);
> +    if (hugetlb_page_subpool(page)) {
> +        hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page));
> +        hugetlb_set_page_subpool(page, NULL);
>      }
>
>      if (mode != MIGRATE_SYNC_NO_COPY)
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index ef5b144b8aac..be71a00ee2a0 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -472,6 +472,64 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>                      unsigned long flags);
>  #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
>
> +/*
> + * hugetlb page specific state flags. These flags are located in page.private
> + * of the hugetlb head page. Functions created via the below macros should be
> + * used to manipulate these flags.
> + *
> + * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
> + *    allocation time. Cleared when page is fully instantiated. Free
> + *    routine checks flag to restore a reservation on error paths.
> + */
> +enum hugetlb_page_flags {
> +    HPG_restore_reserve = 0,
> +    __NR_HPAGEFLAGS,
> +};
> +
> +/*
> + * Macros to create test, set and clear function definitions for
> + * hugetlb specific page flags.
> + */
> +#ifdef CONFIG_HUGETLB_PAGE
> +#define TESTHPAGEFLAG(uname, flname)                    \
> +static inline int HPage##uname(struct page *page)            \
> +    { BUILD_BUG_ON(sizeof_field(struct page, private) *        \
> +            BITS_PER_BYTE < __NR_HPAGEFLAGS);        \
> +    return test_bit(HPG_##flname, &(page->private)); }
> +
> +#define SETHPAGEFLAG(uname, flname)                    \
> +static inline void SetHPage##uname(struct page *page)            \
> +    { set_bit(HPG_##flname, &(page->private)); }
> +
> +#define CLEARHPAGEFLAG(uname, flname)                    \
> +static inline void ClearHPage##uname(struct page *page)            \
> +    { clear_bit(HPG_##flname, &(page->private)); }
> +#else
> +#define TESTHPAGEFLAG(uname, flname)                    \
> +static inline int HPage##uname(struct page *page)            \
> +    { BUILD_BUG_ON(sizeof_field(struct page, private) *        \
> +            BITS_PER_BYTE < __NR_HPAGEFLAGS);        \
> +    return 0; }
> +
> +#define SETHPAGEFLAG(uname, flname)                    \
> +static inline void SetHPage##uname(struct page *page)            \
> +    { }
> +
> +#define CLEARHPAGEFLAG(uname, flname)                    \
> +static inline void ClearHPage##uname(struct page *page)            \
> +    { }
> +#endif
> +
> +#define HPAGEFLAG(uname, flname)                    \
> +    TESTHPAGEFLAG(uname, flname)                    \
> +    SETHPAGEFLAG(uname, flname)                    \
> +    CLEARHPAGEFLAG(uname, flname)                    \
> +
> +/*
> + * Create functions associated with hugetlb page flags
> + */
> +HPAGEFLAG(RestoreReserve, restore_reserve)
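
A small aside for anyone else reading along: expanding the macros by
hand, the HPAGEFLAG(RestoreReserve, restore_reserve) line above should
generate roughly the following (my own expansion for illustration, not
text from the patch):

    /* CONFIG_HUGETLB_PAGE=y case, hand-expanded from the macros above */
    static inline int HPageRestoreReserve(struct page *page)
        { BUILD_BUG_ON(sizeof_field(struct page, private) *
                BITS_PER_BYTE < __NR_HPAGEFLAGS);
        return test_bit(HPG_restore_reserve, &(page->private)); }

    static inline void SetHPageRestoreReserve(struct page *page)
        { set_bit(HPG_restore_reserve, &(page->private)); }

    static inline void ClearHPageRestoreReserve(struct page *page)
        { clear_bit(HPG_restore_reserve, &(page->private)); }

Nice and consistent with the Page/SetPage/ClearPage naming used by the
generic page flag macros.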
> +
>  #ifdef CONFIG_HUGETLB_PAGE
>
>  #define HSTATE_NAME_LEN 32
> @@ -531,6 +589,20 @@ extern unsigned int default_hstate_idx;
>
>  #define default_hstate (hstates[default_hstate_idx])
>
> +/*
> + * hugetlb page subpool pointer located in hpage[1].private
> + */
> +static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
> +{
> +    return (struct hugepage_subpool *)(hpage+1)->private;
> +}
> +
> +static inline void hugetlb_set_page_subpool(struct page *hpage,
> +                    struct hugepage_subpool *subpool)
> +{
> +    set_page_private(hpage+1, (unsigned long)subpool);
> +}
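
If I read the end result correctly, per-hugepage state is now laid out
as below (my own summary, not text from the patch):

    page[0].private - hugetlb specific page flag bits (HPG_*)
    page[1].private - struct hugepage_subpool pointer

Keeping the flags in the head page and moving the subpool pointer to
the first tail page looks like a clean split to me.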
> +
>  static inline struct hstate *hstate_file(struct file *f)
>  {
>      return hstate_inode(file_inode(f));
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 737b2dce19e6..8bed6b5202d2 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1133,7 +1133,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
>      nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
>      page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
>      if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> -        SetPagePrivate(page);
> +        SetHPageRestoreReserve(page);
>          h->resv_huge_pages--;
>      }
>
> @@ -1407,20 +1407,19 @@ static void __free_huge_page(struct page *page)
>       */
>      struct hstate *h = page_hstate(page);
>      int nid = page_to_nid(page);
> -    struct hugepage_subpool *spool =
> -        (struct hugepage_subpool *)page_private(page);
> +    struct hugepage_subpool *spool = hugetlb_page_subpool(page);
>      bool restore_reserve;
>
>      VM_BUG_ON_PAGE(page_count(page), page);
>      VM_BUG_ON_PAGE(page_mapcount(page), page);
>
> -    set_page_private(page, 0);
> +    hugetlb_set_page_subpool(page, NULL);
>      page->mapping = NULL;
> -    restore_reserve = PagePrivate(page);
> -    ClearPagePrivate(page);
> +    restore_reserve = HPageRestoreReserve(page);
> +    ClearHPageRestoreReserve(page);
>
>      /*
> -     * If PagePrivate() was set on page, page allocation consumed a
> +     * If HPageRestoreReserve was set on page, page allocation consumed a
>       * reservation. If the page was associated with a subpool, there
>       * would have been a page reserved in the subpool before allocation
>       * via hugepage_subpool_get_pages(). Since we are 'restoring' the
> @@ -2254,24 +2253,24 @@ static long vma_add_reservation(struct hstate *h,
>   * This routine is called to restore a reservation on error paths. In the
>   * specific error paths, a huge page was allocated (via alloc_huge_page)
>   * and is about to be freed. If a reservation for the page existed,
> - * alloc_huge_page would have consumed the reservation and set PagePrivate
> - * in the newly allocated page. When the page is freed via free_huge_page,
> - * the global reservation count will be incremented if PagePrivate is set.
> - * However, free_huge_page can not adjust the reserve map. Adjust the
> - * reserve map here to be consistent with global reserve count adjustments
> - * to be made by free_huge_page.
> + * alloc_huge_page would have consumed the reservation and set
> + * HPageRestoreReserve in the newly allocated page. When the page is freed
> + * via free_huge_page, the global reservation count will be incremented if
> + * HPageRestoreReserve is set. However, free_huge_page can not adjust the
> + * reserve map. Adjust the reserve map here to be consistent with global
> + * reserve count adjustments to be made by free_huge_page.
>   */
>  static void restore_reserve_on_error(struct hstate *h,
>              struct vm_area_struct *vma, unsigned long address,
>              struct page *page)
>  {
> -    if (unlikely(PagePrivate(page))) {
> +    if (unlikely(HPageRestoreReserve(page))) {
>          long rc = vma_needs_reservation(h, vma, address);
>
>          if (unlikely(rc < 0)) {
>              /*
>               * Rare out of memory condition in reserve map
> -             * manipulation. Clear PagePrivate so that
> +             * manipulation. Clear HPageRestoreReserve so that
>               * global reserve count will not be incremented
>               * by free_huge_page. This will make it appear
>               * as though the reservation for this page was
> @@ -2280,7 +2279,7 @@ static void restore_reserve_on_error(struct hstate *h,
>               * is better than inconsistent global huge page
>               * accounting of reserve counts.
>               */
> -            ClearPagePrivate(page);
> +            ClearHPageRestoreReserve(page);
>          } else if (rc) {
>              rc = vma_add_reservation(h, vma, address);
>              if (unlikely(rc < 0))
> @@ -2288,7 +2287,7 @@ static void restore_reserve_on_error(struct hstate *h,
>                   * See above comment about rare out of
>                   * memory condition.
>                   */
> -                ClearPagePrivate(page);
> +                ClearHPageRestoreReserve(page);
>          } else
>              vma_end_reservation(h, vma, address);
>  }
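
To double check my understanding, the lifecycle of the new flag looks
like this (my own sketch based on the call sites in this patch, not
code from the patch itself):

    /* allocation time: a reservation was consumed for this page */
    SetHPageRestoreReserve(page);   /* dequeue_huge_page_vma(), alloc_huge_page() */

    /* page fully instantiated: nothing to restore on free any more */
    ClearHPageRestoreReserve(page); /* hugetlb_cow(), hugetlb_no_page(), ... */

    /* error before instantiation: page freed with the flag still set,
     * so __free_huge_page()/restore_reserve_on_error() hand the
     * reservation back */

That matches the HPG_restore_reserve description in the header comment.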
> @@ -2369,7 +2368,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
>      if (!page)
>          goto out_uncharge_cgroup;
>      if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
> -        SetPagePrivate(page);
> +        SetHPageRestoreReserve(page);
>          h->resv_huge_pages--;
>      }
>      spin_lock(&hugetlb_lock);
> @@ -2387,7 +2386,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
>
>      spin_unlock(&hugetlb_lock);
>
> -    set_page_private(page, (unsigned long)spool);
> +    hugetlb_set_page_subpool(page, spool);
>
>      map_commit = vma_commit_reservation(h, vma, addr);
>      if (unlikely(map_chg > map_commit)) {
> @@ -4212,7 +4211,7 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
>      spin_lock(ptl);
>      ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
>      if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
> -        ClearPagePrivate(new_page);
> +        ClearHPageRestoreReserve(new_page);
>
>          /* Break COW */
>          huge_ptep_clear_flush(vma, haddr, ptep);
> @@ -4279,7 +4278,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
>
>      if (err)
>          return err;
> -    ClearPagePrivate(page);
> +    ClearHPageRestoreReserve(page);
>
>      /*
>       * set page dirty so that it will not be removed from cache/file
> @@ -4441,7 +4440,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
>          goto backout;
>
>      if (anon_rmap) {
> -        ClearPagePrivate(page);
> +        ClearHPageRestoreReserve(page);
>          hugepage_add_new_anon_rmap(page, vma, haddr);
>      } else
>          page_dup_rmap(page, true);
> @@ -4755,7 +4754,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>      if (vm_shared) {
>          page_dup_rmap(page, true);
>      } else {
> -        ClearPagePrivate(page);
> +        ClearHPageRestoreReserve(page);
>          hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
>      }
>

Looks good to me. Thanks.

Reviewed-by: Miaohe Lin <linmiaohe@xxxxxxxxxx>