Thanks Vlastimil for the inputs!!

On 8/10/2022 5:10 PM, Vlastimil Babka wrote:
>> --- a/mm/page_owner.c
>> +++ b/mm/page_owner.c
>> @@ -141,7 +141,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
>>          struct page_owner *page_owner;
>>          u64 free_ts_nsec = local_clock();
>>
>> -        page_ext = lookup_page_ext(page);
>> +        page_ext = page_ext_get(page);
>>          if (unlikely(!page_ext))
>>                  return;
>> @@ -153,6 +153,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
>>                  page_owner->free_ts_nsec = free_ts_nsec;
>>                  page_ext = page_ext_next(page_ext);
>>          }
>> +        page_ext_put();
>>  }
>>
>>  static inline void __set_page_owner_handle(struct page_ext *page_ext,
>> @@ -183,19 +184,26 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
>>  noinline void __set_page_owner(struct page *page, unsigned short order,
>>                                          gfp_t gfp_mask)
>>  {
>> -        struct page_ext *page_ext = lookup_page_ext(page);
>> +        struct page_ext *page_ext = page_ext_get(page);
>>          depot_stack_handle_t handle;
>>
>>          if (unlikely(!page_ext))
>>                  return;
>> +        page_ext_put();
>>
>>          handle = save_stack(gfp_mask);
>> +
>> +        /* Ensure page_ext is valid after page_ext_put() above */
>> +        page_ext = page_ext_get(page);
>
> Why not simply do the save_stack() first and then page_ext_get() just
> once? It should be really rare that it's NULL, so I don't think we save
> much by avoiding an unnecessary save_stack(), while the overhead of
> doing two get/put instead of one will affect every call.
>

I am under the assumption that save_stack() can take time when it goes
for GFP_KERNEL allocations, whereas getting the page_ext is merely an
rcu_read_lock() plus a few arithmetic operations. Am I wrong here?

But yes, it is rare that page_ext can be NULL here, so I am fine with
following your suggestion, which at least improves the code
readability, IMO.
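
To make sure I follow, the below is a rough, untested sketch of that
restructuring against this version of the series (it still assumes the
parameterless page_ext_put() used in this patch):

noinline void __set_page_owner(struct page *page, unsigned short order,
                                        gfp_t gfp_mask)
{
        struct page_ext *page_ext;
        depot_stack_handle_t handle;

        /* Do the possibly sleeping save_stack() before taking page_ext */
        handle = save_stack(gfp_mask);

        /* Single get/put pair on every call */
        page_ext = page_ext_get(page);
        if (unlikely(!page_ext))
                return;

        __set_page_owner_handle(page_ext, handle, order, gfp_mask);
        page_ext_put();
}

If this matches what you have in mind, I can do it this way in the next
revision.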
>> +        if (unlikely(!page_ext))
>> +                return;
>>          __set_page_owner_handle(page_ext, handle, order, gfp_mask);
>> +        page_ext_put();
>>  }
>>
>>  void __set_page_owner_migrate_reason(struct page *page, int reason)
>>  {
>> -        struct page_ext *page_ext = lookup_page_ext(page);
>> +        struct page_ext *page_ext = page_ext_get(page);
>>          struct page_owner *page_owner;
>>
>>          if (unlikely(!page_ext))
>> @@ -203,12 +211,13 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
>>          page_owner = get_page_owner(page_ext);
>>          page_owner->last_migrate_reason = reason;
>> +        page_ext_put();
>>  }
>>
>>  void __split_page_owner(struct page *page, unsigned int nr)
>>  {
>>          int i;
>> -        struct page_ext *page_ext = lookup_page_ext(page);
>> +        struct page_ext *page_ext = page_ext_get(page);
>>          struct page_owner *page_owner;
>>
>>          if (unlikely(!page_ext))
>> @@ -219,16 +228,24 @@ void __split_page_owner(struct page *page, unsigned int nr)
>>                  page_owner->order = 0;
>>                  page_ext = page_ext_next(page_ext);
>>          }
>> +        page_ext_put();
>>  }
>>
>>  void __folio_copy_owner(struct folio *newfolio, struct folio *old)
>>  {
>> -        struct page_ext *old_ext = lookup_page_ext(&old->page);
>> -        struct page_ext *new_ext = lookup_page_ext(&newfolio->page);
>> +        struct page_ext *old_ext;
>> +        struct page_ext *new_ext;
>>          struct page_owner *old_page_owner, *new_page_owner;
>>
>> -        if (unlikely(!old_ext || !new_ext))
>> +        old_ext = page_ext_get(&old->page);
>> +        if (unlikely(!old_ext))
>> +                return;
>> +
>> +        new_ext = page_ext_get(&newfolio->page);
>
> The second one can keep using just lookup_page_ext() and we can have a
> single page_ext_put()? I don't think it would be dangerous in case the
> internals change, as page_ext_put() doesn't have a page parameter anyway
> so it can't be specific to a page.

Actually, we hid lookup_page_ext() and exposed only a single interface,
i.e. page_ext_get()/page_ext_put(). This suggestion requires exposing
lookup_page_ext() as well, which leaves two interfaces for getting the
page_ext, and that doesn't look good, IMO. Please let me know if you
think otherwise here.
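
Just so the trade-off is clearer, this is roughly the pair the series
exposes; a simplified sketch of this version, where page_ext_put() takes
no argument and only drops the RCU read lock (the real helpers also have
to deal with the section-offlining details):

struct page_ext *page_ext_get(struct page *page)
{
        struct page_ext *page_ext;

        rcu_read_lock();
        page_ext = lookup_page_ext(page);
        if (!page_ext) {
                /* Drop the lock ourselves; callers bail out without a put */
                rcu_read_unlock();
                return NULL;
        }

        /* Returned with the RCU read lock held until page_ext_put() */
        return page_ext;
}

void page_ext_put(void)
{
        rcu_read_unlock();
}

Keeping lookup_page_ext() internal to this pair is what I meant by a
single interface above.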
>
>> +        if (unlikely(!new_ext)) {
>> +                page_ext_put();
>>                  return;
>> +        }
>>
>>          old_page_owner = get_page_owner(old_ext);
>>          new_page_owner = get_page_owner(new_ext);
>> @@ -254,6 +271,8 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
>>           */
>>          __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
>>          __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
>> +        page_ext_put();
>> +        page_ext_put();
>>  }
>>
>>  void pagetypeinfo_showmixedcount_print(struct seq_file *m,
>> @@ -307,12 +326,12 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
>>                  if (PageReserved(page))
>>                          continue;
>>
>> -                page_ext = lookup_page_ext(page);
>> +                page_ext = page_ext_get(page);
>>                  if (unlikely(!page_ext))
>>                          continue;
>>
>>                  if (!test_bit(PAGE_EXT_OWNER_ALLOCATED,
>>                                          &page_ext->flags))
>> -                        continue;
>> +                        goto loop;
>>
>>                  page_owner = get_page_owner(page_ext);
>>                  page_mt = gfp_migratetype(page_owner->gfp_mask);
>> @@ -323,9 +342,12 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
>>                                  count[pageblock_mt]++;
>>
>>                          pfn = block_end_pfn;
>> +                        page_ext_put();
>>                          break;
>>                  }
>>                  pfn += (1UL << page_owner->order) - 1;
>> +loop:
>> +                page_ext_put();
>>          }
>>  }
>>
>> @@ -435,7 +457,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>>  void __dump_page_owner(const struct page *page)
>>  {
>> -        struct page_ext *page_ext = lookup_page_ext(page);
>> +        struct page_ext *page_ext = page_ext_get((void *)page);
>>          struct page_owner *page_owner;
>>          depot_stack_handle_t handle;
>>          gfp_t gfp_mask;
>> @@ -452,6 +474,7 @@ void __dump_page_owner(const struct page *page)
>>          if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
>>                  pr_alert("page_owner info is not present (never set?)\n");
>> +                page_ext_put();
>>                  return;
>>          }
>> @@ -482,6 +505,7 @@ void __dump_page_owner(const struct page *page)
>>          if (page_owner->last_migrate_reason != -1)
>>                  pr_alert("page has been migrated, last migrate reason: %s\n",
>>                          migrate_reason_names[page_owner->last_migrate_reason]);
>> +        page_ext_put();
>>  }
>>
>>  static ssize_t
>> @@ -508,6 +532,14 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
>>          /* Find an allocated page */
>>          for (; pfn < max_pfn; pfn++) {
>>                  /*
>> +                 * This temporary page_owner is required so
>> +                 * that we can avoid the context switches while holding
>> +                 * the rcu lock and copying the page owner information to
>> +                 * user through copy_to_user() or GFP_KERNEL allocations.
>> +                 */
>> +                struct page_owner page_owner_tmp;
>> +
>> +                /*
>>                   * If the new page is in a new MAX_ORDER_NR_PAGES area,
>>                   * validate the area as existing, skip it if not
>>                   */
>> @@ -525,7 +557,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
>>                          continue;
>>                  }
>>
>> -                page_ext = lookup_page_ext(page);
>> +                page_ext = page_ext_get(page);
>>                  if (unlikely(!page_ext))
>>                          continue;
>>
>> @@ -534,14 +566,14 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
>>                   * because we don't hold the zone lock.
>>                   */
>>                  if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
>> -                        continue;
>> +                        goto loop;
>>
>>                  /*
>>                   * Although we do have the info about past allocation of free
>>                   * pages, it's not relevant for current memory usage.
>>                   */
>>                  if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
>> -                        continue;
>> +                        goto loop;
>>
>>                  page_owner = get_page_owner(page_ext);
>>
>> @@ -550,7 +582,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
>>                   * would inflate the stats.
>>                   */
>>                  if (!IS_ALIGNED(pfn, 1 << page_owner->order))
>> -                        continue;
>> +                        goto loop;
>>
>>                  /*
>>                   * Access to page_ext->handle isn't synchronous so we should
>> @@ -558,13 +590,17 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
>>                   */
>>                  handle = READ_ONCE(page_owner->handle);
>>                  if (!handle)
>> -                        continue;
>> +                        goto loop;
>>
>>                  /* Record the next PFN to read in the file offset */
>>                  *ppos = (pfn - min_low_pfn) + 1;
>>
>> +                memcpy(&page_owner_tmp, page_owner, sizeof(struct page_owner));
>> +                page_ext_put();
>>                  return print_page_owner(buf, count, pfn, page,
>> -                                page_owner, handle);
>> +                                &page_owner_tmp, handle);
>> +loop:
>> +                page_ext_put();
>>          }
>>
>>          return 0;
>> @@ -617,18 +653,20 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
>>                          if (PageReserved(page))
>>                                  continue;
>>
>> -                        page_ext = lookup_page_ext(page);
>> +                        page_ext = page_ext_get(page);
>>                          if (unlikely(!page_ext))
>>                                  continue;
>>
>>                          /* Maybe overlapping zone */
>>                          if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
>> -                                continue;
>> +                                goto loop;
>>
>>                          /* Found early allocated page */
>>                          __set_page_owner_handle(page_ext, early_handle,
>>                                                  0, 0);
>>                          count++;
>> +loop:
>> +                        page_ext_put();
>>                  }
>>                  cond_resched();
>
> This is called from init_page_owner() where races with offline are
> impossible, so it's unnecessary. Although it won't hurt.

Totally agree. In fact, in V2 this change is not there. There are some
other places too where going for page_ext_get/put() is not required,
e.g. page_owner_migration, but these changes are done because we have
exposed a single interface to get the page_ext.

> Thanks,

Charan