On Thu, Oct 28, 2021 at 07:56:50PM +0800, Ning Zhang wrote:
> +++ b/include/linux/huge_mm.h
> @@ -185,6 +185,15 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
> void free_transhuge_page(struct page *page);
> bool is_transparent_hugepage(struct page *page);
>
> +#ifdef CONFIG_MEMCG
> +int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page);
> +unsigned long zsr_reclaim_hpage(struct lruvec *lruvec, struct page *page);
> +static inline struct list_head *hpage_reclaim_list(struct page *page)
> +{
> +    return &page[3].hpage_reclaim_list;
> +}
> +#endif

I don't think any of this needs to be under an ifdef. That goes for a
lot of your other additions to header files.

> @@ -1110,6 +1121,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
>     gfp_t gfp_mask,
>     unsigned long *total_scanned);
>
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +void del_hpage_from_queue(struct page *page);
> +#endif

That name is too generic. Also, to avoid ifdefs in code, it should be:

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void del_hpage_from_queue(struct page *page);
#else
static inline void del_hpage_from_queue(struct page *page) { }
#endif

> @@ -159,6 +159,12 @@ struct page {
>     /* For both global and memcg */
>     struct list_head deferred_list;
> };
> +    struct { /* Third tail page of compound page */
> +        unsigned long _compound_pad_2;
> +        unsigned long _compound_pad_3;
> +        /* For zero subpages reclaim */
> +        struct list_head hpage_reclaim_list;

Why do you need _compound_pad_3 here?

> +++ b/include/linux/mmzone.h
> @@ -787,6 +787,12 @@ struct deferred_split {
>     struct list_head split_queue;
>     unsigned long split_queue_len;
> };
> +
> +struct hpage_reclaim {
> +    spinlock_t reclaim_queue_lock;
> +    struct list_head reclaim_queue;
> +    unsigned long reclaim_queue_len;
> +};

Have you considered using an XArray instead of a linked list?

> +static bool hpage_estimate_zero(struct page *page)
> +{
> +    unsigned int i, maybe_zero_pages = 0, offset = 0;
> +    void *addr;
> +
> +#define BYTES_PER_LONG (BITS_PER_LONG / BITS_PER_BYTE)

BYTES_PER_LONG is simply sizeof(long).
Also, I'd check the entire cacheline rather than just one word; it's
essentially free.

> +#ifdef CONFIG_MMU
> +#define ZSR_PG_MLOCK(flag) (1UL << flag)
> +#else
> +#define ZSR_PG_MLOCK(flag) 0
> +#endif

Or use __PG_MLOCKED ?

> +#ifdef CONFIG_ARCH_USES_PG_UNCACHED
> +#define ZSR_PG_UNCACHED(flag) (1UL << flag)
> +#else
> +#define ZSR_PG_UNCACHED(flag) 0
> +#endif

Define __PG_UNCACHED in page-flags.h?

> +#ifdef CONFIG_MEMORY_FAILURE
> +#define ZSR_PG_HWPOISON(flag) (1UL << flag)
> +#else
> +#define ZSR_PG_HWPOISON(flag) 0
> +#endif

__PG_HWPOISON

> +#define hr_queue_list_to_page(head) \
> +    compound_head(list_entry((head)->prev, struct page,\
> +        hpage_reclaim_list))

I think you're better off subtracting 3*sizeof(struct page) than
loading from compound_head.

> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +/* Need the page lock if the page is not a newly allocated page. */
> +static void add_hpage_to_queue(struct page *page, struct mem_cgroup *memcg)
> +{
> +    struct hpage_reclaim *hr_queue;
> +    unsigned long flags;
> +
> +    if (READ_ONCE(memcg->thp_reclaim) == THP_RECLAIM_DISABLE)
> +        return;
> +
> +    page = compound_head(page);

Why do you think the caller might be passing in a tail page here?