On Thu, Oct 28, 2021 at 07:56:50PM +0800, Ning Zhang wrote:
+++ b/include/linux/huge_mm.h
@@ -185,6 +185,15 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
void free_transhuge_page(struct page *page);
bool is_transparent_hugepage(struct page *page);
+#ifdef CONFIG_MEMCG
+int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page);
+unsigned long zsr_reclaim_hpage(struct lruvec *lruvec, struct page *page);
+static inline struct list_head *hpage_reclaim_list(struct page *page)
+{
+	return &page[3].hpage_reclaim_list;
+}
+#endif
I don't think any of this needs to be under an ifdef; nothing will
reference these declarations unless CONFIG_MEMCG code calls them, so the
guard buys you nothing. That goes for a lot of your other additions to
header files. i.e. just:
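int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page);
unsigned long zsr_reclaim_hpage(struct lruvec *lruvec, struct page *page);

static inline struct list_head *hpage_reclaim_list(struct page *page)
{
	return &page[3].hpage_reclaim_list;
}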
@@ -1110,6 +1121,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
					    gfp_t gfp_mask,
					    unsigned long *total_scanned);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void del_hpage_from_queue(struct page *page);
+#endif
That name is too generic for something declared in a widely-included
header; an hpage_reclaim_ or thp_ prefix would help. Also, to avoid
ifdefs at the call sites, it should be:
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void del_hpage_from_queue(struct page *page);
#else
static inline void del_hpage_from_queue(struct page *page) { }
#endif
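That way callers can use it unconditionally, and the compiler discards
the no-op call when THP is disabled.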
@@ -159,6 +159,12 @@ struct page {
			/* For both global and memcg */
			struct list_head deferred_list;
		};
+		struct {	/* Third tail page of compound page */
+			unsigned long _compound_pad_2;
+			unsigned long _compound_pad_3;
+			/* For zero subpages reclaim */
+			struct list_head hpage_reclaim_list;
Why do you need _compound_pad_3 here?
+++ b/include/linux/mmzone.h
@@ -787,6 +787,12 @@ struct deferred_split {
	struct list_head split_queue;
	unsigned long split_queue_len;
};
+
+struct hpage_reclaim {
+	spinlock_t reclaim_queue_lock;
+	struct list_head reclaim_queue;
+	unsigned long reclaim_queue_len;
+};
Have you considered using an XArray instead of a linked list?
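It would come with its own lock (xa_lock), so the separate spinlock
goes away too. A rough sketch, with the index scheme (pfn, say) being
my assumption rather than anything in your patch:

struct hpage_reclaim {
	struct xarray reclaim_queue;	/* e.g. indexed by pfn */
	unsigned long reclaim_queue_len;
};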
+static bool hpage_estimate_zero(struct page *page)
+{
+	unsigned int i, maybe_zero_pages = 0, offset = 0;
+	void *addr;
+
+#define BYTES_PER_LONG (BITS_PER_LONG / BITS_PER_BYTE)
BYTES_PER_LONG is simply sizeof(long).
Also, I'd check the entire cacheline rather than just one word: once
the line has been fetched, comparing the remaining bytes is essentially
free.
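Something like this, say (untested; the helper name is made up, and it
assumes the sampled address is cacheline-aligned):

/* memchr_inv() returns NULL iff every byte in the range is zero */
static inline bool zsr_cacheline_zero(const void *addr)
{
	return !memchr_inv(addr, 0, L1_CACHE_BYTES);
}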
+#ifdef CONFIG_MMU
+#define ZSR_PG_MLOCK(flag) (1UL << flag)
+#else
+#define ZSR_PG_MLOCK(flag) 0
+#endif
Or just use __PG_MLOCKED? page-flags.h already defines it as 0 for the
!CONFIG_MMU case.
+#ifdef CONFIG_ARCH_USES_PG_UNCACHED
+#define ZSR_PG_UNCACHED(flag) (1UL << flag)
+#else
+#define ZSR_PG_UNCACHED(flag) 0
+#endif
Define __PG_UNCACHED in page-flags.h?
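Mirroring __PG_MLOCKED, that would be something like:

#ifdef CONFIG_ARCH_USES_PG_UNCACHED
#define __PG_UNCACHED		(1UL << PG_uncached)
#else
#define __PG_UNCACHED		0
#endif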
+#ifdef CONFIG_MEMORY_FAILURE
+#define ZSR_PG_HWPOISON(flag) (1UL << flag)
+#else
+#define ZSR_PG_HWPOISON(flag) 0
+#endif
__PG_HWPOISON already exists in page-flags.h with exactly this
fallback-to-0 pattern; use it.
+#define hr_queue_list_to_page(head) \
+	compound_head(list_entry((head)->prev, struct page,\
+		      hpage_reclaim_list))
I think you're better off subtracting 3*sizeof(struct page) than
loading from compound_head; the list_head always lives in the third
tail page, so the head page is at a constant offset.
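i.e. something like (untested):

#define hr_queue_list_to_page(head) \
	(list_entry((head)->prev, struct page, hpage_reclaim_list) - 3)

That turns a dependent load into simple pointer arithmetic.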
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* Need the page lock if the page is not a newly allocated page. */
+static void add_hpage_to_queue(struct page *page, struct mem_cgroup *memcg)
+{
+	struct hpage_reclaim *hr_queue;
+	unsigned long flags;
+
+	if (READ_ONCE(memcg->thp_reclaim) == THP_RECLAIM_DISABLE)
+		return;
+
+	page = compound_head(page);
Why do you think the caller might be passing in a tail page here?