Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
Link: https://lore.kernel.org/lkml/20190722213205.140845-1-joel@xxxxxxxxxxxxxxxxx/
---
 Documentation/admin-guide/mm/pagemap.rst |  3 ++-
 Documentation/filesystems/proc.txt       |  3 +++
 fs/proc/task_mmu.c                       | 33 ++++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index 340a5aee9b80..d7ee60287584 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -21,7 +21,8 @@ There are four components to pagemap:
* Bit 55 pte is soft-dirty (see
:ref:`Documentation/admin-guide/mm/soft-dirty.rst <soft_dirty>`)
* Bit 56 page exclusively mapped (since 4.2)
- * Bits 57-60 zero
+ * Bit 57 page is idle
+ * Bits 58-60 zero
* Bit 61 page is file-page or shared-anon (since 3.5)
* Bit 62 page swapped
* Bit 63 page present
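
(For reference, the new bit can be read from userspace like any other
pagemap flag. Below is a minimal sketch, not part of this patch; the
helper name is illustrative, and it assumes a kernel with this patch
and CONFIG_IDLE_PAGE_TRACKING=y.)

#include <fcntl.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

/* Return 1 if the page backing addr is present and idle (pagemap bits
 * 63 and 57), 0 if not, -1 on error. */
static int page_idle_user(const void *addr)
{
	uint64_t entry;
	long page_size = sysconf(_SC_PAGESIZE);
	off_t offset = (uintptr_t)addr / page_size * sizeof(entry);
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return -1;
	if (pread(fd, &entry, sizeof(entry), offset) != sizeof(entry)) {
		close(fd);
		return -1;
	}
	close(fd);
	return ((entry >> 63) & 1) && ((entry >> 57) & 1);
}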
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 99ca040e3f90..d222be8b4eb9 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -574,6 +574,9 @@ To reset the peak resident set size ("high water mark") to the process's
current value:
> echo 5 > /proc/PID/clear_refs
+To mark all mapped pages as idle:
+ > echo 6 > /proc/PID/clear_refs
+
Any other value written to /proc/PID/clear_refs will have no effect.
The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
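
(A minimal C sketch of the new command, not part of this patch; the
helper name is illustrative, and it assumes a kernel with this patch
applied, since kernels without it reject the value 6.)

#include <fcntl.h>
#include <unistd.h>

/* Mark all of the calling process's mapped pages idle by writing "6"
 * to clear_refs, as documented above. Returns 0 on success. */
static int mark_self_idle(void)
{
	int fd = open("/proc/self/clear_refs", O_WRONLY);
	int ret;

	if (fd < 0)
		return -1;
	ret = write(fd, "6", 1) == 1 ? 0 : -1;
	close(fd);
	return ret;
}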
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 731642e0f5a0..6da952574a1f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -413,6 +413,7 @@ struct mem_size_stats {
unsigned long private_clean;
unsigned long private_dirty;
unsigned long referenced;
+ unsigned long idle;
unsigned long anonymous;
unsigned long lazyfree;
unsigned long anonymous_thp;
@@ -479,6 +480,10 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
if (young || page_is_young(page) || PageReferenced(page))
mss->referenced += size;
+ /* Not accessed and still idle. */
+ if (!young && page_is_idle(page))
+ mss->idle += size;
+
/*
* Then accumulate quantities that may depend on sharing, or that may
* differ page-by-page.
@@ -799,6 +804,9 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
+#ifdef CONFIG_IDLE_PAGE_TRACKING
+ SEQ_PUT_DEC(" kB\nIdle: ", mss->idle);
+#endif
SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
@@ -969,6 +977,7 @@ enum clear_refs_types {
CLEAR_REFS_MAPPED,
CLEAR_REFS_SOFT_DIRTY,
CLEAR_REFS_MM_HIWATER_RSS,
+ CLEAR_REFS_SOFT_ACCESS,
CLEAR_REFS_LAST,
};
@@ -1045,6 +1054,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
+ int young;
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
@@ -1058,8 +1068,16 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
page = pmd_page(*pmd);
+ young = pmdp_test_and_clear_young(vma, addr, pmd);
+
+ if (cp->type == CLEAR_REFS_SOFT_ACCESS) {
+ if (young)
+ set_page_young(page);
+ set_page_idle(page);
+ goto out;
+ }
+
/* Clear accessed and referenced bits. */
- pmdp_test_and_clear_young(vma, addr, pmd);
test_and_clear_page_young(page);
ClearPageReferenced(page);
out:
@@ -1086,8 +1104,16 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
if (!page)
continue;
+ young = ptep_test_and_clear_young(vma, addr, pte);
+
+ if (cp->type == CLEAR_REFS_SOFT_ACCESS) {
+ if (young)
+ set_page_young(page);
+ set_page_idle(page);
+ continue;
+ }
+
/* Clear accessed and referenced bits. */
- ptep_test_and_clear_young(vma, addr, pte);
test_and_clear_page_young(page);
ClearPageReferenced(page);
}
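
(In both the pmd and pte paths above, the hardware accessed bit is
folded into the page's software "young" flag rather than discarded, so
a reference that happened just before the write is not lost. Taken
together with the interfaces above this allows simple working-set
sampling; an illustrative sketch, compiled together with the two
hypothetical helpers from the earlier examples:)

#include <stdio.h>
#include <unistd.h>

/* Mark everything idle, wait, then report how much memory stayed idle,
 * i.e. was not referenced during the interval. */
int main(void)
{
	int i;

	for (i = 0; i < 5; i++) {
		if (mark_self_idle())
			return 1;
		sleep(10);
		printf("idle after 10s: %lu kB\n", total_idle_kb("self"));
	}
	return 0;
}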
@@ -1253,6 +1279,7 @@ struct pagemapread {
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_IDLE BIT_ULL(57)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
#define PM_PRESENT BIT_ULL(63)
@@ -1326,6 +1353,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
+ if (!pte_young(pte) && page && page_is_idle(page))
+ flags |= PM_IDLE;
} else if (is_swap_pte(pte)) {
swp_entry_t entry;
if (pte_swp_soft_dirty(pte))