From: Yang Yang <yang.yang29@xxxxxxxxxx> Subject: delayacct: support swapin delay accounting for swapping without blkio Currently delayacct accounts swapin delay only for swapping that cause blkio. If we use zram for swapping, tools/accounting/getdelays can't get any SWAP delay. It's useful to get zram swapin delay information, for example to adjust compress algorithm or /proc/sys/vm/swappiness. Reference to PSI, it accounts any kind of swapping by doing its work in swap_readpage(), no matter whether swapping causes blkio. Let delayacct do the similar work. Link: https://lkml.kernel.org/r/20211112083813.8559-1-yang.yang29@xxxxxxxxxx Signed-off-by: Yang Yang <yang.yang29@xxxxxxxxxx> Reported-by: Zeal Robot <zealci@xxxxxxxxxx> Cc: Balbir Singh <bsingharora@xxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/delayacct.h | 44 ++++++++++++++++++------------------ kernel/delayacct.c | 34 +++++++++++++++------------ mm/memory.c | 4 --- mm/page_io.c | 3 ++ 4 files changed, 44 insertions(+), 41 deletions(-) --- a/include/linux/delayacct.h~delayacct-support-swapin-delay-accounting-for-swapping-without-blkio +++ a/include/linux/delayacct.h @@ -9,14 +9,6 @@ #include <uapi/linux/taskstats.h> -/* - * Per-task flags relevant to delay accounting - * maintained privately to avoid exhausting similar flags in sched.h:PF_* - * Used to set current->delays->flags - */ -#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ -#define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ - #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { raw_spinlock_t lock; @@ -37,13 +29,13 @@ struct task_delay_info { * associated with the operation is added to XXX_delay. * XXX_delay contains the accumulated delay time in nanoseconds. */ - u64 blkio_start; /* Shared by blkio, swapin */ + u64 blkio_start; u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ + u64 swapin_start; + u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ + u32 swapin_count; /* total count of swapin */ u64 freepages_start; u64 freepages_delay; /* wait for memory reclaim */ @@ -79,14 +71,8 @@ extern void __delayacct_freepages_start( extern void __delayacct_freepages_end(void); extern void __delayacct_thrashing_start(void); extern void __delayacct_thrashing_end(void); - -static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) -{ - if (p->delays) - return (p->delays->flags & DELAYACCT_PF_BLKIO); - else - return 0; -} +extern void __delayacct_swapin_start(void); +extern void __delayacct_swapin_end(void); static inline void delayacct_set_flag(struct task_struct *p, int flag) { @@ -123,7 +109,6 @@ static inline void delayacct_blkio_start if (!static_branch_unlikely(&delayacct_key)) return; - delayacct_set_flag(current, DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); } @@ -135,7 +120,6 @@ static inline void delayacct_blkio_end(s if (p->delays) __delayacct_blkio_end(p); - delayacct_clear_flag(p, DELAYACCT_PF_BLKIO); } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) @@ -169,6 +153,18 @@ static inline void delayacct_thrashing_e __delayacct_thrashing_end(); } +static inline void delayacct_swapin_start(void) +{ + if (current->delays) + __delayacct_swapin_start(); +} + +static inline void delayacct_swapin_end(void) +{ + if (current->delays) + __delayacct_swapin_end(); +} + #else static inline void delayacct_set_flag(struct task_struct *p, int flag) {} @@ -199,6 +195,10 @@ static inline void delayacct_thrashing_s {} static inline void delayacct_thrashing_end(void) {} +static inline void delayacct_swapin_start(void) +{} +static inline void delayacct_swapin_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT */ --- a/kernel/delayacct.c~delayacct-support-swapin-delay-accounting-for-swapping-without-blkio +++ a/kernel/delayacct.c @@ -100,19 +100,10 @@ void __delayacct_blkio_start(void) */ void __delayacct_blkio_end(struct task_struct *p) { - struct task_delay_info *delays = p->delays; - u64 *total; - u32 *count; - - if (p->delays->flags & DELAYACCT_PF_SWAPIN) { - total = &delays->swapin_delay; - count = &delays->swapin_count; - } else { - total = &delays->blkio_delay; - count = &delays->blkio_count; - } - - delayacct_end(&delays->lock, &delays->blkio_start, total, count); + delayacct_end(&p->delays->lock, + &p->delays->blkio_start, + &p->delays->blkio_delay, + &p->delays->blkio_count); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -179,8 +170,7 @@ __u64 __delayacct_blkio_ticks(struct tas unsigned long flags; raw_spin_lock_irqsave(&tsk->delays->lock, flags); - ret = nsec_to_clock_t(tsk->delays->blkio_delay + - tsk->delays->swapin_delay); + ret = nsec_to_clock_t(tsk->delays->blkio_delay); raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return ret; } @@ -210,3 +200,17 @@ void __delayacct_thrashing_end(void) ¤t->delays->thrashing_delay, ¤t->delays->thrashing_count); } + +void __delayacct_swapin_start(void) +{ + current->delays->swapin_start = local_clock(); +} + +void __delayacct_swapin_end(void) +{ + delayacct_end(¤t->delays->lock, + ¤t->delays->swapin_start, + ¤t->delays->swapin_delay, + ¤t->delays->swapin_count); +} + --- a/mm/memory.c~delayacct-support-swapin-delay-accounting-for-swapping-without-blkio +++ a/mm/memory.c @@ -3508,7 +3508,6 @@ vm_fault_t do_swap_page(struct vm_fault if (unlikely(!si)) goto out; - delayacct_set_flag(current, DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry, vma, vmf->address); swapcache = page; @@ -3556,7 +3555,6 @@ vm_fault_t do_swap_page(struct vm_fault vmf->address, &vmf->ptl); if (likely(pte_same(*vmf->pte, vmf->orig_pte))) ret = VM_FAULT_OOM; - delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); goto unlock; } @@ -3570,13 +3568,11 @@ vm_fault_t do_swap_page(struct vm_fault * owner processes (which may be unknown at hwpoison time) */ ret = VM_FAULT_HWPOISON; - delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); goto out_release; } locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); - delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); if (!locked) { ret |= VM_FAULT_RETRY; goto out_release; --- a/mm/page_io.c~delayacct-support-swapin-delay-accounting-for-swapping-without-blkio +++ a/mm/page_io.c @@ -25,6 +25,7 @@ #include <linux/psi.h> #include <linux/uio.h> #include <linux/sched/task.h> +#include <linux/delayacct.h> void end_swap_bio_write(struct bio *bio) { @@ -370,6 +371,7 @@ int swap_readpage(struct page *page, boo * significant part of overall IO time. */ psi_memstall_enter(&pflags); + delayacct_swapin_start(); if (frontswap_load(page) == 0) { SetPageUptodate(page); @@ -432,6 +434,7 @@ int swap_readpage(struct page *page, boo out: psi_memstall_leave(&pflags); + delayacct_swapin_end(); return ret; } _