A new perf script page-reclaim is introduced in this patch. This new script is used to report the page reclaim details. The possible usage of this script is as bellow, - identify latency spike caused by direct reclaim - whehter the latency spike is relevant with pageout - why is page reclaim requested, i.e. whether it is because of memory fragmentation - page reclaim efficiency etc In the future we may also enhance it to analyze the memcg reclaim. Bellow is how to use this script, # Record, one of the following $ perf record -e 'vmscan:mm_vmscan_*' ./workload $ perf script record page-reclaim # Report $ perf script report page-reclaim # Report per process latency $ perf script report page-reclaim -- -p # Report per process latency details. At what time and how long it # stalls at each time. $ perf script report page-reclaim -- -v An example of doing mmtests, $ perf script report page-reclaim Direct reclaims: 4924 Direct latency (ms) total max avg min 177823.211 6378.977 36.114 0.051 Direct file reclaimed 22920 Direct file scanned 28306 Direct file sync write I/O 0 Direct file async write I/O 0 Direct anon reclaimed 212567 Direct anon scanned 1446854 Direct anon sync write I/O 0 Direct anon async write I/O 278325 Direct order 0 1 3 4870 23 31 Wake kswapd requests 716 Wake order 0 1 715 1 Kswapd reclaims: 9 Kswapd latency (ms) total max avg min 86353.046 42128.816 9594.783 120.736 Kswapd file reclaimed 366461 Kswapd file scanned 369554 Kswapd file sync write I/O 0 Kswapd file async write I/O 0 Kswapd anon reclaimed 362594 Kswapd anon scanned 693938 Kswapd anon sync write I/O 0 Kswapd anon async write I/O 330663 Kswapd order 0 1 3 3 1 5 Kswapd re-wakes 705 $ perf script report page-reclaim -- -p # besides the above basic output, it will also summary per task # latency Per process latency (ms): pid[comm] total max avg min 1[systemd] 276.764 248.933 21.29 0.293 163[kswapd0] 86353.046 42128.816 9594.783 120.736 7241[bash] 12787.749 859.091 94.028 0.163 1592[master] 81.604 70.811 27.201 2.906 1595[pickup] 496.162 374.168 165.387 14.478 1098[auditd] 19.32 19.32 19.32 19.32 1120[irqbalance] 5232.331 1386.352 158.555 0.169 7236[usemem] 79649.04 1763.281 24.921 0.051 1605[sshd] 1344.41 645.125 34.472 0.16 7238[bash] 1158.92 1023.307 231.784 0.067 7239[bash] 15100.776 993.447 82.069 0.145 ... $ per script report page-reclaim -- -v # Besides the basic output, it will asl show per task latency details Per process latency (ms): pid[comm] total max avg min timestamp latency(ns) 1[systemd] 276.764 248.933 21.29 0.293 3406860552338: 16819800 3406877381650: 5532855 3407458799399: 929517 3407459796042: 916682 3407460763220: 418989 3407461250236: 332355 3407461637534: 401731 3407462092234: 449219 3407462605855: 292857 3407462952343: 372700 3407463364947: 414880 3407463829547: 949162 3407464813883: 248933444 163[kswapd0] 86353.046 42128.816 9594.783 120.736 3357637025977: 1026962745 3358915619888: 41268642175 3400239664127: 42128816204 3443784780373: 679641989 3444847948969: 120735792 3445001978784: 342713657 3445835850664: 316851589 3446865035476: 247457873 3449355401352: 221223878 ... This script must be in sync with bellow vmscan tracepoints, mm_vmscan_direct_reclaim_begin mm_vmscan_direct_reclaim_end mm_vmscan_kswapd_wake mm_vmscan_kswapd_sleep mm_vmscan_wakeup_kswapd mm_vmscan_lru_shrink_inactive mm_vmscan_writepage Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> Cc: Tony Jones <tonyj@xxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> --- v1->v2: verified it with python2.7 and python3.6. add vmscan tracepoints comments in this script --- tools/perf/scripts/python/bin/page-reclaim-record | 2 + tools/perf/scripts/python/bin/page-reclaim-report | 4 + tools/perf/scripts/python/page-reclaim.py | 393 ++++++++++++++++++++++ 3 files changed, 399 insertions(+) create mode 100644 tools/perf/scripts/python/bin/page-reclaim-record create mode 100644 tools/perf/scripts/python/bin/page-reclaim-report create mode 100644 tools/perf/scripts/python/page-reclaim.py diff --git a/tools/perf/scripts/python/bin/page-reclaim-record b/tools/perf/scripts/python/bin/page-reclaim-record new file mode 100644 index 0000000..5a16a23 --- /dev/null +++ b/tools/perf/scripts/python/bin/page-reclaim-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -e vmscan:mm_vmscan_direct_reclaim_begin -e vmscan:mm_vmscan_direct_reclaim_end -e vmscan:mm_vmscan_kswapd_wake -e vmscan:mm_vmscan_kswapd_sleep -e vmscan:mm_vmscan_wakeup_kswapd -e vmscan:mm_vmscan_lru_shrink_inactive -e vmscan:mm_vmscan_writepage $@ diff --git a/tools/perf/scripts/python/bin/page-reclaim-report b/tools/perf/scripts/python/bin/page-reclaim-report new file mode 100644 index 0000000..b74e197 --- /dev/null +++ b/tools/perf/scripts/python/bin/page-reclaim-report @@ -0,0 +1,4 @@ +#!/bin/bash +#description: display page reclaim details +#args: [-h] [-p] [-v] +perf script -s "$PERF_EXEC_PATH"/scripts/python/page-reclaim.py $@ diff --git a/tools/perf/scripts/python/page-reclaim.py b/tools/perf/scripts/python/page-reclaim.py new file mode 100644 index 0000000..df40d79 --- /dev/null +++ b/tools/perf/scripts/python/page-reclaim.py @@ -0,0 +1,393 @@ +# SPDX-License-Identifier: GPL-2.0 +# Perf script to help analyze page reclaim with vmscan tracepoints +# e.g. to capture the latency spike caused by direct reclaim. +# +# This script must be in sync with bellow vmscan tracepoints, +# mm_vmscan_direct_reclaim_begin +# mm_vmscan_direct_reclaim_end +# mm_vmscan_kswapd_wake +# mm_vmscan_kswapd_sleep +# mm_vmscan_wakeup_kswapd +# mm_vmscan_lru_shrink_inactive +# mm_vmscan_writepage +# +# This script is motivated by Mel's trace-vmscan-postprocess.pl. +# +# Author: Yafang Shao <laoar.shao@xxxxxxxxx> + +import os +import sys +import getopt +import signal + +signal.signal(signal.SIGPIPE, signal.SIG_DFL) + +usage = "usage: perf script report page-reclaim -- [-h] [-p] [-v]\n" + +latency_metric = ['total', 'max', 'avg', 'min'] +reclaim_path = ['Kswapd', 'Direct'] +sync_io = ['async', 'sync'] +lru = ['anon', 'file'] + +class Show: + DEFAULT = 0 + PROCCESS = 1 + VERBOSE = 2 + +show_opt = Show.DEFAULT + +def ns(sec, nsec): + return (sec * 1000000000) + nsec + +def ns_to_ms(ns): + return round(ns / 1000000.0, 3) + +# Format: +# title valueX valueY +def print_list_in_line(__list, title, padding): + line = title + for v in __list: + line += str(v).rjust(padding) + + print(line) + +# Format: +# title nameX nameY +# valueX valueY +def print_list_in_table(__list, title, padding): + width = len(title) + 1 + header = title.ljust(width) + line = ''.ljust(width) + + for v in __list: + header += str(v[0]).rjust(padding) + line += str(v[1]).rjust(padding) + + print(header) + print(line) + +class Trace: + def __init__(self, secs, nsecs): + self.begin = ns(secs, nsecs) + + def complete(self, secs, nsecs): + self.ns = ns(secs, nsecs) - self.begin + +class Stat: + def __init__(self): + self.stats = {} + self.stats['file'] = {} + self.stats['file']['reclaimed'] = 0 + self.stats['file']['scanned'] = 0 + self.stats['file']['sync'] = 0 + self.stats['file']['async'] = 0 + self.stats['anon'] = {} + self.stats['anon']['reclaimed'] = 0 + self.stats['anon']['scanned'] = 0 + self.stats['anon']['sync'] = 0 + self.stats['anon']['async'] = 0 + + # including reclaimed slab caches + self.stats['reclaimed'] = 0 + + # The MAX_ORDER in kernel is configurable + self.stats['order'] = {} + + self.stats['latency'] = {} + self.stats['latency']['total'] = 0.0 + self.stats['latency']['max'] = 0.0 + self.stats['latency']['avg'] = 0.0 + self.stats['latency']['min'] = float("inf") + self.stats['count'] = 0 + + def add_latency(self, val, order): + self.stats['latency']['total'] += val + _max = self.stats['latency']['max'] + _min = self.stats['latency']['min'] + if val > _max: + self.stats['latency']['max'] = val + if val < _min: + self.stats['latency']['min'] = val + + self.stats['count'] += 1 + self.stats['order'][order] = self.stats['order'].get(order, 0) + 1 + + def add_page(self, _lru, scanned, reclaimed): + self.stats[_lru]['scanned'] += scanned + self.stats[_lru]['reclaimed'] += reclaimed + + def inc_write_io(self, _lru, _io): + self.stats[_lru][_io] += 1 + + def convert_latency(self): + count = self.stats['count'] + if count: + self.stats['latency']['avg'] = \ + self.stats['latency']['total'] / count + for i, v in self.stats['latency'].items(): + self.stats['latency'][i] = ns_to_ms(v) + + latency_list = sorted(self.stats['latency'].items(), + key=lambda i:latency_metric.index(i[0])) + + return latency_list + + def show_stats(self, key): + count = self.stats['count'] + print("%s reclaims: %d" % (key, count)) + + # Format latency output + # Print latencys in milliseconds: + # title total max avg min + # v v v v + latency_list = self.convert_latency() + print_list_in_table(latency_list, key + " latency (ms)", 12) + + for _lru in ['file', 'anon']: + for action in ['reclaimed', 'scanned']: + print("%s %s %s %d" % (key, _lru, action, self.stats[_lru][action])) + for _io in ['sync', 'async']: + print("%s %s %s write I/O %d" % (key, _lru, _io, self.stats[_lru][_io])) + + # Format order output + # Similar with /proc/buddyinfo: + # title order-N ... + # v ... + # N.B. v is a non-zero value + order_list = sorted(self.stats['order'].items()) + print_list_in_table(order_list, key + ' order', 6) + +class Vmscan: + events = {} + stat = {} + stat['Direct'] = Stat() + stat['Kswapd'] = Stat() + # for re-wake the kswapd + rewake = 0 + + @classmethod + def direct_begin(cls, pid, comm, start_secs, start_nsecs, order): + event = cls.events.get(pid) + if event is None: + #new vmscan instance + event = cls.events[pid] = Vmscan(comm, pid) + event.vmscan_trace_begin(start_secs, start_nsecs, order, 1) + + @classmethod + def direct_end(cls, pid, secs, nsecs, reclaimed): + event = cls.events.get(pid) + if event and event.tracing(): + event.vmscan_trace_end(secs, nsecs) + + @classmethod + def kswapd_wake(cls, pid, comm, start_secs, start_nsecs, order): + event = cls.events.get(pid) + if event is None: + # new vmscan instance + event = cls.events[pid] = Vmscan(comm, pid) + + if event.tracing() is False: + event.vmscan_trace_begin(start_secs, start_nsecs, order, 0) + # kswapd is already running + else: + cls.rewake_kswapd(order) + + @classmethod + def rewake_kswapd(cls, order): + cls.rewake += 1 + + @classmethod + def show_rewakes(cls): + print("Kswapd re-wakes %d" % (cls.rewake)) + + @classmethod + def kswapd_sleep(cls, pid, secs, nsecs): + event = cls.events.get(pid) + if event and event.tracing(): + event.vmscan_trace_end(secs, nsecs) + + @classmethod + def shrink_inactive(cls, pid, scanned, reclaimed, flags): + event = cls.events.get(pid) + if event and event.tracing(): + # RECLAIM_WB_ANON 0x1 + # RECLAIM_WB_FILE 0x2 + _type = (flags & 0x2) >> 1 + event.process_lru(lru[_type], scanned, reclaimed) + + @classmethod + def writepage(cls, pid, flags): + event = cls.events.get(pid) + if event and event.tracing(): + # RECLAIM_WB_ANON 0x1 + # RECLAIM_WB_FILE 0x2 + # RECLAIM_WB_SYNC 0x4 + # RECLAIM_WB_ASYNC 0x8 + _type = (flags & 0x2) >> 1 + _io = (flags & 0x4) >> 2 + + event.process_writepage(lru[_type], sync_io[_io]) + + @classmethod + def iterate_proc(cls): + if show_opt != Show.DEFAULT: + print("\nPer process latency (ms):") + _str = 'pid'.rjust(8) + '[comm]'.ljust(12) + print_list_in_line(latency_metric, _str, 12) + + if show_opt == Show.VERBOSE: + print("%20s %s" % ('timestamp','latency(ns)')) + + for i in cls.events: + yield cls.events[i] + + def __init__(self, comm, pid): + self.comm = comm + self.pid = pid + self.trace = None + self._list = [] + self.stat = Stat() + self.direct = 0 + self.order = 0 + + def vmscan_trace_begin(self, secs, nsecs, order, direct): + self.trace = Trace(secs, nsecs) + self.direct = direct + self.order = order + + def vmscan_trace_end(self, secs, nsecs): + path = reclaim_path[self.direct] + self.trace.complete(secs, nsecs) + + Vmscan.stat[path].add_latency(self.trace.ns, self.order) + if show_opt != Show.DEFAULT: + self.stat.add_latency(self.trace.ns, self.order) + if show_opt == Show.VERBOSE: + self._list.append(self.trace) + + self.trace = None + + def process_lru(self, lru, scanned, reclaimed): + path = reclaim_path[self.direct] + Vmscan.stat[path].add_page(lru, scanned, reclaimed) + + def process_writepage(self, lru, io): + path = reclaim_path[self.direct] + Vmscan.stat[path].inc_write_io(lru, io) + + def tracing(self): + return self.trace != None + + def display_proc(self): + self.stat.convert_latency() + _list = sorted(list(self.stat.stats['latency'].values()), reverse = True) + if _list: + comm = '[' + self.comm[0:10] + ']' + proc = str(self.pid).rjust(8) + comm.ljust(12) + print_list_in_line(_list, proc, 12) + + def display_proc_detail(self): + if show_opt == Show.VERBOSE: + for i, v in enumerate(self._list): + print("%20d: %d" % (v.begin, v.ns)) + +# Wake kswpad request +class Wakeup: + wakes = 0 + orders = {} + + @classmethod + def wakeup_kswapd(cls, order): + cls.wakes += 1 + cls.orders[order] = cls.orders.get(order, 0) + 1 + + @classmethod + def show_wakes(cls): + print("Wake kswapd requests %d" % (cls.wakes)) + + order_list = sorted(cls.orders.items()) + print_list_in_table(order_list, 'Wake order', 6) + +def trace_end(): + Vmscan.stat['Direct'].show_stats('Direct') + Wakeup.show_wakes() + print('') + + Vmscan.stat['Kswapd'].show_stats('Kswapd') + Vmscan.show_rewakes() + + # show process details if requested + for i in Vmscan.iterate_proc(): + i.display_proc(), + i.display_proc_detail() + +def vmscan__mm_vmscan_direct_reclaim_begin(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, order, gfp_flags): + + Vmscan.direct_begin(common_pid, common_comm, common_secs, + common_nsecs, order) + +def vmscan__mm_vmscan_direct_reclaim_end(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, nr_reclaimed): + + Vmscan.direct_end(common_pid, common_secs, common_nsecs, nr_reclaimed) + +def vmscan__mm_vmscan_kswapd_wake(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, nid, zid, order): + + Vmscan.kswapd_wake(common_pid, common_comm, common_secs, common_nsecs, order) + +def vmscan__mm_vmscan_kswapd_sleep(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, nid): + + Vmscan.kswapd_sleep(common_pid, common_secs, common_nsecs) + +def vmscan__mm_vmscan_wakeup_kswapd(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, nid, zid, order, gfp_flags): + + Wakeup.wakeup_kswapd(order) + +def vmscan__mm_vmscan_lru_shrink_inactive(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, nid, nr_scanned, nr_reclaimed, nr_dirty, + nr_writeback, nr_congested, nr_immediate, nr_activate_anon, + nr_activate_file, nr_ref_keep, nr_unmap_fail, priority, flags): + + Vmscan.shrink_inactive(common_pid, nr_scanned, nr_reclaimed, flags) + +def vmscan__mm_vmscan_writepage(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, pfn, reclaim_flags): + + Vmscan.writepage(common_pid, reclaim_flags) + +def print_help(): + global usage + print(usage) + print(" -p show process latency (ms)") + print(" -v show process latency (ns) with timestamp") + +def option_parse(): + try: + opts, args = getopt.getopt(sys.argv[1:], "pvh") + except getopt.GetoptError: + print('Bad option!') + exit(1) + + global show_opt + for opt, arg in opts: + if opt == "-h": + print_help() + exit(0) + elif opt == "-p": + show_opt = Show.PROCCESS + elif opt == '-v': + show_opt = Show.VERBOSE + +option_parse() + -- 1.8.3.1