On Mon, 20 Jul 2020 06:53:14 +0000 "Du, Fan" <fan.du@xxxxxxxxx> wrote: [...] From: SeongJae Park <sjpark@xxxxxxxxx> > > > >This commit updates the DAMON user space tool (damo-record) for NUMA > >specific physical memory monitoring. With this change, users can > >monitor accesses to physical memory of specific NUMA node. > > > >Signed-off-by: SeongJae Park <sjpark@xxxxxxxxx> > >--- > > tools/damon/_paddr_layout.py | 158 > >+++++++++++++++++++++++++++++++++++ > > tools/damon/record.py | 21 ++++- > > 2 files changed, 178 insertions(+), 1 deletion(-) > > create mode 100644 tools/damon/_paddr_layout.py > > > >diff --git a/tools/damon/_paddr_layout.py b/tools/damon/_paddr_layout.py > >new file mode 100644 > >index 000000000000..10056172db21 > >--- /dev/null > >+++ b/tools/damon/_paddr_layout.py > >@@ -0,0 +1,158 @@ > >+#!/usr/bin/env python3 > >+# SPDX-License-Identifier: GPL-2.0 > >+ > >+import os > >+ > >+class PaddrRange: > >+ start = None > >+ end = None > >+ nid = None > >+ state = None > >+ name = None > >+ > >+ def __init__(self, start, end, nid, state, name): > >+ self.start = start > >+ self.end = end > >+ self.nid = nid > >+ self.state = state > >+ self.name = name > >+ > >+ def interleaved(self, prange): > >+ if self.end <= prange.start: > >+ return None > >+ if prange.end <= self.start: > >+ return None > >+ return [max(self.start, prange.start), min(self.end, prange.end)] > >+ > >+ def __str__(self): > >+ return '%x-%x, nid %s, state %s, name %s' % (self.start, self.end, > >+ self.nid, self.state, self.name) > >+ > >+class MemBlock: > >+ nid = None > >+ index = None > >+ state = None > >+ > >+ def __init__(self, nid, index, state): > >+ self.nid = nid > >+ self.index = index > >+ self.state = state > >+ > >+ def __str__(self): > >+ return '%d (%s)' % (self.index, self.state) > >+ > >+ def __repr__(self): > >+ return self.__str__() > >+ > >+def readfile(file_path): > >+ with open(file_path, 'r') as f: > >+ return f.read() > >+ > >+def collapse_ranges(ranges): > >+ ranges = sorted(ranges, key=lambda x: x.start) > >+ merged = [] > >+ for r in ranges: > >+ if not merged: > >+ merged.append(r) > >+ continue > >+ last = merged[-1] > >+ if last.end != r.start or last.nid != r.nid or last.state != r.state: > >+ merged.append(r) > >+ else: > >+ last.end = r.end > >+ return merged > >+ > >+def memblocks_to_ranges(blocks, block_size): > >+ ranges = [] > >+ for b in blocks: > >+ ranges.append(PaddrRange(b.index * block_size, > >+ (b.index + 1) * block_size, b.nid, b.state, None)) > >+ > >+ return collapse_ranges(ranges) > >+ > >+def memblock_ranges(): > >+ SYSFS='/sys/devices/system/node' > >+ sz_block = int(readfile('/sys/devices/system/memory/block_size_bytes'), > >16) > >+ sys_nodes = [x for x in os.listdir(SYSFS) if x.startswith('node')] > >+ > >+ blocks = [] > >+ for sys_node in sys_nodes: > >+ nid = int(sys_node[4:]) > >+ > >+ sys_node_files = os.listdir(os.path.join(SYSFS, sys_node)) > >+ for f in sys_node_files: > >+ if not f.startswith('memory'): > >+ continue > >+ index = int(f[6:]) > >+ sys_state = os.path.join(SYSFS, sys_node, f, 'state') > >+ state = readfile(sys_state).strip() > >+ > >+ blocks.append(MemBlock(nid, index, state)) > >+ > >+ return memblocks_to_ranges(blocks, sz_block) > >+ > >+def iomem_ranges(): > >+ ranges = [] > >+ > >+ with open('/proc/iomem', 'r') as f: > >+ # example of the line: '100000000-42b201fff : System RAM' > >+ for line in f: > >+ fields = line.split(':') > >+ if len(fields) < 2: > >+ continue > >+ name = ':'.join(fields[1:]).strip() > >+ addrs = fields[0].split('-') > >+ if len(addrs) != 2: > >+ continue > >+ start = int(addrs[0], 16) > >+ end = int(addrs[1], 16) + 1 > >+ ranges.append(PaddrRange(start, end, None, None, name)) > >+ > >+ return ranges > > Hi SeongJae > > Here on system with persistent memory, user can plug {all or portion of }persistent memory into buddy system > with drivers from patchset[1] implemented. The persistent memory in this setup will be treated as normal system > RAM, and have valid full-fledged page structure as well. Cool! I didn't read the patchset in detail yet. I will take a look soon! > > For example, here is what /proc/iomem looks like in system with such configuration on my testbed. > > 1840000000-963fffffff : Persistent Memory > 1840000000-18be1fffff : namespace0.0 > 18c0000000-37bfffffff : dax0.0 > 18c0000000-37bfffffff : System RAM <- first 128G of persistent memory corresponding to node 2 > > # numactl -H > available: 3 nodes (0-2) > node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 > node 0 size: 95447 MB > node 0 free: 92391 MB > node 1 cpus: 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 > node 1 size: 96733 MB > node 1 free: 95670 MB > node 2 cpus: > node 2 size: 126976 MB > node 2 free: 126935 MB > node distances: > node 0 1 2 > 0: 10 21 17 > 1: 21 10 28 > 2: 17 28 10 > > So here we have two ranges: > 1840000000-963fffffff : Persistent Memory > 18c0000000-37bfffffff : System RAM > > [1]: > https://patchwork.kernel.org/cover/10829019/ > > > >+def paddr_ranges(): > >+ ranges1 = memblock_ranges() > >+ ranges2 = iomem_ranges() > >+ merged = [] > >+ > >+ for r in ranges1: > >+ subsets = [] > >+ for r2 in ranges2: > >+ interleaved = r.interleaved(r2) > >+ if interleaved == None: > >+ continue > >+ > >+ start, end = interleaved > >+ left = None > >+ if start > r.start: > >+ left = PaddrRange(r.start, start, r.nid, r.state, r.name) > >+ subsets.append(left) > >+ > >+ middle = PaddrRange(start, end, r.nid, r.state, r.name) > >+ if r2.nid: > >+ middle.nid = r2.nid > >+ if r2.state: > >+ middle.state = r2.state > >+ if r2.name: > >+ middle.name = r2.name > > Memory block from numa node 2 will match with range "1840000000-963fffffff : Persistent Memory" > But take the name "Persistent memory", expected "System RAM" here. You're right, current code cares only the outermost regions. I will fix this in the next spin. > > >+ subsets.append(middle) > >+ r.start = end > >+ if r.start < r.end: > >+ subsets = [r] > >+ > >+ merged += subsets > >+ return merged > >+ > >+def pr_ranges(ranges): > >+ print('#%12s %13s\tnode\tstate\tresource\tsize' % ('start', 'end')) > >+ for r in ranges: > >+ print('%13d %13d\t%s\t%s\t%s\t%d' % (r.start, r.end, r.nid, > >+ r.state, r.name, r.end - r.start)) > >+ > >+def main(): > >+ ranges = paddr_ranges() > >+ > >+ pr_ranges(ranges) > >+ > >+if __name__ == '__main__': > >+ main() > >diff --git a/tools/damon/record.py b/tools/damon/record.py > >index 416dca940c1d..8440a9818810 100644 > >--- a/tools/damon/record.py > >+++ b/tools/damon/record.py > >@@ -12,6 +12,7 @@ import subprocess > > import time > > > > import _damon > >+import _paddr_layout > > > > def do_record(target, is_target_cmd, init_regions, attrs, old_attrs): > > if os.path.isfile(attrs.rfile_path): > >@@ -70,6 +71,8 @@ def set_argparser(parser): > > help='the target command or the pid to record') > > parser.add_argument('-l', '--rbuf', metavar='<len>', type=int, > > default=1024*1024, help='length of record result buffer') > >+ parser.add_argument('--numa_node', metavar='<node id>', type=int, > >+ help='if target is \'paddr\', limit it to the numa node') > > parser.add_argument('-o', '--out', metavar='<file path>', type=str, > > default='damon.data', help='output file path') > > > >@@ -96,6 +99,18 @@ def default_paddr_region(): > > ret = [start, end] > > return ret > > > >+def paddr_region_of(numa_node): > >+ regions = [] > >+ default_region = default_paddr_region() > >+ paddr_ranges = _paddr_layout.paddr_ranges() > >+ for r in paddr_ranges: > >+ if r.end <= default_region[0] or default_region[1] <= r.start: > >+ continue > >+ if r.nid == numa_node and r.name == 'System RAM': > >+ regions.append([r.start, r.end]) > > To profile physical address for numa node 2, above checking will not return valid node 2 range > i.e., 18c0000000-37bfffffff : System RAM, but empty ones. > > I tweaked the checking to match "Persistent Memory" also, it looks physical address monitoring with > numa node specified works from first glance. > > I'm willing to give it a try once you posted next updated version. So glad to hear that! Hope to get your feedback again! > > >+ return regions > >+ > > def main(args=None): > > global orig_attrs > > if not args: > >@@ -113,12 +128,16 @@ def main(args=None): > > args.schemes = '' > > new_attrs = _damon.cmd_args_to_attrs(args) > > init_regions = _damon.cmd_args_to_init_regions(args) > >+ numa_node = args.numa_node > > target = args.target > > > > target_fields = target.split() > > if target == 'paddr': # physical memory address space > > if not init_regions: > >- init_regions = [default_paddr_region()] BTW, until the next spin, you would be able to use the your tweak or directly set the address ranges using '--regions' option. Thanks, SeongJae Park > >+ if numa_node: > >+ init_regions = paddr_region_of(numa_node) > >+ else: > >+ init_regions = [default_paddr_region()] > > do_record(target, False, init_regions, new_attrs, orig_attrs) > > elif not subprocess.call('which %s > /dev/null' % target_fields[0], > > shell=True, executable='/bin/bash'): > >-- > >2.17.1 > > >