Add a script which converts /sys/kernel/debug/kmemleak_all to the pprof format, which can be used for analysing memory usage. See https://github.com/google/pprof. $ ./kmemleak2pprof.py kmemleak_all $ pprof -text -ignore free_area_init_node -compact_labels -nodecount 10 prof Showing nodes accounting for 4.85MB, 34.05% of 14.23MB total Dropped 3989 nodes (cum <= 0.07MB) Showing top 10 nodes out of 190 flat flat% sum% cum cum% 1.39MB 9.78% 9.78% 1.61MB 11.29% new_inode_pseudo+0x8/0x4c 0.75MB 5.27% 15.04% 0.75MB 5.27% alloc_large_system_hash+0x19c/0x2b8 0.73MB 5.12% 20.17% 0.86MB 6.07% kernfs_new_node+0x30/0x50 0.66MB 4.62% 24.79% 0.66MB 4.62% __vmalloc_node.constprop.9+0x48/0x50 0.61MB 4.28% 29.06% 0.61MB 4.28% d_alloc+0x10/0x78 0.22MB 1.52% 30.58% 0.22MB 1.52% alloc_inode+0x1c/0xa4 0.18MB 1.28% 31.86% 0.20MB 1.42% _do_fork+0xb0/0x41c 0.13MB 0.88% 32.74% 0.13MB 0.88% early_trace_init+0x16c/0x374 0.09MB 0.66% 33.40% 0.17MB 1.17% inet_init+0x128/0x24c 0.09MB 0.65% 34.05% 0.09MB 0.65% __kernfs_new_node+0x34/0x1a8 Signed-off-by: Vincent Whitchurch <vincent.whitchurch@xxxxxxxx> --- scripts/kmemleak2pprof.py | 164 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100755 scripts/kmemleak2pprof.py diff --git a/scripts/kmemleak2pprof.py b/scripts/kmemleak2pprof.py new file mode 100755 index 000000000000..1295d3ca9a9d --- /dev/null +++ b/scripts/kmemleak2pprof.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2018 Axis Communications AB +# +# Converts /sys/kernel/debug/kmemleak_all to the pprof format, see +# https://github.com/google/pprof. +# +# profile_pb2.py can be generated with the following commands. protoc is +# packaged as protobuf-compiler in Debian: +# +# wget https://raw.githubusercontent.com/google/pprof/master/proto/profile.proto +# protoc -I. --python_out=. profile.proto + +import argparse + +from collections import defaultdict + +import profile_pb2 + + +# object 0xee0243b0 (size 464): +# comm "swapper/0", pid 0, jiffies 4294937296 +# [<80220673>] alloc_inode+0x13/0x60 +# [<80221cc5>] new_inode_pseudo+0xd/0x38 +# [<802568a3>] proc_setup_thread_self+0x37/0xc4 +# [<8020e8c1>] mount_ns+0x55/0x94 +# [<8024f2e1>] proc_mount+0x45/0x48 +# [<8020ee9b>] mount_fs+0x1f/0x104 +# [<80224785>] vfs_kern_mount.part.3+0x35/0xbc +# [<80224833>] kern_mount_data+0x17/0x2c +# [<8024f44b>] pid_ns_prepare_proc+0x13/0x24 +# [<8012ed0d>] alloc_pid+0x309/0x338 +# [<80118e2b>] copy_process.part.5+0xa2b/0x1308 +# [<80119807>] _do_fork+0x77/0x2f0 +# [<80119abf>] kernel_thread+0x23/0x28 +# [<8053517f>] rest_init+0x27/0xb4 +# [<80900afb>] start_kernel+0x369/0x372 +# [<0000807b>] 0x807b +class KmemleakAll(object): + def __init__(self): + pass + + def analyze(self, f): + allocs = defaultdict(int) + stack = [] + size = 0 + + while True: + line = f.readline() + if not line: + break + + line = line.strip() + + if line.startswith('['): + # (null) is in the address part so later parsing steps fail. + # Don't bother fixing it up since it's clearly bogus. + if '(null)' in line: + continue + + stack.append(line) + continue + elif line.startswith('comm'): + continue + + if size: + allocs[(tuple(stack), size)] += 1 + size = 0 + + stack = [] + size = int(line.split('(size ')[1].strip('):')) + + return sorted(allocs.items(), key=lambda x: x[0][1] * x[1], reverse=True) + + +class ProfileWriter(object): + def __init__(self, allocs): + self.profile = profile_pb2.Profile() + self.strings = [''] + self.allocs = allocs + self.locations = {} + self.functions = {} + + def stridx(self, s): + try: + idx = self.strings.index(s) + except ValueError: + idx = len(self.strings) + self.strings.append(s) + + return idx + + def get_function_id(self, funcname, filename): + try: + return self.functions[(funcname, filename)].id + except KeyError: + pass + + function = self.profile.function.add() + function.id = len(self.functions) + 1 + function.name = self.stridx(funcname) + function.filename = self.stridx(filename) + + self.functions[(funcname, filename)] = function + + return function.id + + def get_location_id(self, addr): + if addr.startswith('['): + _, func = addr.split(' ', maxsplit=1) + + try: + return self.locations[addr].id + except KeyError: + pass + + location = self.profile.location.add() + location.id = len(self.locations) + 1 + + # We don't have access to the file or line information. + locline = location.line.add() + locline.function_id = self.get_function_id(func, 'dummy.c') + + self.locations[addr] = location + + return location.id + + def write(self, fn): + valuetype = self.profile.sample_type.add() + valuetype.type = self.stridx('slab') + valuetype.unit = self.stridx('bytes') + + for i, alloc in enumerate(self.allocs): + stacksize, count = alloc + stack, size = stacksize + + for instance in range(count): + sample = self.profile.sample.add() + sample.value.append(size) + + for addr in stack: + sample.location_id.append(self.get_location_id(addr)) + + self.profile.string_table.extend(self.strings) + + with open(fn, 'wb') as f: + f.write(self.profile.SerializeToString()) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--output', default='prof') + parser.add_argument('data') + args = parser.parse_args() + + with open(args.data) as f: + allocs = KmemleakAll().analyze(f) + + ProfileWriter(allocs).write(args.output) + + +if __name__ == '__main__': + main() -- 2.11.0