Hi Bill, Thanks. This numastat works fine in my tests. It is included in numactl-2.0.8-rc6.tar.gz at ftp://oss.sgi.com/www/projects/libnuma/download/ To all on linux-numa: This makes 22 patches since release 2.0.7 in April of 2011. It's way past due to release this as version 2.0.8. I will do that very soon, unless any of you have found any reason not to. Thx. -Cliff On Sun, Oct 07, 2012 at 01:36:06AM -0400, Bill Gray wrote: > > This patch rewrites Andi's numastat perl script as a C program to add > features > to show per-node statistics for processes as well as the system as a > whole. By > default, the new version is strictly compatible with the old version. The > behavior changes if any command line options or program arguments are > used, as > described in the new man page. This provides a convenient way to > monitor the > NUMA node distribution of memory for various sets of processes. For > example, to > check on KVM guests, one might run it like this: > > # ./numastat -c qemu > > Per-node process memory usage (in MBs) > PID Node 0 Node 1 Node 2 Node 3 Node 4 Node 5 Node 6 Node 7 > Total > --------------- ------ ------ ------ ------ ------ ------ ------ ------ > ----- > 7054 (qemu-kvm) 2 1 0 1391 0 0 0 0 > 1395 > 7174 (qemu-kvm) 2 1 0 1 0 0 1404 0 > 1409 > 7294 (qemu-kvm) 2 1 1389 1 0 0 0 0 > 1394 > 7406 (qemu-kvm) 2 1 0 1 0 1391 0 0 > 1395 > 7530 (qemu-kvm) 2 1401 0 1 0 0 0 0 > 1405 > 7638 (qemu-kvm) 2 1 0 1 1404 0 0 0 > 1409 > 7742 (qemu-kvm) 2 1 0 1 0 0 0 1392 > 1397 > 7858 (qemu-kvm) 1396 1 0 1 0 0 0 0 > 1398 > --------------- ------ ------ ------ ------ ------ ------ ------ ------ > ----- > Total 1413 1407 1391 1395 1407 1392 1405 1392 > 11202 > > - numastat.8: is rewritten > - numastat: is replaced by numastat.c > - Makefile: is modified to compile numastat.c and to clean numastat > > Signed-off-by: Bill Gray <bgray@xxxxxxxxxx> > > > Makefile | 6 > numastat | 91 --- > numastat.8 | 213 +++++--- > numastat.c | 1548 > 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 1695 insertions(+), 163 deletions(-) > > > diff -ruN numactl-2.0.8-rc5.orig/Makefile numactl-2.0.8-rc5.new/Makefile > --- numactl-2.0.8-rc5.orig/Makefile 2012-08-23 15:50:37.000000000 -0400 > +++ numactl-2.0.8-rc5.new/Makefile 2012-09-06 13:58:40.056795146 -0400 > @@ -32,7 +32,7 @@ > test/mbind_mig_pages test/migrate_pages \ > migratepages migspeed migspeed.o libnuma.a \ > test/move_pages test/realloc_test sysfs.o affinity.o \ > - test/node-parse rtnetlink.o test/A > + test/node-parse rtnetlink.o test/A numastat > SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \ > numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \ > clearcache.c test/*.c affinity.c sysfs.c rtnetlink.c > @@ -45,10 +45,12 @@ > test/tshared stream test/mynode test/pagesize test/ftok > test/prefered \ > test/randmap test/nodemap test/distance test/tbitmap > test/move_pages \ > test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a \ > - test/node-parse > + test/node-parse numastat > > numactl: numactl.o util.o shm.o bitops.o libnuma.so > > +numastat: CFLAGS += -std=gnu99 > + > migratepages: migratepages.c util.o bitops.o libnuma.so > > migspeed: LDLIBS += -lrt > diff -ruN numactl-2.0.8-rc5.orig/numastat numactl-2.0.8-rc5.new/numastat > --- numactl-2.0.8-rc5.orig/numastat 2012-08-23 15:50:37.000000000 -0400 > +++ numactl-2.0.8-rc5.new/numastat 1969-12-31 19:00:00.000000000 -0500 > @@ -1,91 +0,0 @@ > -#!/usr/bin/perl > -# Print numa statistics for all nodes > -# Copyright (C) 2003,2004 Andi Kleen, SuSE Labs. > -# > -# numastat is free software; you can redistribute it and/or > -# modify it under the terms of the GNU General Public > -# License as published by the Free Software Foundation; version > -# 2. 
> -# > -# numastat is distributed in the hope that it will be useful, > -# but WITHOUT ANY WARRANTY; without even the implied warranty of > -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > -# General Public License for more details. > - > -# You should find a copy of v2 of the GNU General Public License somewhere > -# on your Linux system; if not, write to the Free Software Foundation, > -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ > -# > -# Example: NUMASTAT_WIDTH=80 watch -n1 numastat > -# > - > -# output width > -$WIDTH=80; > -if (defined($ENV{'NUMASTAT_WIDTH'})) { > - $WIDTH=$ENV{'NUMASTAT_WIDTH'}; > -} else { > - use POSIX; > - if (POSIX::isatty(fileno(STDOUT))) { > - if (open(R, "resize |")) { > - while (<R>) { > - $WIDTH=$1 if /COLUMNS=(\d+)/; > - } > - close R; > - } > - } else { > - # don't split it up for easier parsing > - $WIDTH=10000000; > - } > -} > -$WIDTH = 32 if $WIDTH < 32; > - > -if (! -d "/sys/devices/system/node" ) { > - print STDERR "sysfs not mounted or system not NUMA aware\n"; > - exit 1; > -} > - > -%stat = (); > -$title = ""; > -$mode = 0; > -opendir(NODES, "/sys/devices/system/node") || exit 1; > -foreach $nd (readdir(NODES)) { > - next unless $nd =~ /node(\d+)/; > - # On newer kernels, readdir may enumerate the 'node(\d+) subdirs > - # in opposite order from older kernels--e.g., node{0,1,2,...} > - # as opposed to node{N,N-1,N-2,...}. Accomodate this by > - # switching to new mode so that the stats get emitted in > - # the same order. > - #print "readdir(NODES) returns $nd\n"; > - if (!$title && $nd =~ /node0/) { > - $mode = 1; > - } > - open(STAT, "/sys/devices/system/node/$nd/numastat") || > - die "cannot open $nd: $!\n"; > - if (! $mode) { > - $title = sprintf("%16s",$nd) . $title; > - } else { > - $title = $title . sprintf("%16s",$nd); > - } > - @fields = (); > - while (<STAT>) { > - ($name, $val) = split; > - if (! $mode) { > - $stat{$name} = sprintf("%16u", $val) . 
$stat{$name}; > - } else { > - $stat{$name} = $stat{$name} . sprintf("%16u", $val); > - } > - push(@fields, $name); > - } > - close STAT; > -} > -closedir NODES; > - > -$numfields = int(($WIDTH - 16) / 16); > -$l = 16 * $numfields; > -for ($i = 0; $i < length($title); $i += $l) { > - print "\n" if $i > 0; > - printf "%16s%s\n","",substr($title,$i,$l); > - foreach (@fields) { > - printf "%-16s%s\n",$_,substr($stat{$_},$i,$l); > - } > -} > diff -ruN numactl-2.0.8-rc5.orig/numastat.8 numactl-2.0.8-rc5.new/numastat.8 > --- numactl-2.0.8-rc5.orig/numastat.8 2012-08-23 15:50:37.000000000 -0400 > +++ numactl-2.0.8-rc5.new/numastat.8 2012-10-07 00:05:46.676484265 -0400 > @@ -1,82 +1,155 @@ > -.\" t > -.\" Copyright 2004 Andi Kleen, SuSE Labs. > -.\" > -.\" Permission is granted to make and distribute verbatim copies of this > -.\" manual provided the copyright notice and this permission notice are > -.\" preserved on all copies. > -.\" > -.\" Permission is granted to copy and distribute modified versions of this > -.\" manual under the conditions for verbatim copying, provided that the > -.\" entire resulting derived work is distributed under the terms of a > -.\" permission notice identical to this one. > -.\" > -.\" Since the Linux kernel and libraries are constantly changing, this > -.\" manual page may be incorrect or out-of-date. The author(s) assume no > -.\" responsibility for errors or omissions, or for damages resulting from > -.\" the use of the information contained herein. > -.\" > -.\" Formatted or processed versions of this manual, if unaccompanied by > -.\" the source, must acknowledge the copyright and authors of this work. 
> -.TH NUMACTL 8 "Nov 2004" "SuSE Labs" "Linux Administrator's Manual" > -.SH NAME > -numastat \- Print statistics about NUMA memory allocation > -.SH SYNOPSIS > -numastat > -.SH DESCRIPTION > +.TH "numastat" "8" "1.0.0" "Bill Gray" "Administration" > +.SH "numastat" > +.LP > +\fBnumastat\fP \- Show per-NUMA-node memory statistics for processes > and the operating system > +.SH "SYNTAX" > +.LP > +\fBnumastat\fP > +.br > +.LP > +\fBnumastat\fP [\fI\-V\fP] > +.br > +.LP > +\fBnumastat\fP [\fI\<PID>|<pattern>...\fP] > +.br > +.LP > +\fBnumastat\fP [\fI\-c\fP] [\fI\-m\fP] [\fI\-n\fP] [\fI\-p > <PID>|<pattern>\fP] [\fI\-s[<node>]\fP] [\fI\-v\fP] [\fI\-z\fP] > [\fI\<PID>|<pattern>...\fP] > +.br > +.SH "DESCRIPTION" > +.LP > .B numastat > -displays NUMA allocations statistics from the kernel memory allocator. > -Each process has NUMA policies that specifies on which node pages > -are allocated. See > -.I set_mempolicy(2) > -or > -.I numactl(8) > -on details of the available policies. > -The numastat counters keep track on what nodes memory is finally allocated. > - > -The counters are separated for each node. Each count event is the > allocation > -of a page of memory. > - > +with no command options or arguments at all, displays per-node NUMA hit and > +miss system statistics from the kernel memory allocator. This default > +\fBnumastat\fP behavior is strictly compatible with the previous > long-standing > +\fBnumastat\fP perl script, written by Andi Kleen. The default > \fBnumastat\fP > +statistics shows per-node numbers (in units of pages of memory) in > these categories: > +.LP > .B numa_hit > -is the number of allocations where an allocation was intended for > -that node and succeeded there. > - > +is memory successfully allocated on this node as intended. > +.LP > .B numa_miss > -shows how often an allocation was intended for this node, but ended up > -on another node due to low memory. 
> - > +is memory allocated on this node despite the process preferring some > different node. Each > +.I numa_miss > +has a > +.I numa_foreign > +on another node. > +.LP > .B numa_foreign > -is the number of allocations that were intended for another node, > -but ended up on this node. Each > +is memory intended for this node, but actually allocated on some > different node. Each > .I numa_foreign > -event has a > +has a > .I numa_miss > on another node. > - > +.LP > .B interleave_hit > -is the number of interleave policy allocations that were intended for a > -specific node and succeeded there. > - > +is interleaved memory successfully allocated on this node as intended. > +.LP > .B local_node > -is incremented when a process running on the node allocated > -memory on the same node. > - > +is memory allocated on this node while a process was running on it. > +.LP > .B other_node > -is incremented when a process running on another node allocated memory > on that node. > -.SH SEE ALSO > -.I numactl(8) > -.I set_mempolicy(2) > -.I numa(3) > +is memory allocated on this node while a process was running on some > other node. > +.LP > +Any supplied options or arguments with the \fBnumastat\fP command will > +significantly change both the content and the format of the display. > Specified > +options will cause display units to change to megabytes of memory, and will > +change other specific behaviors of \fBnumastat\fP as described below. > +.SH "OPTIONS" > +.LP > +.TP > +\fB\-c\fR > +Minimize table display width by dynamically shrinking column widths > based on > +data contents. With this option, amounts of memory will be rounded to the > +nearest megabyte (rather than the usual display with two decimal places). > +Column width and inter-column spacing will be somewhat unpredictable > with this > +option, but the more dense display will be very useful on systems with many > +NUMA nodes. > +.TP > +\fB\-m\fR > +Show the meminfo-like system-wide memory usage information. 
This option > +produces a per-node breakdown of memory usage information similar to > that found > +in /proc/meminfo. > +.TP > +\fB\-n\fR > +Show the original \fBnumastat\fP statistics info. This will show the same > +information as the default \fBnumastat\fP behavior but the units will > be megabytes of > +memory, and there will be other formatting and layout changes versus the > +original \fBnumastat\fP behavior. > +.TP > +\fB\-p\fR <\fBPID\fP> or <\fBpattern\fP> > +Show per-node memory allocation information for the specified PID or > pattern. > +If the \-p argument is only digits, it is assumed to be a numerical > PID. If > +the argument characters are not only digits, it is assumed to be a text > +fragment pattern to search for in process command lines. For example, > +\fBnumastat -p qemu\fP will attempt to find and show information for > processes > +with "qemu" in the command line. Any command line arguments remaining > after > +\fBnumastat\fP option flag processing is completed, are assumed to be > +additional <\fBPID\fP> or <\fBpattern\fP> process specifiers. In this > sense, > +the \fB\-p\fP option flag is optional: \fBnumastat qemu\fP is equivalent to > +\fBnumastat -p qemu\fP > +.TP > +\fB\-s[<node>]\fR > +Sort the table data in descending order before displaying it, so the > biggest > +memory consumers are listed first. With no specified <node>, the table > will be > +sorted by the total column. If the optional <node> argument is > supplied, the > +data will be sorted by the <node> column. Note that <node> must follow the > +\fB\-s\fP immediately with no intermediate white space (e.g., \fBnumastat > +\-s2\fP). > +.TP > +\fB\-v\fR > +Make some reports more verbose. In particular, process information for > +multiple processes will display detailed information for each process. > +Normally when per-node information for multiple processes is displayed, > only > +the total lines are shown. 
> +.TP > +\fB\-V\fR > +Display \fBnumastat\fP version information and exit. > +.TP > +\fB\-z\fR > +Skip display of table rows and columns of only zero values. This can > be used > +to greatly reduce the amount of uninteresting zero data on systems with > many > +NUMA nodes. Note that when rows or columns of zeros are still > displayed with > +this option, that probably means there is at least one value in the row or > +column that is actually non-zero, but rounded to zero for display. > .SH NOTES > -numastat output is only available on NUMA systems. > - > -numastat assumes the output terminal has a width of 80 characters > -and tries to format the output accordingly. > -.SH EXAMPLES > -.I watch -n1 numastat > +\fBnumastat\fP attempts to fold each table display so it will be > conveniently > +readable on the output terminal. Normally a terminal width of 80 > characters is > +assumed. When the \fBresize\fP command is available, \fBnumastat\fP > attempts > +to dynamically determine and fine tune the output tty width from > \fBresize\fP > +output. If \fBnumastat\fP output is not to a tty, very long output > lines can > +be produced, depending on how many NUMA nodes are present. In all cases, > +output width can be explicitly specified via the \fBNUMASTAT_WIDTH\fP > +environment variable. For example, \fBNUMASTAT_WIDTH=100 numastat\fP. > On > +systems with many NUMA nodes, \fBnumastat \-c \-z ....\fP can be very > helpful > +to selectively reduce the amount of displayed information. 
> +.SH "ENVIRONMENT VARIABLES" > +.LP > +.TP > +NUMASTAT_WIDTH > +.SH "FILES" > +.LP > +\fI/proc/*/numa_maps\fP > +.br > +\fI/sys/devices/system/node/node*/meminfo\fP > +.br > +\fI/sys/devices/system/node/node*/numastat\fP > +.SH "EXAMPLES" > +.I numastat \-c \-z \-m \-n > +.br > +.I numastat \-czs libvirt kvm qemu > +.br > +.I watch \-n1 numastat > .br > -.I watch -n1 --differences=accumulative numastat > -.SH FILES > -/sys/devices/system/node/node*/numastat > -.SH BUGS > -The output formatting on machines with a large number of nodes > -could be improved. > +.I watch \-n1 \-\-differences=cumulative numastat > +.SH "AUTHORS" > +.LP > +The original \fBnumastat\fP perl script was written circa 2003 by Andi > Kleen > +<andi.kleen@xxxxxxxxx>. The current \fBnumastat\fP program was written > in 2012 > +by Bill Gray <bgray@xxxxxxxxxx> to be compatible by default with the > original, > +and to add options to display per-node system memory usage and per-node > process > +memory allocation. > +.SH "SEE ALSO" > +.LP > +.BR numactl (8), > +.BR set_mempolicy (2), > +.BR numa (3) > diff -ruN numactl-2.0.8-rc5.orig/numastat.c numactl-2.0.8-rc5.new/numastat.c > --- numactl-2.0.8-rc5.orig/numastat.c 1969-12-31 19:00:00.000000000 -0500 > +++ numactl-2.0.8-rc5.new/numastat.c 2012-09-06 22:48:18.704586776 -0400 > @@ -0,0 +1,1548 @@ > +/* > + > +numastat - NUMA monitoring tool to show per-node usage of memory > +Copyright (C) 2012 Bill Gray (bgray@xxxxxxxxxx), Red Hat Inc > + > +numastat is free software; you can redistribute it and/or modify it > under the > +terms of the GNU Lesser General Public License as published by the Free > +Software Foundation; version 2.1. > + > +numastat is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > FITNESS FOR A > +PARTICULAR PURPOSE. See the GNU Lesser General Public License for more > details. 
> + > +You should find a copy of v2.1 of the GNU Lesser General Public License > +somewhere on your Linux system; if not, write to the Free Software > Foundation, > +Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + > +*/ > + > + > +/* > + > +Historical note: From approximately 2003 to 2012, numastat was a perl > script > +written by Andi Kleen to display the > /sys/devices/system/node/node<N>/numastat > +statistics. In 2012, numastat was rewritten as a C program by Red Hat to > +display per-node memory data for applications and the system in general, > +while also remaining strictly compatible by default with the original > numastat. > +A copy of the original numastat perl script is included for reference > at the > +end of this file. > + > +*/ > + > + > +// Compile with: gcc -O -std=gnu99 -Wall -o numastat numastat.c > + > + > +#define __USE_MISC > +#include <ctype.h> > +#include <dirent.h> > +#include <errno.h> > +#include <getopt.h> > +#include <stdint.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <sys/types.h> > +#include <unistd.h> > + > + > +#define STRINGIZE(s) #s > +#define STRINGIFY(s) STRINGIZE(s) > + > +#define KILOBYTE (1024) > +#define MEGABYTE (1024 * 1024) > + > +#define BUF_SIZE 2048 > +#define SMALL_BUF_SIZE 128 > + > + > +// Don't assume nodes are sequential or contiguous. > +// Need to discover and map node numbers. > + > +int *node_ix_map = NULL; > +char **node_header; > + > + > +// Structure to organize memory info from /proc/<PID>/numa_maps for a > specific > +// process, or from /sys/devices/system/node/node?/meminfo for system-wide > +// data. Tables are defined below for each process and for system-wide > data. 
> + > +typedef struct meminfo { > + int index; > + char *token; > + char *label; > +} meminfo_t, *meminfo_p; > + > +#define PROCESS_HUGE_INDEX 0 > +#define PROCESS_PRIVATE_INDEX 3 > + > +meminfo_t process_meminfo[] = { > + { PROCESS_HUGE_INDEX, "huge", "Huge" }, > + { 1, "heap", "Heap" }, > + { 2, "stack", "Stack" }, > + { PROCESS_PRIVATE_INDEX, "N", "Private" } > +}; > + > +#define PROCESS_MEMINFO_ROWS (sizeof(process_meminfo) / > sizeof(process_meminfo[0])) > + > +meminfo_t numastat_meminfo[] = { > + { 0, "numa_hit", "Numa_Hit" }, > + { 1, "numa_miss", "Numa_Miss" }, > + { 2, "numa_foreign", "Numa_Foreign" }, > + { 3, "interleave_hit", "Interleave_Hit" }, > + { 4, "local_node", "Local_Node" }, > + { 5, "other_node", "Other_Node" }, > +}; > + > +#define NUMASTAT_MEMINFO_ROWS (sizeof(numastat_meminfo) / > sizeof(numastat_meminfo[0])) > + > +meminfo_t system_meminfo[] = { > + { 0, "MemTotal", "MemTotal" }, > + { 1, "MemFree", "MemFree" }, > + { 2, "MemUsed", "MemUsed" }, > + { 3, "HighTotal", "HighTotal" }, > + { 4, "HighFree", "HighFree" }, > + { 5, "LowTotal", "LowTotal" }, > + { 6, "LowFree", "LowFree" }, > + { 7, "Active", "Active" }, > + { 8, "Inactive", "Inactive" }, > + { 9, "Active(anon)", "Active(anon)" }, > + { 10, "Inactive(anon)", "Inactive(anon)" }, > + { 11, "Active(file)", "Active(file)" }, > + { 12, "Inactive(file)", "Inactive(file)" }, > + { 13, "Unevictable", "Unevictable" }, > + { 14, "Mlocked", "Mlocked" }, > + { 15, "Dirty", "Dirty" }, > + { 16, "Writeback", "Writeback" }, > + { 17, "FilePages", "FilePages" }, > + { 18, "Mapped", "Mapped" }, > + { 19, "AnonPages", "AnonPages" }, > + { 20, "Shmem", "Shmem" }, > + { 21, "KernelStack", "KernelStack" }, > + { 22, "PageTables", "PageTables" }, > + { 23, "NFS_Unstable", "NFS_Unstable" }, > + { 24, "Bounce", "Bounce" }, > + { 25, "WritebackTmp", "WritebackTmp" }, > + { 26, "Slab", "Slab" }, > + { 27, "SReclaimable", "SReclaimable" }, > + { 28, "SUnreclaim", "SUnreclaim" }, > + { 29, "AnonHugePages", 
"AnonHugePages" }, > + { 30, "HugePages_Total", "HugePages_Total" }, > + { 31, "HugePages_Free", "HugePages_Free" }, > + { 32, "HugePages_Surp", "HugePages_Surp" } > +}; > + > +#define SYSTEM_MEMINFO_ROWS (sizeof(system_meminfo) / > sizeof(system_meminfo[0])) > + > + > + > + > + > + > +// To allow re-ordering the meminfo memory categories in system_meminfo and > +// numastat_meminfo relative to order in /proc, etc., a simple hash > index is > +// used to look up the meminfo categories. The allocated hash table > size must > +// be bigger than necessary to reduce collisions (and because these > specific > +// hash algorithms depend on having some unused buckets. > + > +#define HASH_TABLE_SIZE 151 > +int hash_collisions = 0; > + > +struct hash_entry { > + char *name; > + int index; > +} hash_table[HASH_TABLE_SIZE]; > + > + > +void init_hash_table() { > + memset(hash_table, 0, sizeof(hash_table)); > +} > + > + > +int hash_ix(char *s) { > + unsigned int h = 17; > + while (*s) { > + // h * 33 + *s++ > + h = ((h << 5) + h) + *s++; > + } > + return (h % HASH_TABLE_SIZE); > +} > + > + > +int hash_lookup(char *s) { > + int ix = hash_ix(s); > + while (hash_table[ix].name) { // Assumes big table with blank entries > + if (!strcmp(s, hash_table[ix].name)) { > + return hash_table[ix].index; // found it > + } > + ix += 1; > + if (ix >= HASH_TABLE_SIZE) { > + ix = 0; > + } > + } > + return -1; > +} > + > + > +int hash_insert(char *s, int i) { > + int ix = hash_ix(s); > + while (hash_table[ix].name) { // assumes no duplicate entries > + hash_collisions += 1; > + ix += 1; > + if (ix >= HASH_TABLE_SIZE) { > + ix = 0; > + } > + } > + hash_table[ix].name = s; > + hash_table[ix].index = i; > + return ix; > +} > + > + > + > + > + > + > +// To decouple details of table display (e.g. column width, line > folding for > +// display screen width, et cetera) from acquiring the data and > populating the > +// tables, this semi-general table handling code is used. 
There are > various > +// routines to set table attributes, assign and test some cell contents, > +// initialize and actually display the table. > + > +#define CELL_TYPE_NULL 0 > +#define CELL_TYPE_LONG 1 > +#define CELL_TYPE_DOUBLE 2 > +#define CELL_TYPE_STRING 3 > +#define CELL_TYPE_CHAR8 4 > +#define CELL_TYPE_REPCHAR 5 > + > +#define CELL_FLAG_FREEABLE (1 << 0) > +#define CELL_FLAG_ROWSPAN (1 << 1) > +#define CELL_FLAG_COLSPAN (1 << 2) > + > +#define COL_JUSTIFY_LEFT (1 << 0) > +#define COL_JUSTIFY_RIGHT (1 << 1) > +#define COL_JUSTIFY_CENTER 3 > +#define COL_JUSTIFY_MASK 0x3 > +#define COL_FLAG_SEEN_DATA (1 << 2) > +#define COL_FLAG_NON_ZERO_DATA (1 << 3) > +#define COL_FLAG_ALWAYS_SHOW (1 << 4) > + > +#define ROW_FLAG_SEEN_DATA COL_FLAG_SEEN_DATA > +#define ROW_FLAG_NON_ZERO_DATA COL_FLAG_NON_ZERO_DATA > +#define ROW_FLAG_ALWAYS_SHOW COL_FLAG_ALWAYS_SHOW > + > +typedef struct cell { > + uint32_t type; > + uint32_t flags; > + union { > + char *s; > + double d; > + int64_t l; > + char c[8]; > + }; > +} cell_t, *cell_p; > + > +typedef struct vtab { > + int header_rows; > + int header_cols; > + int data_rows; > + int data_cols; > + cell_p cell; > + int *row_ix_map; > + uint8_t *row_flags; > + uint8_t *col_flags; > + uint8_t *col_width; > + uint8_t *col_decimal_places; > +} vtab_t, *vtab_p; > + > +#define ALL_TABLE_ROWS (table->header_rows + table->data_rows) > +#define ALL_TABLE_COLS (table->header_cols + table->data_cols) > +#define GET_CELL_PTR(row, col) (&table->cell[(row * ALL_TABLE_COLS) + col]) > + > +#define USUAL_GUTTER_WIDTH 1 > + > + > +void set_row_flag(vtab_p table, int row, int flag) { > + table->row_flags[row] |= (uint8_t)flag; > +} > + > +void set_col_flag(vtab_p table, int col, int flag) { > + table->col_flags[col] |= (uint8_t)flag; > +} > + > +void clear_row_flag(vtab_p table, int row, int flag) { > + table->row_flags[row] &= (uint8_t)~flag; > +} > + > +void clear_col_flag(vtab_p table, int col, int flag) { > + table->col_flags[col] &= 
(uint8_t)~flag; > +} > + > +int test_row_flag(vtab_p table, int row, int flag) { > + return ((table->row_flags[row] & (uint8_t)flag) != 0); > +} > + > +int test_col_flag(vtab_p table, int col, int flag) { > + return ((table->col_flags[col] & (uint8_t)flag) != 0); > +} > + > + > +void set_col_justification(vtab_p table, int col, int justify) { > + table->col_flags[col] &= (uint8_t)~COL_JUSTIFY_MASK; > + table->col_flags[col] |= (uint8_t)(justify & COL_JUSTIFY_MASK); > +} > + > + > +void set_col_width(vtab_p table, int col, uint8_t width) { > + if (width >= SMALL_BUF_SIZE) { > + width = SMALL_BUF_SIZE - 1; > + } > + table->col_width[col] = width; > +} > + > + > +void set_col_decimal_places(vtab_p table, int col, uint8_t places) { > + table->col_decimal_places[col] = places; > +} > + > + > +void set_cell_flag(vtab_p table, int row, int col, int flag) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->flags |= (uint32_t)flag; > +} > + > + > +void clear_cell_flag(vtab_p table, int row, int col, int flag) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->flags &= (uint32_t)~flag; > +} > + > + > +int test_cell_flag(vtab_p table, int row, int col, int flag) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + return ((c_ptr->flags & (uint32_t)flag) != 0); > +} > + > + > +void string_assign(vtab_p table, int row, int col, char *s) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->type = CELL_TYPE_STRING; > + c_ptr->s = s; > +} > + > + > +void repchar_assign(vtab_p table, int row, int col, char c) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->type = CELL_TYPE_REPCHAR; > + c_ptr->c[0] = c; > +} > + > + > +void double_assign(vtab_p table, int row, int col, double d) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->type = CELL_TYPE_DOUBLE; > + c_ptr->d = d; > +} > + > + > +void long_assign(vtab_p table, int row, int col, int64_t l) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->type = CELL_TYPE_LONG; > + c_ptr->l = l; > +} > + > + > +void 
double_addto(vtab_p table, int row, int col, double d) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->type = CELL_TYPE_DOUBLE; > + c_ptr->d += d; > +} > + > + > +void long_addto(vtab_p table, int row, int col, int64_t l) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + c_ptr->type = CELL_TYPE_LONG; > + c_ptr->l += l; > +} > + > + > +void clear_assign(vtab_p table, int row, int col) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + memset(c_ptr, 0, sizeof(cell_t)); > +} > + > + > +void zero_table_data(vtab_p table, int type) { > + // Sets data area of table to zeros of specified type > + for (int row = table->header_rows; (row < ALL_TABLE_ROWS); row++) { > + for (int col = table->header_cols; (col < ALL_TABLE_COLS); col++) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + memset(c_ptr, 0, sizeof(cell_t)); > + c_ptr->type = type; > + } > + } > +} > + > + > +void sort_rows_descending_by_col(vtab_p table, int start_row, int > stop_row, int col) { > + // Rearrange row_ix_map[] indices so the rows will be in > + // descending order by the value in the specified column > + for (int ix = start_row; (ix <= stop_row); ix++) { > + int biggest_ix = ix; > + cell_p biggest_ix_c_ptr = GET_CELL_PTR(table->row_ix_map[ix], col); > + for (int iy = ix + 1; (iy <= stop_row); iy++) { > + cell_p iy_c_ptr = GET_CELL_PTR(table->row_ix_map[iy], col); > + if (biggest_ix_c_ptr->d < iy_c_ptr->d) { > + biggest_ix_c_ptr = iy_c_ptr; > + biggest_ix = iy; > + } > + } > + if (biggest_ix != ix) { > + int tmp = table->row_ix_map[ix]; > + table->row_ix_map[ix] = table->row_ix_map[biggest_ix]; > + table->row_ix_map[biggest_ix] = tmp; > + } > + } > +} > + > + > +void span(vtab_p table, int first_row, int first_col, int last_row, int > last_col) { > + // FIXME: implement row / col spannnig someday? 
> +} > + > + > +void init_table(vtab_p table, int header_rows, int header_cols, int > data_rows, int data_cols) { > + // init table sizes > + table->header_rows = header_rows; > + table->header_cols = header_cols; > + table->data_rows = data_rows; > + table->data_cols = data_cols; > + // allocate memory for all the cells > + int alloc_size = ALL_TABLE_ROWS * ALL_TABLE_COLS * sizeof(cell_t); > + table->cell = malloc(alloc_size); > + if (table->cell == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + memset(table->cell, 0, alloc_size); > + // allocate memory for the row map vector > + alloc_size = ALL_TABLE_ROWS * sizeof(int); > + table->row_ix_map = malloc(alloc_size); > + if (table->row_ix_map == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + for (int row = 0; (row < ALL_TABLE_ROWS); row++) { > + table->row_ix_map[row] = row; > + } > + // allocate memory for the row flags vector > + alloc_size = ALL_TABLE_ROWS * sizeof(uint8_t); > + table->row_flags = malloc(alloc_size); > + if (table->row_flags == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + memset(table->row_flags, 0, alloc_size); > + // allocate memory for the column flags vector > + alloc_size = ALL_TABLE_COLS * sizeof(uint8_t); > + table->col_flags = malloc(alloc_size); > + if (table->col_flags == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + memset(table->col_flags, 0, alloc_size); > + // allocate memory for the column width vector > + alloc_size = ALL_TABLE_COLS * sizeof(uint8_t); > + table->col_width = malloc(alloc_size); > + if (table->col_width == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + memset(table->col_width, 0, alloc_size); > + // allocate memory for the column precision vector > + alloc_size = ALL_TABLE_COLS * sizeof(uint8_t); > + 
table->col_decimal_places = malloc(alloc_size); > + if (table->col_decimal_places == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + memset(table->col_decimal_places, 0, alloc_size); > +} > + > + > +void free_cell(vtab_p table, int row, int col) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + if ((c_ptr->type == CELL_TYPE_STRING) > + && (c_ptr->flags & CELL_FLAG_FREEABLE) > + && (c_ptr->s != NULL)) { > + free(c_ptr->s); > + } > + memset(c_ptr, 0, sizeof(cell_t)); > +} > + > + > +void free_table(vtab_p table) { > + if (table->cell != NULL) { > + for (int row = 0; (row < ALL_TABLE_ROWS); row++) { > + for (int col = 0; (col < ALL_TABLE_COLS); col++) { > + free_cell(table, row, col); > + } > + } > + free(table->cell); > + } > + if (table->row_ix_map != NULL) { > + free(table->row_ix_map); > + } > + if (table->row_flags != NULL) { > + free(table->row_flags); > + } > + if (table->col_flags != NULL) { > + free(table->col_flags); > + } > + if (table->col_width != NULL) { > + free(table->col_width); > + } > + if (table->col_decimal_places != NULL) { > + free(table->col_decimal_places); > + } > +} > + > + > +char *fmt_cell_data(cell_p c_ptr, int max_width, int decimal_places) { > + // Returns pointer to a static buffer, expecting caller to > + // immediately use or copy the contents before calling again. 
> + int rep_width = max_width - USUAL_GUTTER_WIDTH; > + static char buf[SMALL_BUF_SIZE]; > + switch (c_ptr->type) { > + case CELL_TYPE_NULL: > + buf[0] = '\0'; > + break; > + case CELL_TYPE_LONG: > + snprintf(buf, SMALL_BUF_SIZE, "%ld", c_ptr->l); > + break; > + case CELL_TYPE_DOUBLE: > + snprintf(buf, SMALL_BUF_SIZE, "%.*f", decimal_places, c_ptr->d); > + break; > + case CELL_TYPE_STRING: > + snprintf(buf, SMALL_BUF_SIZE, "%s", c_ptr->s); > + break; > + case CELL_TYPE_CHAR8: > + strncpy(buf, c_ptr->c, 8); > + buf[8] = '\0'; > + break; > + case CELL_TYPE_REPCHAR: > + memset(buf, c_ptr->c[0], rep_width); > + buf[rep_width] = '\0'; > + break; > + default: > + strcpy(buf, "Unknown"); > + break; > + } > + buf[max_width] = '\0'; > + return buf; > +} > + > + > +void auto_set_col_width(vtab_p table, int col, int min_width, int > max_width) { > + int width = min_width; > + for (int row = 0; (row < ALL_TABLE_ROWS); row++) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + if (c_ptr->type == CELL_TYPE_REPCHAR) { > + continue; > + } > + char *p = fmt_cell_data(c_ptr, max_width, > (int)(table->col_decimal_places[col])); > + int l = strlen(p); > + if (width < l) { > + width = l; > + } > + } > + width += USUAL_GUTTER_WIDTH; > + if (width > max_width) { > + width = max_width; > + } > + table->col_width[col] = (uint8_t)width; > +} > + > + > +void display_justified_cell(cell_p c_ptr, int row_flags, int col_flags, > int width, int decimal_places) { > + char *p = fmt_cell_data(c_ptr, width, decimal_places); > + int l = strlen(p); > + char buf[SMALL_BUF_SIZE]; > + switch (col_flags & COL_JUSTIFY_MASK) { > + case COL_JUSTIFY_LEFT: > + memcpy(buf, p, l); > + if (l < width) { > + memset(&buf[l], ' ', width - l); > + } > + break; > + case COL_JUSTIFY_RIGHT: > + if (l < width) { > + memset(buf, ' ', width - l); > + } > + memcpy(&buf[width - l], p, l); > + break; > + case COL_JUSTIFY_CENTER: > + default: > + memset(buf, ' ', width); > + memcpy(&buf[(width - l + 1) / 2], p, l); > + break; > + 
} > + buf[width] = '\0'; > + printf("%s", buf); > +} > + > + > +void display_table(vtab_p table, > + int screen_width, > + int show_unseen_rows, > + int show_unseen_cols, > + int show_zero_rows, > + int show_zero_cols) > +{ > + // Set row and column flags according to whether data in rows and cols > + // has been assigned, and is currently non-zero. > + int some_seen_data = 0; > + int some_non_zero_data = 0; > + for (int row = table->header_rows; (row < ALL_TABLE_ROWS); row++) { > + for (int col = table->header_cols; (col < ALL_TABLE_COLS); col++) { > + cell_p c_ptr = GET_CELL_PTR(row, col); > + // Currently, "seen data" includes not only numeric data, but also > + // any strings, etc -- anything non-NULL (other than rephcars). > + if ((c_ptr->type != CELL_TYPE_NULL) && (c_ptr->type != > CELL_TYPE_REPCHAR)) { > + some_seen_data = 1; > + set_row_flag(table, row, ROW_FLAG_SEEN_DATA); > + set_col_flag(table, col, COL_FLAG_SEEN_DATA); > + // Currently, "non-zero data" includes not only numeric data, > + // but also any strings, etc -- anything non-zero (other than > + // repchars, which are already excluded above). So, note a > + // valid non-NULL pointer to an empty string would still be > + // counted as non-zero data. 
> + if (c_ptr->l != (int64_t)0) { > + some_non_zero_data = 1; > + set_row_flag(table, row, ROW_FLAG_NON_ZERO_DATA); > + set_col_flag(table, col, COL_FLAG_NON_ZERO_DATA); > + } > + } > + } > + } > + if (!some_seen_data) { > + printf("Table has no data.\n"); > + return; > + } > + if (!some_non_zero_data && !show_zero_rows && !show_zero_cols) { > + printf("Table has no non-zero data.\n"); > + return; > + } > + // Start with first data column and try to display table, > + // folding lines as necessary per screen_width > + int col = -1; > + int data_col = table->header_cols; > + while (data_col < ALL_TABLE_COLS) { > + // Skip data columns until we have one to display > + if ((!test_col_flag(table, data_col, COL_FLAG_ALWAYS_SHOW)) && > + (((!show_unseen_cols) && (!test_col_flag(table, data_col, > COL_FLAG_SEEN_DATA))) || > + ((!show_zero_cols) && (!test_col_flag(table, data_col, > COL_FLAG_NON_ZERO_DATA))))) { > + data_col += 1; > + continue; > + } > + // Display blank line between table sections > + if (col > 0) { > + printf("\n"); > + } > + // For each row, display as many columns as possible > + for (int row_ix = 0; (row_ix < ALL_TABLE_ROWS); row_ix++) { > + int row = table->row_ix_map[row_ix]; > + // If past the header rows, conditionally skip rows > + if ((row >= table->header_rows) && (!test_row_flag(table, row, > ROW_FLAG_ALWAYS_SHOW))) { > + // Optionally skip row if no data seen or if all zeros > + if (((!show_unseen_rows) && (!test_row_flag(table, row, > ROW_FLAG_SEEN_DATA))) || > + ((!show_zero_rows) && (!test_row_flag(table, row, > ROW_FLAG_NON_ZERO_DATA)))) { > + continue; > + } > + } > + // Begin a new row... 
> + int cur_line_width = 0; > + // All lines start with the left header columns > + for (col = 0; (col < table->header_cols); col++) { > + display_justified_cell(GET_CELL_PTR(row, col), > + (int)(table->row_flags[row]), > + (int)(table->col_flags[col]), > + (int)(table->col_width[col]), > + (int)(table->col_decimal_places[col])); > + cur_line_width += (int)(table->col_width[col]); > + } > + // Reset column index to starting data column for each new row > + col = data_col; > + // Try to display as many data columns as possible in every section > + for (;;) { > + // See if we should print this column > + if (test_col_flag(table, col, COL_FLAG_ALWAYS_SHOW) || > + (((show_unseen_cols) || (test_col_flag(table, col, > COL_FLAG_SEEN_DATA))) && > + ((show_zero_cols) || (test_col_flag(table, col, > COL_FLAG_NON_ZERO_DATA))))) { > + display_justified_cell(GET_CELL_PTR(row, col), > + (int)(table->row_flags[row]), > + (int)(table->col_flags[col]), > + (int)(table->col_width[col]), > + (int)(table->col_decimal_places[col])); > + cur_line_width += (int)(table->col_width[col]); > + } > + col += 1; > + // End the line if no more columns or next column would exceed > screen width > + if ((col >= ALL_TABLE_COLS) || > + ((cur_line_width + (int)(table->col_width[col])) > screen_width)) { > + break; > + } > + } > + printf("\n"); > + } > + // Remember next starting data column for next section > + data_col = col; > + } > +} > + > + > + > + > + > + > +int verbose = 0; > +int num_pids = 0; > +int num_nodes = 0; > +int screen_width = 0; > +int show_zero_data = 1; > +int compress_display = 0; > +int sort_table = 0; > +int sort_table_node = -1; > +int compatibility_mode = 0; > +int pid_array_max_pids = 0; > +int *pid_array = NULL; > +char *prog_name = NULL; > +double page_size_in_bytes = 0; > +double huge_page_size_in_bytes = 0; > + > + > +void display_version_and_exit() { > + char *version_string = "20120821"; > + printf("%s version: %s: %s\n", prog_name, version_string, __DATE__); > + 
exit(EXIT_SUCCESS); > +} > + > + > +void display_usage_and_exit() { > + fprintf(stderr, "Usage: %s [-c] [-m] [-n] [-p <PID>|<pattern>] > [-s[<node>]] [-v] [-V] [-z] [ <PID>|<pattern>... ]\n", prog_name); > + fprintf(stderr, "-c to minimize column widths\n"); > + fprintf(stderr, "-m to show meminfo-like system-wide memory usage\n"); > + fprintf(stderr, "-n to show the numastat statistics info\n"); > + fprintf(stderr, "-p <PID>|<pattern> to show process info\n"); > + fprintf(stderr, "-s[<node>] to sort data by total column or <node>\n"); > + fprintf(stderr, "-v to make some reports more verbose\n"); > + fprintf(stderr, "-V to show the %s code version\n", prog_name); > + fprintf(stderr, "-z to skip rows and columns of zeros\n"); > + exit(EXIT_FAILURE); > +} > + > + > +int get_screen_width() { > + int width = 80; > + char *p = getenv("NUMASTAT_WIDTH"); > + if (p != NULL) { > + width = atoi(p); > + if ((width < 1) || (width > 10000000)) { > + width = 80; > + } > + } else if (isatty(fileno(stdout))) { > + FILE *fs = popen("resize 2>/dev/null", "r"); > + if (fs != NULL) { > + char columns[72]; > + fgets(columns, sizeof(columns), fs); > + pclose(fs); > + if (strncmp(columns, "COLUMNS=", 8) == 0) { > + width = atoi(&columns[8]); > + if ((width < 1) || (width > 10000000)) { > + width = 80; > + } > + } > + } > + } else { > + // Not a tty, so allow a really long line > + width = 10000000; > + } > + if (width < 32) { > + width = 32; > + } > + return width; > +} > + > + > +char *command_name_for_pid(int pid) { > + // Get the PID command name field from /proc/PID/status file. Return > + // pointer to a static buffer, expecting caller to immediately copy > result. 
> + static char buf[SMALL_BUF_SIZE]; > + char fname[64]; > + snprintf(fname, sizeof(fname), "/proc/%d/status", pid); > + FILE *fs = fopen(fname, "r"); > + if (!fs) { > + return NULL; > + } else { > + while (fgets(buf, SMALL_BUF_SIZE, fs)) { > + if (strstr(buf, "Name:") == buf) { > + char *p = &buf[5]; > + while (isspace(*p)) { > + p++; > + } > + if (p[strlen(p) - 1] == '\n') { > + p[strlen(p) - 1] = '\0'; > + } > + fclose(fs); > + return p; > + } > + } > + fclose(fs); > + } > + return NULL; > +} > + > + > +void show_info_from_system_file(char *file, meminfo_p meminfo, int > meminfo_rows, int tok_offset) { > + // Setup and init table > + vtab_t table; > + int header_rows = 2 - compatibility_mode; > + int header_cols = 1; > + // Add an extra data column for a total column > + init_table(&table, header_rows, header_cols, meminfo_rows, num_nodes + 1); > + int total_col_ix = header_cols + num_nodes; > + // Insert token mapping in hash table and assign left header column > label for each row in table > + init_hash_table(); > + for (int row = 0; (row < meminfo_rows); row++) { > + hash_insert(meminfo[row].token, meminfo[row].index); > + if (compatibility_mode) { > + string_assign(&table, (header_rows + row), 0, meminfo[row].token); > + } else { > + string_assign(&table, (header_rows + row), 0, meminfo[row].label); > + } > + } > + // printf("There are %d table hash collisions.\n", hash_collisions); > + // Set left header column width and left justify it > + set_col_width(&table, 0, 16); > + set_col_justification(&table, 0, COL_JUSTIFY_LEFT); > + // Open /sys/devices/system/node/node?/<file> for each node and store data > + // in table. If not compatibility_mode, do approximately first third of > + // this loop also for (node_ix == num_nodes) to get "Total" column header. > + for (int node_ix = 0; (node_ix < (num_nodes + (1 - > compatibility_mode))); node_ix++) { > + int col = header_cols + node_ix; > + // Assign header row label and horizontal line for this column... 
> + string_assign(&table, 0, col, node_header[node_ix]); > + if (!compatibility_mode) { > + repchar_assign(&table, 1, col, '-'); > + int decimal_places = 2; > + if (compress_display) { > + decimal_places = 0; > + } > + set_col_decimal_places(&table, col, decimal_places); > + } > + // Set column width and right justify data > + set_col_width(&table, col, 16); > + set_col_justification(&table, col, COL_JUSTIFY_RIGHT); > + if (node_ix == num_nodes) { > + break; > + } > + // Open /sys/.../node<N>/numstast file for this node... > + char buf[SMALL_BUF_SIZE]; > + char fname[64]; > + snprintf(fname, sizeof(fname), "/sys/devices/system/node/node%d/%s", > node_ix_map[node_ix], file); > + FILE *fs = fopen(fname, "r"); > + if (!fs) { > + sprintf(buf, "cannot open %s", fname); > + perror(buf); > + exit(EXIT_FAILURE); > + } > + // Get table values for this node... > + while (fgets(buf, SMALL_BUF_SIZE, fs)) { > + char *tok[64]; > + int tokens = 0; > + const char *delimiters = " \t\r\n:"; > + char *p = strtok(buf, delimiters); > + if (p == NULL) { > + continue; // Skip blank lines; > + } > + while (p) { > + tok[tokens++] = p; > + p = strtok(NULL, delimiters); > + } > + // example line from numastat file: "numa_miss 16463" > + // example line from meminfo file: "Node 3 Inactive: 210680 kB" > + int index = hash_lookup(tok[0 + tok_offset]); > + if (index < 0) { > + printf("Token %s not in hash table.\n", tok[0]); > + } else { > + double value = (double)atol(tok[1 + tok_offset]); > + if (!compatibility_mode) { > + double multiplier = 1.0; > + if (tokens < 5) { > + multiplier = page_size_in_bytes; > + } else if (!strncmp("HugePages", tok[2], 9)) { > + multiplier = huge_page_size_in_bytes; > + } else if (!strncmp("kB", tok[4], 2)) { > + multiplier = KILOBYTE; > + } > + value *= multiplier; > + value /= (double)MEGABYTE; > + } > + double_assign(&table, header_rows + index, col, value); > + double_addto(&table, header_rows + index, total_col_ix, value); > + } > + } > + fclose(fs); > + } > 
+ // Crompress display column widths, if requested > + if (compress_display) { > + for (int col = 0; (col < header_cols + num_nodes + 1); col++) { > + auto_set_col_width(&table, col, 4, 16); > + } > + } > + // Optionally sort the table data > + if (sort_table) { > + int sort_col; > + if ((sort_table_node < 0) || (sort_table_node >= num_nodes)) { > + sort_col = total_col_ix; > + } else { > + sort_col = header_cols + node_ix_map[sort_table_node]; > + } > + sort_rows_descending_by_col(&table, header_rows, header_rows + > meminfo_rows - 1, sort_col); > + } > + // Actually display the table now, doing line-folding as necessary > + display_table(&table, screen_width, 0, 0, show_zero_data, show_zero_data); > + free_table(&table); > +} > + > + > +void show_numastat_info() { > + if (!compatibility_mode) { > + printf("\nPer-node numastat info (in MBs):\n"); > + } > + show_info_from_system_file("numastat", numastat_meminfo, > NUMASTAT_MEMINFO_ROWS, 0); > +} > + > + > +void show_system_info() { > + printf("\nPer-node system memory usage (in MBs):\n"); > + show_info_from_system_file("meminfo", system_meminfo, > SYSTEM_MEMINFO_ROWS, 2); > +} > + > + > +void show_process_info() { > + vtab_t table; > + int header_rows = 2; > + int header_cols = 1; > + int data_rows; > + int show_sub_categories = (verbose || (num_pids == 1)); > + if (show_sub_categories) { > + data_rows = PROCESS_MEMINFO_ROWS; > + } else { > + data_rows = num_pids; > + } > + // Add two extra rows for a horizontal rule followed by a total row > + // Add one extra data column for a total column > + init_table(&table, header_rows, header_cols, data_rows + 2, num_nodes > + 1); > + int total_col_ix = header_cols + num_nodes; > + int total_row_ix = header_rows + data_rows + 1; > + string_assign(&table, total_row_ix, 0, "Total"); > + if (show_sub_categories) { > + // Assign left header column label for each row in table > + for (int row = 0; (row < PROCESS_MEMINFO_ROWS); row++) { > + string_assign(&table, (header_rows + 
row), 0, > process_meminfo[row].label); > + } > + } else { > + string_assign(&table, 0, 0, "PID"); > + repchar_assign(&table, 1, 0, '-'); > + printf("\nPer-node process memory usage (in MBs)\n"); > + } > + // Set left header column width and left justify it > + set_col_width(&table, 0, 16); > + set_col_justification(&table, 0, COL_JUSTIFY_LEFT); > + // Set up "Node <N>" column headers over data columns, plus "Total" column > + for (int node_ix = 0; (node_ix <= num_nodes); node_ix++) { > + int col = header_cols + node_ix; > + // Assign header row label and horizontal line for this column... > + string_assign(&table, 0, col, node_header[node_ix]); > + repchar_assign(&table, 1, col, '-'); > + // Set column width, decimal places, and right justify data > + set_col_width(&table, col, 16); > + int decimal_places = 2; > + if (compress_display) { > + decimal_places = 0; > + } > + set_col_decimal_places(&table, col, decimal_places); > + set_col_justification(&table, col, COL_JUSTIFY_RIGHT); > + } > + // Initialize data in table to all zeros > + zero_table_data(&table, CELL_TYPE_DOUBLE); > + // If (show_sub_categories), show individual process tables for each PID, > + // Otherwise show one big table of process total lines from all the PIDs. > + for (int pid_ix = 0; (pid_ix < num_pids); pid_ix++) { > + int pid = pid_array[pid_ix]; > + if (show_sub_categories) { > + printf("\nPer-node process memory usage (in MBs) for PID %d (%s)\n", > pid, command_name_for_pid(pid)); > + if (pid_ix > 0) { > + // Re-initialize show_sub_categories table, because we re-use it > for each PID. 
> + zero_table_data(&table, CELL_TYPE_DOUBLE); > + } > + } else { > + // Put this row's "PID (cmd)" label in left header column for this > PID total row > + char tmp_buf[64]; > + snprintf(tmp_buf, sizeof(tmp_buf), "%d (%s)", pid, > command_name_for_pid(pid)); > + char *p = strdup(tmp_buf); > + if (p == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + string_assign(&table, header_rows + pid_ix, 0, p); > + set_cell_flag(&table, header_rows + pid_ix, 0, CELL_FLAG_FREEABLE); > + } > + // Open numa_map for this PID to get per-node data > + char fname[64]; > + snprintf(fname, sizeof(fname), "/proc/%d/numa_maps", pid); > + char buf[BUF_SIZE]; > + FILE *fs = fopen(fname, "r"); > + if (!fs) { > + sprintf(buf, "Can't read /proc/%d/numa_maps", pid); > + perror(buf); > + continue; > + } > + // Add up sub-category memory used from each node. Must go line by line > + // through the numa_map figuring out which category memory, node, and the > + // amount. > + while (fgets(buf, BUF_SIZE, fs)) { > + int category = PROCESS_PRIVATE_INDEX; // init category to the > catch-all... > + const char *delimiters = " \t\r\n"; > + char *p = strtok(buf, delimiters); > + while (p) { > + // If the memory category for this line is still the catch-all > + // (i.e. private), then see if the current token is a special > + // keyword for a specific memory sub-category. > + if (category == PROCESS_PRIVATE_INDEX) { > + for (int ix = 0; (ix < PROCESS_PRIVATE_INDEX); ix++) { > + if (!strncmp(p, process_meminfo[ix].token, > strlen(process_meminfo[ix].token))) { > + category = ix; > + break; > + } > + } > + } > + // If the current token is a per-node pages quantity, parse the > + // node number and accumulate the number of pages in the specific > + // category (and also add to the total). 
> + if (p[0] == 'N') { > + int node_num = (int)strtol(&p[1], &p, 10); > + if (p[0] != '=') { > + perror("node value parse error"); > + exit(EXIT_FAILURE); > + } > + double value = (double)strtol(&p[1], &p, 10); > + double multiplier = page_size_in_bytes; > + if (category == PROCESS_HUGE_INDEX) { > + multiplier = huge_page_size_in_bytes; > + } > + value *= multiplier; > + value /= (double)MEGABYTE; > + // Add value to data cell, total_col, and total_row > + int tmp_row; > + if (show_sub_categories) { > + tmp_row = header_rows + category; > + } else { > + tmp_row = header_rows + pid_ix; > + } > + int tmp_col = header_cols + node_num; > + double_addto(&table, tmp_row, tmp_col, value); > + double_addto(&table, tmp_row, total_col_ix, value); > + double_addto(&table, total_row_ix, tmp_col, value); > + double_addto(&table, total_row_ix, total_col_ix, value); > + } > + // Get next token on the line > + p = strtok(NULL, delimiters); > + } > + } > + // Currently, a non-root user can open some numa_map files successfully > + // without error, but can't actually read the contents -- despite the > + // 444 file permissions. So, use ferror() to check here to see if we > + // actually got a read error, and if so, alert the user so they know > + // not to trust the zero in the table. > + if (ferror(fs)) { > + sprintf(buf, "Can't read /proc/%d/numa_maps", pid); > + perror(buf); > + } > + fclose(fs); > + // If showing individual tables, or we just added the last total line, > + // prepare the table for display and display it... > + if ((show_sub_categories) || (pid_ix + 1 == num_pids)) { > + // Crompress display column widths, if requested > + if (compress_display) { > + for (int col = 0; (col < header_cols + num_nodes + 1); col++) { > + auto_set_col_width(&table, col, 4, 16); > + } > + } else { > + // Since not compressing the display, allow the left header > + // column to be wider. 
Otherwise, sometimes process command > + // name instance numbers can be truncated in an annoying way. > + auto_set_col_width(&table, 0, 16, 24); > + } > + // Put dashes above Total line... > + set_row_flag(&table, total_row_ix - 1, COL_FLAG_ALWAYS_SHOW); > + for (int col = 0; (col < header_cols + num_nodes + 1); col++) { > + repchar_assign(&table, total_row_ix - 1, col, '-'); > + } > + // Optionally sort the table data > + if (sort_table) { > + int sort_col; > + if ((sort_table_node < 0) || (sort_table_node >= num_nodes)) { > + sort_col = total_col_ix; > + } else { > + sort_col = header_cols + node_ix_map[sort_table_node]; > + } > + sort_rows_descending_by_col(&table, header_rows, header_rows + > data_rows - 1, sort_col); > + } > + // Actually show the table > + display_table(&table, screen_width, 0, 0, show_zero_data, > show_zero_data); > + } > + } // END OF FOR_EACH-PID loop > + free_table(&table); > +} // show_process_info() > + > + > +int node_and_digits(const struct dirent *dptr) { > + char *p = (char *)(dptr->d_name); > + if (*p++ != 'n') return 0; > + if (*p++ != 'o') return 0; > + if (*p++ != 'd') return 0; > + if (*p++ != 'e') return 0; > + do { > + if (!isdigit(*p++)) return 0; > + } while (*p != '\0'); > + return 1; > +} > + > + > +void init_node_ix_map_and_header(int compatibility_mode) { > + // Count directory names of the form: /sys/devices/system/node/node<N> > + struct dirent **namelist; > + num_nodes = scandir("/sys/devices/system/node", &namelist, > node_and_digits, NULL); > + if (num_nodes < 1) { > + if (compatibility_mode) { > + perror("sysfs not mounted or system not NUMA aware"); > + } else { > + perror("Couldn't open /sys/devices/system/node"); > + } > + exit(EXIT_FAILURE); > + } else { > + node_ix_map = malloc(num_nodes * sizeof(int)); > + if (node_ix_map == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + // For each "node<N>" filename present, save <N> in node_ix_map > + for (int ix = 0; 
(ix < num_nodes); ix++) { > + node_ix_map[ix] = atoi(&namelist[ix]->d_name[4]); > + free(namelist[ix]); > + } > + free(namelist); > + // Now, sort the node map in increasing order. Use a simplistic sort > + // since we expect a relatively short (and maybe pre-ordered) list. > + for (int ix = 0; (ix < num_nodes); ix++) { > + int smallest_ix = ix; > + for (int iy = ix + 1; (iy < num_nodes); iy++) { > + if (node_ix_map[smallest_ix] > node_ix_map[iy]) { > + smallest_ix = iy; > + } > + } > + if (smallest_ix != ix) { > + int tmp = node_ix_map[ix]; > + node_ix_map[ix] = node_ix_map[smallest_ix]; > + node_ix_map[smallest_ix] = tmp; > + } > + } > + // Construct vector of "Node <N>" and "Total" column headers. Allocate > + // one for each NUMA node, plus one on the end for the "Total" column > + node_header = malloc((num_nodes + 1) * sizeof(char *)); > + if (node_header == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + for (int node_ix = 0; (node_ix <= num_nodes); node_ix++) { > + char node_label[64]; > + if (node_ix == num_nodes) { > + strcpy(node_label, "Total"); > + } else if (compatibility_mode) { > + snprintf(node_label, sizeof(node_label), "node%d", > node_ix_map[node_ix]); > + } else { > + snprintf(node_label, sizeof(node_label), "Node %d", > node_ix_map[node_ix]); > + } > + char *s = strdup(node_label); > + if (s == NULL) { > + perror("malloc failed line: " STRINGIFY(__LINE__)); > + exit(EXIT_FAILURE); > + } > + node_header[node_ix] = s; > + } > + } > +} > + > + > +void free_node_ix_map_and_header() { > + if (node_ix_map != NULL) { > + free(node_ix_map); > + node_ix_map = NULL; > + } > + if (node_header != NULL) { > + for (int ix = 0; (ix <= num_nodes); ix++) { > + free(node_header[ix]); > + } > + free(node_header); > + node_header = NULL; > + } > +} > + > + > +double get_huge_page_size_in_bytes() { > + double huge_page_size = 0;; > + FILE *fs = fopen("/proc/meminfo", "r"); > + if (!fs) { > + perror("Can't open 
/proc/meminfo"); > + exit(EXIT_FAILURE); > + } > + char buf[SMALL_BUF_SIZE]; > + while (fgets(buf, SMALL_BUF_SIZE, fs)) { > + if (!strncmp("Hugepagesize", buf, 12)) { > + char *p = &buf[12]; > + while ((!isdigit(*p)) && (p < buf + SMALL_BUF_SIZE)) { > + p++; > + } > + huge_page_size = strtod(p, NULL); > + break; > + } > + } > + fclose(fs); > + return huge_page_size * KILOBYTE; > +} > + > + > +int all_digits(char *p) { > + if (p == NULL) { > + return 0; > + } > + while (*p != '\0') { > + if (!isdigit(*p++)) return 0; > + } > + return 1; > +} > + > + > +int starts_with_digit(const struct dirent *dptr) { > + return (isdigit(dptr->d_name[0])); > +} > + > + > +void add_pid_to_list(int pid) { > + if (num_pids < pid_array_max_pids) { > + pid_array[num_pids++] = pid; > + } else { > + if (pid_array_max_pids == 0) { > + pid_array_max_pids = 32; > + } > + int *tmp_int_ptr = realloc(pid_array, 2 * pid_array_max_pids * > sizeof(int)); > + if (tmp_int_ptr == NULL) { > + char buf[SMALL_BUF_SIZE]; > + sprintf(buf, "Too many PIDs, skipping %d", pid); > + perror(buf); > + } else { > + pid_array = tmp_int_ptr; > + pid_array_max_pids *= 2; > + pid_array[num_pids++] = pid; > + } > + } > +} > + > + > +int ascending(const void *p1, const void *p2) { > + return *(int *)p1 - *(int *) p2; > +} > + > +void sort_pids_and_remove_duplicates() { > + if (num_pids > 1) { > + qsort(pid_array, num_pids, sizeof(int), ascending); > + int ix1 = 0; > + for (int ix2 = 1; (ix2 < num_pids); ix2++) { > + if (pid_array[ix2] == pid_array[ix1]) { > + continue; > + } > + ix1 += 1; > + if (ix2 > ix1) { > + pid_array[ix1] = pid_array[ix2]; > + } > + } > + num_pids = ix1 + 1; > + } > +} > + > + > +void add_pids_from_pattern_search(char *pattern) { > + // Search all /proc/<PID>/cmdline files and /proc/<PID>/status:Name fields > + // for matching patterns. Show the memory details for matching PIDs. 
> + int num_matches_found = 0; > + struct dirent **namelist; > + int files = scandir("/proc", &namelist, starts_with_digit, NULL); > + if (files < 0) { > + perror("Couldn't open /proc"); > + } > + for (int ix = 0; (ix < files); ix++) { > + char buf[BUF_SIZE]; > + // First get Name field from status file > + int pid = atoi(namelist[ix]->d_name); > + char *p = command_name_for_pid(pid); > + if (p) { > + strcpy(buf, p); > + } else { > + buf[0] = '\0'; > + } > + // Next copy cmdline file contents onto end of buffer. Do it a > + // character at a time to convert nulls to spaces. > + char fname[64]; > + snprintf(fname, sizeof(fname), "/proc/%s/cmdline", namelist[ix]->d_name); > + FILE *fs = fopen(fname, "r"); > + if (fs) { > + p = buf; > + while (*p != '\0') { > + p++; > + } > + *p++ = ' '; > + int c; > + while (((c = fgetc(fs)) != EOF) && (p < buf + BUF_SIZE - 1)) { > + if (c == '\0') { > + c = ' '; > + } > + *p++ = c; > + } > + *p++ = '\0'; > + fclose(fs); > + } > + if (strstr(buf, pattern)) { > + if (pid != getpid()) { > + add_pid_to_list(pid); > + num_matches_found += 1; > + } > + } > + free(namelist[ix]); > + } > + free(namelist); > + if (num_matches_found == 0) { > + printf("Found no processes containing pattern: \"%s\"\n", pattern); > + } > +} > + > + > +int main(int argc, char **argv) { > + prog_name = argv[0]; > + int show_the_system_info = 0; > + int show_the_numastat_info = 0; > + static struct option long_options[] = { > + {"help", 0, 0, '?'}, > + {0, 0, 0, 0} > + }; > + int long_option_index = 0; > + int opt; > + while ((opt = getopt_long(argc, argv, "cmnp:s::vVz?", long_options, > &long_option_index)) != -1) { > + switch (opt) { > + case 0: > + printf("Unexpected long option %s", > long_options[long_option_index].name); > + if (optarg) { > + printf(" with arg %s", optarg); > + } > + printf("\n"); > + display_usage_and_exit(); > + break; > + case 'c': > + compress_display = 1; > + break; > + case 'm': > + show_the_system_info = 1; > + break; > + case 'n': > 
+ show_the_numastat_info = 1; > + break; > + case 'p': > + if ((optarg) && (all_digits(optarg))) { > + add_pid_to_list(atoi(optarg)); > + } else { > + add_pids_from_pattern_search(optarg); > + } > + break; > + case 's': > + sort_table = 1; > + if ((optarg) && (all_digits(optarg))) { > + sort_table_node = atoi(optarg); > + } > + break; > + case 'v': > + verbose = 1; > + break; > + case 'V': > + display_version_and_exit(); > + break; > + case 'z': > + show_zero_data = 0; > + break; > + default: > + case '?': > + display_usage_and_exit(); > + break; > + } > + } > + // Figure out the display width, which is used to format the tables > + // and limit the output columns per row > + screen_width = get_screen_width(); > + // Any remaining arguments are assumed to be additional process specifiers > + while (optind < argc) { > + if (all_digits(argv[optind])) { > + add_pid_to_list(atoi(argv[optind])); > + } else { > + add_pids_from_pattern_search(argv[optind]); > + } > + optind += 1; > + } > + // If there are no program options or arguments, be extremely compatible > + // with the old numastat perl script (which is included at the end of this > + // file for reference) > + compatibility_mode = (argc == 1); > + init_node_ix_map_and_header(compatibility_mode); // enumarate the NUMA > nodes > + if (compatibility_mode) { > + show_numastat_info(); > + free_node_ix_map_and_header(); > + exit(EXIT_SUCCESS); > + } > + // Figure out page sizes > + page_size_in_bytes = (double)sysconf(_SC_PAGESIZE); > + huge_page_size_in_bytes = get_huge_page_size_in_bytes(); > + // Display the info for the process specifiers > + if (num_pids > 0) { > + sort_pids_and_remove_duplicates(); > + show_process_info(); > + } > + if (pid_array != NULL) { > + free(pid_array); > + } > + // Display the system-wide memory usage info > + if (show_the_system_info) { > + show_system_info(); > + } > + // Display the numastat statistics info > + if ((show_the_numastat_info) || ((num_pids == 0) && > 
(!show_the_system_info))) { > + show_numastat_info(); > + } > + free_node_ix_map_and_header(); > + exit(EXIT_SUCCESS); > +} > + > + > + > + > + > + > +#if 0 > +/* > + > + > +#!/usr/bin/perl > +# Print numa statistics for all nodes > +# Copyright (C) 2003,2004 Andi Kleen, SuSE Labs. > +# > +# numastat is free software; you can redistribute it and/or > +# modify it under the terms of the GNU General Public > +# License as published by the Free Software Foundation; version > +# 2. > +# > +# numastat is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# General Public License for more details. > + > +# You should find a copy of v2 of the GNU General Public License somewhere > +# on your Linux system; if not, write to the Free Software Foundation, > +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > +# > +# Example: NUMASTAT_WIDTH=80 watch -n1 numastat > +# > + > +# output width > +$WIDTH=80; > +if (defined($ENV{'NUMASTAT_WIDTH'})) { > + $WIDTH=$ENV{'NUMASTAT_WIDTH'}; > +} else { > + use POSIX; > + if (POSIX::isatty(fileno(STDOUT))) { > + if (open(R, "resize |")) { > + while (<R>) { > + $WIDTH=$1 if /COLUMNS=(\d+)/; > + } > + close R; > + } > + } else { > + # don't split it up for easier parsing > + $WIDTH=10000000; > + } > +} > +$WIDTH = 32 if $WIDTH < 32; > + > +if (! -d "/sys/devices/system/node" ) { > + print STDERR "sysfs not mounted or system not NUMA aware\n"; > + exit 1; > +} > + > +%stat = (); > +$title = ""; > +$mode = 0; > +opendir(NODES, "/sys/devices/system/node") || exit 1; > +foreach $nd (readdir(NODES)) { > + next unless $nd =~ /node(\d+)/; > + # On newer kernels, readdir may enumerate the 'node(\d+) subdirs > + # in opposite order from older kernels--e.g., node{0,1,2,...} > + # as opposed to node{N,N-1,N-2,...}. 
Accomodate this by > + # switching to new mode so that the stats get emitted in > + # the same order. > + #print "readdir(NODES) returns $nd\n"; > + if (!$title && $nd =~ /node0/) { > + $mode = 1; > + } > + open(STAT, "/sys/devices/system/node/$nd/numastat") || > + die "cannot open $nd: $!\n"; > + if (! $mode) { > + $title = sprintf("%16s",$nd) . $title; > + } else { > + $title = $title . sprintf("%16s",$nd); > + } > + @fields = (); > + while (<STAT>) { > + ($name, $val) = split; > + if (! $mode) { > + $stat{$name} = sprintf("%16u", $val) . $stat{$name}; > + } else { > + $stat{$name} = $stat{$name} . sprintf("%16u", $val); > + } > + push(@fields, $name); > + } > + close STAT; > +} > +closedir NODES; > + > +$numfields = int(($WIDTH - 16) / 16); > +$l = 16 * $numfields; > +for ($i = 0; $i < length($title); $i += $l) { > + print "\n" if $i > 0; > + printf "%16s%s\n","",substr($title,$i,$l); > + foreach (@fields) { > + printf "%-16s%s\n",$_,substr($stat{$_},$i,$l); > + } > +} > + > + > +*/ > +#endif -- Cliff Wickman SGI cpw@xxxxxxx (651) 683-3824 -- To unsubscribe from this list: send the line "unsubscribe linux-numa" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html