[PATCH] add a new command: cgget

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello Dave,

I made a new command, 'cgget', to help investigate the parameters
of cgroups.

The following are two examples.

1. Print parameters of some cgroup
crash> cgget -g cpu /
/:
cpu.rt_period_us: 1000000
cpu.rt_runtime_us: 950000
cpu.stat:
        nr_periods: 0
        nr_throttled: 0
        throttled_time: 0
cpu.cfs_period_us: 0
cpu.cfs_quota_us: 0
cpu.shares: 1024

2. Print parameters of all cgroups
crash> cgget -a /
/:
cpuset.cpu_exclusive: 1
cpuset.mem_exclusive: 1
cpuset.mem_hardwall: 0
cpuset.memory_migrate: 0
cpuset.sched_load_balance: 1
cpuset.memory_spread_page: 0
cpuset.memory_spread_slab: 0
cpuset.memory_pressure_enabled: 0
cpuset.memory_pressure: 0
cpuset.sched_relax_domain_level: -1
cpuset.mems: 0
cpuset.cpus: 0-3
...
blkio.io_merged:
        8:0 Read 10925
        8:0 Write 31704
        8:0 Sync 25413
        8:0 Async 17216
        8:0 Total 42629
        Total 42629
blkio.io_queued:
        8:0 Read 0
        8:0 Write 0
        8:0 Sync 0
        8:0 Async 0
        8:0 Total 0
        Total 0
blkio.reset_stats:

To build the module from the top-level crash-<version> directory, enter:
  $ cp <path-to>/cgget.c extensions
  $ make extensions

Please refer to the attachment for more information. I look
forward to your advice.

--
Zhang Xiaohe
Regards
--------------------------------------------------
Development Dept.I
Nanjing Fujitsu Nanda Software Tech. Co., Ltd.(FNST)
No. 6 Wenzhu Road, Nanjing, 210012, China
TEL: +86+25-86630566-8552
FAX: +86+25-83317685
MAIL: zhangxh@xxxxxxxxxxxxxx
--------------------------------------------------
/*
 * cgget.c - Display the parameters of cgroup.
 *
 * Copyright (C) 2012 FUJITSU LIMITED
 * Author: Zhang Xiaohe <zhangxh@xxxxxxxxxxxxxx>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include "defs.h"
#include <getopt.h>

/*
 * Tick-rate and time-unit constants used when converting kernel time values.
 *
 * HZ is forced to 1000 (assumed to be the kernel default after 2.6).  Any HZ
 * inherited from an included header is dropped first.  The #define must stay
 * outside the #ifdef: otherwise HZ would be left undefined whenever no header
 * provides it, and jiffies_64_to_clock_t() would not compile.
 */
#ifdef HZ
#undef HZ
#endif
#define HZ 1000
/* USER_HZ is 100 only on X86 platform */
#define USER_HZ 100
#define NSEC_PER_SEC 1000000000ULL
#define NSEC_PER_USEC 1000L

/* Device types understood by type_to_char(). */
#define DEV_BLOCK 1
#define DEV_CHAR  2
#define DEV_ALL   4
/* Buffer sizes for the major/minor and access strings built by
 * set_majmin() and set_access(). */
#define MAJMINLEN 13
#define ACCLEN 4
/* Access bits decoded by set_access(). */
#define ACC_MKNOD 1
#define ACC_READ  2
#define ACC_WRITE 4

/* Building blocks for the values of enum lru_list. */
#define LRU_BASE 0
#define LRU_ACTIVE 1
#define LRU_FILE 2

#define CGROUP_HIER_MAX 100
/* Size of the subsystem-name buffers (struct cgroup_spec, cgroup_subsys_table). */
#define CGROUP_STR_LEN 32
#define MODE_SEPARATE_PATH 0x01
#define MODE_COMBINE_PATH 0x02

/* Store the offset of member X of kernel struct STRUCT into TABLE.MEMBER. */
#define CGGET_MEMBER_OFFSET_INIT(TABLE, MEMBER, STRUCT, X)		\
	TABLE.MEMBER = MEMBER_OFFSET(STRUCT, X)
/* Number of longs needed to hold nr bits. */
#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)

/* Pack x into the bits above bit 15 and OR val into the low bits. */
#define BLKIO_MERGE_POL(x, val)		(((x) << 16) | (val))
/* Device-number split: the low MINORBITS bits are the minor, the rest the major. */
#define MINORBITS       20
#define MINORMASK       ((1U << MINORBITS) - 1)
#define MAJOR(dev)      ((unsigned int) ((dev) >> MINORBITS))
#define MINOR(dev)      ((unsigned int) ((dev) & MINORMASK))

/* Alias for the run-time subsystem counter cgroup_subsys_num. */
#define CGROUP_SUBSYS_COUNT cgroup_subsys_num

#define CGROUP_NOT_SUPPORT 0
#define CGROUP_SUPPORTED   1

/*
 * Iterate cpu over every possible CPU: start at -1 and advance with
 * next_possible_cpu() until the value reaches kt->cpus.  The advance
 * happens in the loop condition (comma expression), so the loop has no
 * increment clause.
 */
#define for_each_possible_cpu(cpu)				\
	for ((cpu) = -1; (cpu) = next_possible_cpu(cpu),	\
			 (cpu) < kt->cpus;)

/*
 * One requested cgroup: a subsystem (controller) name plus a path.
 * read_whitelist() prints subsys_str as the prefix of the parameter name.
 */
struct cgroup_spec {
	char subsys_str[CGROUP_STR_LEN];
	char path[FILENAME_MAX];
};

/*
 * Offset tables, one per kernel structure family.
 *
 * Each field holds a byte offset that is added to a structure address
 * before the value is read with readmem() (see e.g. cpuacct_print_stat()
 * and get_mz_lru_val()).  The functions that fill these tables are outside
 * this view; presumably each field mirrors the kernel member its name
 * suggests - confirm against the *_offset_table_init() functions.
 */

/* Offsets for cgroupfs_root. */
struct cgroupfs_root_offset_table {
	long cgroupfs_root_top_cgroup;
	long cgroupfs_root_number_of_cgroups;
	long cgroupfs_root_root_list;
};

/* Offsets for cgroup. */
struct cgroup_offset_table {
	long cgroup_sibling;
	long cgroup_children;
	long cgroup_parent;
	long cgroup_dentry;
	long cgroup_subsys;
};

/* Offsets for the cpuset subsystem. */
struct cpuset_offset_table {
	long cpuset_flags;
	long cpuset_cpus_allowed;
	long cpuset_mems_allowed;
	long cpuset_fmeter;
	long cpuset_shed_relax_domain_level;
};

/* Offsets for the cpu subsystem (task group). */
struct tg_offset_table {
	long tg_shares;
	long tg_rt_bandwidth;
	long tg_cfs_bandwidth;
};

/* Offsets for the cpuacct subsystem. */
struct cpuacct_offset_table {
	long cpuacct_cpuusage;
	long cpuacct_cpustat;
};

/* Offsets for the hugetlb subsystem. */
struct hugetlb_offset_table {
	long hugetlb_hugepage;
};

/* Offsets for the memory subsystem, its resource counters and per-zone data. */
struct memory_offset_table {
	long memory_res;
	long memory_memsw;
	long memory_info;
	long memory_stat;
	long memory_oom_kill_disable;
	long memory_under_oom;
	long memory_mcai;
	long memory_swappiness;
	long memory_use_hierarchy;
	long counter_usage;
	long counter_max_usage;
	long counter_limit;
	long counter_soft_limit;
	long counter_failcnt;
	long perzone_count;
};

/* Offsets for the devices subsystem. */
struct devices_offset_table {
	long devices_whitelist;
};

/* Offsets for the freezer subsystem. */
struct freezer_offset_table {
	long freezer_state;
};

/* Offsets for the net_cls subsystem. */
struct cls_offset_table {
	long cls_classid;
};

/* Offsets for the blkio subsystem: groups, policy nodes and cfq statistics. */
struct blkio_offset_table {
	long blkio_blkg_list;
	long blkio_policy_list;
	long blkio_weight;
	long blkg_blkcg_node;
	long blkg_dev;
	long blkg_plid;
	long blkg_stats;
	long blkg_stats_cpu;
	long blkp_dev;
	long blkp_plid;
	long blkp_fileid;
	long blkp_weight;
	long blkg_pd;
	long cfq_group_stats;
	long cfqg_stats_service_bytes;
	long cfqg_stats_serviced;
	long cfqg_stats_time;
	long cfqg_stats_sectors;
	long cfqg_stats_service_time;
	long cfqg_stats_wait_time;
	long cfqg_stats_merged;
	long cfqg_stats_queued;
};

/* Offsets for the net_prio subsystem. */
struct netprio_offset_table {
	long netprio_prioidx;
};

/*
 * File-scope instances of the offset tables, zero-initialised here.
 * They are filled in by the *_offset_table_init() functions declared below.
 */
static struct cgroupfs_root_offset_table cgroupfs_root_offset_table = {0};
static struct cgroup_offset_table cgroup_offset_table = {0};
static struct cpuset_offset_table cpuset_offset_table = {0};
static struct tg_offset_table tg_offset_table = {0};
static struct cpuacct_offset_table cpuacct_offset_table = {0};
static struct hugetlb_offset_table hugetlb_offset_table = {0};
static struct memory_offset_table memory_offset_table = {0};
static struct devices_offset_table devices_offset_table = {0};
static struct freezer_offset_table freezer_offset_table = {0};
static struct cls_offset_table cls_offset_table = {0};
static struct blkio_offset_table blkio_offset_table = {0};
static struct netprio_offset_table netprio_offset_table = {0};

/*
 * Parameter names of the cpuset subsystem.  The order matches the
 * CS_* enumerators below one-to-one (12 names, 12 enumerators).
 */
static const char *cpuset_params[] = {
	"cpu_exclusive",
	"mem_exclusive",
	"mem_hardwall",
	"memory_migrate",
	"sched_load_balance",
	"memory_spread_page",
	"memory_spread_slab",
	"memory_pressure_enabled",
	"memory_pressure",
	"sched_relax_domain_level",
	"mems",
	"cpus"
};

/* Identifiers for the cpuset parameters, in cpuset_params[] order. */
enum {
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
	CS_MEM_PRESSURE_ENABLE,
	CS_MEM_PRESSURE,
	CS_SHED_RELAX_DOMAIN_LEVEL,
	CS_MEMS,
	CS_CPUS,
};

/* Parameter names of the cpu subsystem, in enum cpu_param_id order. */
static const char *cpu_params[] = {
	"rt_period_us",
	"rt_runtime_us",
	"stat",
	"cfs_period_us",
	"cfs_quota_us",
	"shares",
};

/* Identifiers for cpu_params[]; CPU_NR_PARAMS is the entry count. */
enum cpu_param_id {
	CPU_RT_PERIOD,
	CPU_RT_RUNTIME,
	CPU_STAT,
	CPU_CFS_PERIOD,
	CPU_CFS_QUOTA,
	CPU_SHARES,
	CPU_NR_PARAMS,
};

/* Parameter names of the cpuacct subsystem, in enum cpuacct_param_id order. */
static const char *cpuacct_params[] = {
	"stat",
	"usage_percpu",
	"usage",
};

/* Identifiers for cpuacct_params[]; CPUACCT_NR_PARAMS is the entry count. */
enum cpuacct_param_id {
	CPUACCT_STAT,
	CPUACCT_USAGE_PERCPU,
	CPUACCT_USAGE,
	CPUACCT_NR_PARAMS,
};

/* Identifiers for memory_params[]; MEM_NR_PARAMS is the entry count. */
enum memory_param_id {
	MEM_MEMSW_FAILCNT,
	MEM_MEMSW_LIMIT,
	MEM_MEMSW_MAX_USAGE,
	MEM_MEMSW_USAGE,
	MEM_NUMA_STAT,
	MEM_OOM_CTRL,
	MEM_MCAI,
	MEM_SWAP,
	MEM_USE_HIER,
	MEM_FORCE_EMPTY,
	MEM_STAT,
	MEM_FAILCNT,
	MEM_SOFT_LIMIT,
	MEM_LIMIT,
	MEM_MAX_USAGE,
	MEM_USAGE,
	MEM_NR_PARAMS,
};

/*
 * Parameter names of the memory subsystem, in enum memory_param_id order.
 * The slot at index MEM_NR_PARAMS holds a terminating NULL.
 */
static const char *memory_params[] = {
	"memsw.failcnt",
	"memsw.limit_in_bytes",
	"memsw.max_usage_in_bytes",
	"memsw.usage_in_bytes",
	"numa_stat",
	"oom_control",
	"move_charge_at_immigrate",
	"swappiness",
	"use_hierarchy",
	"force_empty",
	"stat",
	"failcnt",
	"soft_limit_in_bytes",
	"limit_in_bytes",
	"max_usage_in_bytes",
	"usage_in_bytes",
	NULL,
};

/*
 * LRU list indices, built from LRU_BASE/LRU_ACTIVE/LRU_FILE.
 * get_mem_local_stats() walks these and stores each result at
 * stats[i + MCS_INACTIVE_ANON].
 */
enum lru_list {
	LRU_INACTIVE_ANON = LRU_BASE,
	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
	LRU_UNEVICTABLE,
	NR_LRU_LISTS
};

/*
 * Indices into the memory.stat result array.  MCS_CACHE..MCS_PGMAJFAULT are
 * per-cpu counters; MCS_INACTIVE_ANON..MCS_UNEVICTABLE are the per-zone LRU
 * values, laid out in enum lru_list order.
 */
enum {
	MCS_CACHE,
	MCS_RSS,
	MCS_FILE_MAPPED,
	MCS_SWAP,
	MCS_PGPGIN,
	MCS_PGPGOUT,
	MCS_PGFAULT,
	MCS_PGMAJFAULT,
	MCS_INACTIVE_ANON,
	MCS_ACTIVE_ANON,
	MCS_INACTIVE_FILE,
	MCS_ACTIVE_FILE,
	MCS_UNEVICTABLE,
	NR_MCS_STAT,
};

/* Display names for each MCS_* index: the local name and its "total_" form. */
struct {
	char *local_name;
	char *total_name;
} memcg_stat_strings[NR_MCS_STAT] = {
	{"cache", "total_cache"},
	{"rss", "total_rss"},
	{"mapped_file", "total_mapped_file"},
	{"swap", "total_swap"},
	{"pgpgin", "total_pgpgin"},
	{"pgpgout", "total_pgpgout"},
	{"pgfault", "total_pgfault"},
	{"pgmajfault", "total_pgmajfault"},
	{"inactive_anon", "total_inactive_anon"},
	{"active_anon", "total_active_anon"},
	{"inactive_file", "total_inactive_file"},
	{"active_file", "total_active_file"},
	{"unevictable", "total_unevictable"}
};

/* Freezer states; the values index freezer_state_strs[]. */
enum freezer_state {
	CGROUP_THAWED = 0,
	CGROUP_FREEZING,
	CGROUP_FROZEN,
};

/* Display strings for enum freezer_state, in the same order. */
static const char *freezer_state_strs[] = {
	"THAWED",
	"FREEZING",
	"FROZEN",
};

/*
 * Parameter names owned by the proportional-weight blkio policy.
 * The first ten entries follow the order of enum blkcg_file_name_prop
 * (which starts at 1, so entry i pairs with enumerator value i + 1).
 */
static const char *blkio_prop_strs[] = {
	"weight",
	"weight_device",
	"io_service_bytes",
	"io_serviced",
	"time",
	"sectors",
	"io_service_time",
	"io_wait_time",
	"io_merged",
	"io_queued",
	"reset_stats",
};

/* Parameter names owned by the throttle policy, in
 * enum blkcg_file_name_throtl order. */
static const char *blkio_thro_strs[] = {
	"throttle.read_bps_device",
	"throttle.write_bps_device",
	"throttle.read_iops_device",
	"throttle.write_iops_device",
	"throttle.io_service_bytes",
	"throttle.io_serviced",
};


/*
 * Policy identifiers for both the older (BLKIO_*) and the v3.5+ (BLKCG_*)
 * naming.
 * NOTE(review): all four share one enum, so BLKCG_POLICY_THROTL evaluates
 * to 2 and BLKCG_POLICY_PROP to 3 here, not the 0/1 of the commented-out
 * enum below - presumably they are tool-internal tokens; confirm against
 * the users of these values.
 */
enum blkio_plid {
	BLKIO_POLICY_PROP = 0,          /* Proportional Bandwidth division */
	BLKIO_POLICY_THROTL,            /* Throttling */
	BLKCG_POLICY_THROTL,            /* Id of throtl is 0 from v3.5 */
	BLKCG_POLICY_PROP,
};

//enum blkcg_plid {
//	BLKCG_POLICY_THROTL = 0,        /* Id of throtl is 0 from version 3.5 */
//	BLKCG_POLICY_PROP,
//};

/* blkio attributes owned by proportional weight policy */
enum blkcg_file_name_prop {
	BLKIO_PROP_weight = 1,
	BLKIO_PROP_weight_device,
	BLKIO_PROP_io_service_bytes,
	BLKIO_PROP_io_serviced,
	BLKIO_PROP_time,
	BLKIO_PROP_sectors,
	BLKIO_PROP_io_service_time,
	BLKIO_PROP_io_wait_time,
	BLKIO_PROP_io_merged,
	BLKIO_PROP_io_queued,
	BLKIO_PROP_avg_queue_size,
	BLKIO_PROP_group_wait_time,
	BLKIO_PROP_idle_time,
	BLKIO_PROP_empty_time,
	BLKIO_PROP_dequeue,
};

/* Statistic kinds passed to read_blkcg_stat() and blkio_get_stat(). */
enum stat_type {
	BLKIO_STAT_SERVICE_TIME,
	BLKIO_STAT_SERVICE_BYTES,
	BLKIO_STAT_SERVICED,
	BLKIO_STAT_WAIT_TIME,
	BLKIO_STAT_MERGED,
	BLKIO_STAT_QUEUED,
	BLKIO_STAT_TIME,
	BLKIO_STAT_SECTORS,
};

/* blkio attributes owned by throttle policy */
enum blkcg_file_name_throtl {
	BLKIO_THROTL_read_bps_device,
	BLKIO_THROTL_write_bps_device,
	BLKIO_THROTL_read_iops_device,
	BLKIO_THROTL_write_iops_device,
	BLKIO_THROTL_io_service_bytes,
	BLKIO_THROTL_io_serviced,
};

/* Per cpu stats, added from kernel version 3.0 */
enum stat_type_cpu {
	BLKIO_STAT_CPU_SECTORS,
	BLKIO_STAT_CPU_SERVICE_BYTES,
	BLKIO_STAT_CPU_SERVICED,
	BLKIO_STAT_CPU_MERGED,
	BLKIO_STAT_CPU_NR
};

/* Sub-categories of a statistic (see the Read/Write/Sync/Async/Total
 * rows in the blkio output). */
enum stat_sub_type {
	BLKIO_STAT_READ = 0,
	BLKIO_STAT_WRITE,
	BLKIO_STAT_SYNC,
	BLKIO_STAT_ASYNC,
	BLKIO_STAT_TOTAL
};

/*
 * Every subsystem this tool knows about.  The enumerator order matches
 * subsys_name[] one-to-one; CGROUP_SUBSYS_MAX is the number of entries.
 */
enum all_subsys_id {
	cpuset_subsys_id,
	debug_subsys_id,
	ns_subsys_id,
	cpu_cgroup_subsys_id,
	cpuacct_subsys_id,
	hugetlb_subsys_id,
	mem_cgroup_subsys_id,
	devices_subsys_id,
	freezer_subsys_id,
	net_cls_subsys_id,
	blkio_subsys_id,
	perf_subsys_id,
	net_prio_subsys_id,
	CGROUP_SUBSYS_MAX
};

/* Subsystem names, indexed by enum all_subsys_id. */
static const char *subsys_name[] = {
	"cpuset",
	"debug",
	"ns",
	"cpu",
	"cpuacct",
	"hugetlb",
	"memory",
	"devices",
	"freezer",
	"net_cls",
	"blkio",
	"perf_event",
	"net_prio"
};

/* One table entry per subsystem: its id and its name. */
struct cgroup_subsys_table {
	int subsys_id;
	char subsys_str[CGROUP_STR_LEN];
};

/*
 * Run-time subsystem table and its entry count (also reachable through the
 * CGROUP_SUBSYS_COUNT macro).  Presumably filled by
 * cgroup_subsys_table_init(), which is outside this view.
 */
static struct cgroup_subsys_table cgroup_subsys_table[CGROUP_SUBSYS_MAX];
static int cgroup_subsys_num = 0;
static int is_cgroup_supported = 0;

/* getopt_long() option table: --help (-h), --group <arg> (-g), --all (-a). */
static struct option long_options[] = {
	{"help", no_argument, 0, 'h'},
	{"group", required_argument, 0, 'g'},
	{"all", no_argument, 0, 'a'},
	{0, 0, 0, 0}
};

/* Shared-object load/unload hooks. */
int _init(void);
int _fini(void);

/* Help text and handler of the "cgget" command (see command_table[]). */
char *help_cgget[];
void cmd_cgget(void);

/* printing functions for every subsys */
static void print_cpuset(struct cgroup_spec *, int, ulong);
static void print_cpu(struct cgroup_spec *, int, ulong);
static void print_cpuacct(struct cgroup_spec *, int, ulong);
static void print_hugetlb(struct cgroup_spec *, int, ulong);
static void print_memory(struct cgroup_spec *, int, ulong);
static void print_devices(struct cgroup_spec *, int, ulong);
static void print_freezer(struct cgroup_spec *, int, ulong);
static void print_net_cls(struct cgroup_spec *, int, ulong);
static void print_blkio(struct cgroup_spec *, int, ulong);
static void print_net_prio(struct cgroup_spec *, int, ulong);
#if 0
/* ns and perf_event has nothing to print for now*/
static void print_ns(struct cgroup_spec *, int, ulong);
static void print_perf(struct cgroup_spec *, int, ulong);
#endif

/* offset table initialization functions */
static void cgget_offset_table_init();
static void cgroupfs_root_offset_table_init();
static void cgroup_offset_table_init();
static void cpuset_offset_table_init();
static void tg_offset_table_init();
static void cpuacct_offset_table_init();
static void hugetlb_offset_table_init();
static void memory_offset_table_init();
static void devices_offset_table_init();
static void freezer_offset_table_init();
static void cls_offset_table_init();
static void blkio_offset_table_init();
static void netprio_offset_table_init();

/* printing-assisted functions */
static int read_whitelist(struct cgroup_spec *, ulong);
static char *hugepage_fmt(char *, uint64_t);
static inline int is_root_mem_cgroup(ulong);
static uint64_t read_res_counter(ulong, long, char *);
static uint64_t mem_cgroup_read_local_zonestat(ulong, int);
static int64_t mem_cgroup_read_stat(void*, int);
static int get_mem_local_stats(ulong, int64_t *, int);
static int get_mem_total_stats(ulong, int64_t *, int);
static void get_mem_hierarchical_limit(ulong, uint64_t *, uint64_t *, int);
static int read_blkcg_stat(ulong, enum stat_type, int, int, int, char *);
static int read_policy_node(ulong, int, int, char *);
static int blkio_read_map(ulong, int, int, char *);
static uint64_t blkio_get_stat_cpu(ulong, enum stat_type, uint32_t, char *);
static uint64_t blkio_get_stat(ulong, enum stat_type, uint32_t, char *);
static void cpuacct_print_stat(ulong);
static uint64_t cpuacct_print_usage_percpu(ulong);
static void cpu_print_stat(ulong);
static void cpu_print_bandwidth(ulong, long, int, char *);
static ulong get_mz_lru_val(ulong, int, int, int);
static int64_t get_value_acc_ver(int64_t *, int, int, int);
static void get_mem_percpu_stats(ulong, void *);
static int css_member(ulong, char *, int);
static ulong idr_get_next(ulong, int *);
static ulong mem_cgroup_iter(ulong, ulong);
static uint64_t get_mem_usage(ulong, int);
static void mem_print_oom_ctrl(ulong);
static void mem_print_swap(ulong, char *);
static void mem_print_numa_stat(ulong, uint64_t *);
static uint64_t print_u64_rw(char *, uint64_t *);
static uint64_t print_thro_u64_rw(char *, ulong, int);
static void dev_name(ulong, char *);
static int test_policy(ulong, int);
static uint64_t print_cfq_group(ulong, int, long, size_t);
static uint64_t print_throtl_grp(ulong, int, long, size_t);
static int read_policy_group(ulong, int, int, char *);
static void blkio_read_each_blkg_for35(ulong, int, int, char *);
static void blkio_read_each_blkg_for37(ulong, int, int, int, char *);
static void blkio_read_each_blkg_for300(ulong, int, int, int, int, char *);
static ulong read_blkcg_stat_old(ulong, int, char *);
static void blkio_print_param_old(ulong);
static int blkio_print_param_no_group(ulong, int, int, char *);

/* general purpose functions */
static inline int next_possible_cpu(int);
static struct list_head *list_next(void *, void *, long);
static inline int test_bit(int, ulong);
static inline int bitmap_scnlistprintf(char *, unsigned, ulong *, int);
static inline uint64_t jiffies_64_to_clock_t(uint64_t);
static inline int check_endian();
static inline uint64_t ktime_to_ns(ulong);
static inline int bitstr_edit(char *, int, int, int);
static inline void set_majmin(char *, unsigned);
static inline void set_access(char *, short);
static inline char type_to_char(short);
static ulong get_subsys_parent(ulong, int);
static inline char *get_dentry_path(ulong, char *, int);
static void format_path_str(const char *, char *);
static int make_cgroup_spec(struct cgroup_spec **, char **,
			    char **, int, int);
static int make_all_cgroup_spec(struct cgroup_spec **, char **,
				char **, int, int);
static void cgroup_subsys_table_init();
static int parse_cgroup_spec(struct cgroup_spec **, char *, int, int);
static void print_cgroup_list(char *, struct cgroup_spec **, int, int);
static ulong retrieve_path(ulong, ulong, ulong *, const char *);
static ulong get_css_addr(struct cgroup_spec *, int, ulong);
static int get_subsys_id(struct cgroup_spec *);
static void print_cgroup(char *, struct cgroup_spec *, int, ulong, int);
static int64_t read_member_64(ulong, char *);
static int32_t read_member_32(ulong, char *);
static int fls(int);
/* NOTE(review): get_subsys_parent() and format_path_str() are already
 * declared earlier in this list; the repeats below are redundant. */
static ulong get_subsys_parent(ulong, int);
static uint64_t print_u64(char *, uint64_t);
static void format_path_str(const char *, char *);

/*
 * Command table handed to register_extension() by _init(): one entry
 * binding the name "cgget" to cmd_cgget() and help_cgget[], terminated
 * by a NULL entry.
 */
static struct command_table_entry command_table[] = {
        {"cgget", cmd_cgget, help_cgget, 0},
        {NULL}
};

/*
 * Help text for "cgget": command name, one-line summary, synopsis,
 * then the description, option list and examples, NULL-terminated.
 */
char *help_cgget[] = {
	"cgget",		/* command name */
	"display parameters of cgroup.",
	"cgget [-g <controller>] [-a] <path> ...\n"
	"  or\n"
	"  cgget -g <controller>:<path> ...",
	"Displays the parameter(s) of input cgroup(s).\n"
	"If no controller is specified, the values of "
	"all possible variables are printed.\n"
	"Either command line style is OK, but these can not be mixed.\n",
	"-a, --all",
	"print the variables for all controllers which consist in the given path.\n",
	"-g <controller>",
	"defines controllers whose values should be displayed.",
	"This option can be used multiple times.\n",
	"-g <controller>:<path>",
	"defines control groups whose values should be displayed.",
	"This option can be used multiple times.\n",
	"-h, --help",
	"display this message.\n",
	"EXAMPLES",
	"display the controller 'cpu' in path '/'",
	" crash>cgget -g cpu:/",
	" /:",
	" cpu.rt_period_us: 1000000",
	" cpu.rt_runtime_us: 950000",
	" cpu.stat: nr_periods 0",
	" \tnr_throttled 0",
	" \tthrottled_time 0",
	" cpu.cfs_period_us: 0",
	" cpu.cfs_quota_us: 0",
	" cpu.shares: 1024",
	" or",
	" crash>cgget -g cpu /",
	" /:",
	" cpu.rt_period_us: 1000000",
	" cpu.rt_runtime_us: 950000",
	" cpu.stat: nr_periods 0",
	" \tnr_throttled 0",
	" \tthrottled_time 0",
	" cpu.cfs_period_us: 0",
	" cpu.cfs_quota_us: 0",
	" cpu.shares: 1024",
	NULL
};

/*
 *  Load hook of the extension (presumably invoked when the shared object
 *  is loaded - confirm against the crash extension loader).  Builds the
 *  subsystem table and the offset tables, then registers command_table[]
 *  so that "cgget" becomes available.  Always returns 1.
 */
int
_init(void)
{
	cgroup_subsys_table_init();
	cgget_offset_table_init();
	register_extension(command_table);
	return 1;
}

/*
 *  The _fini() function is called if the shared object is unloaded.
 *  If desired, perform any cleanups here.
 *  Nothing is cleaned up at present; the function only returns 1.
 */
int 
_fini(void)
{ 
	return 1;
}

/*
 * Return the lowest possible-CPU number greater than @cpu, or kt->cpus
 * when there is none.  Pass -1 to obtain the first possible CPU.
 *
 * The possible-CPU bitmap is re-read from the dump on every call: through
 * the pointer stored in cpu_possible_mask when that symbol exists,
 * otherwise directly from the cpu_possible_map symbol.  Only the first
 * BITS_TO_LONGS(kt->cpus) longs of the bitmap are read.
 */
static inline int
next_possible_cpu(int cpu)
{
	ulong p, mask_addr, mask[BITS_TO_LONGS(kt->cpus)];

	if (symbol_exists("cpu_possible_mask")) {
		/* cpu_possible_mask is a pointer: read exactly one pointer. */
		p = symbol_value("cpu_possible_mask");
		readmem(p, KVADDR, &mask_addr, sizeof(ulong),
			"cpu_possible_mask", FAULT_ON_ERROR);
		readmem(mask_addr, KVADDR, mask,
			BITS_TO_LONGS(kt->cpus) * sizeof(ulong),
			"cpu_possible_mask", FAULT_ON_ERROR);
	} else {
		readmem(symbol_value("cpu_possible_map"), KVADDR, mask,
			BITS_TO_LONGS(kt->cpus) * sizeof(ulong),
			"cpu_possible_mask", FAULT_ON_ERROR);
	}

	/*
	 * Check the bound before testing the bit, so that bit kt->cpus -
	 * which lies past the end of mask[] when kt->cpus is a multiple of
	 * BITS_PER_LONG - is never examined.
	 */
	while (++cpu < kt->cpus) {
		if (NUM_IN_BITMAP(mask, cpu))
			return cpu;
	}

	return kt->cpus;
}

/*
 * Convert a jiffies count into USER_HZ clock ticks.
 *
 * Only the ideal cases are covered (HZ and USER_HZ related by a whole
 * factor); the real conversion is more complicated.
 */
static inline uint64_t
jiffies_64_to_clock_t(uint64_t x)
{
#if HZ < USER_HZ
	/* fewer kernel ticks than user ticks: scale up */
	return x * USER_HZ / HZ;
#elif HZ > USER_HZ
	/* more kernel ticks than user ticks: scale down */
	return x / (HZ / USER_HZ);
#else
	return x;
#endif
}

/*
 * Report the byte order of the machine running this code:
 * 0 means little endian, 1 means big endian.
 */
static inline int
check_endian()
{
	const int probe = 1;

	/* On a little-endian host the lowest-addressed byte holds the 1. */
	return *(const char *)&probe != 1;
}

/*
 * Read the ktime value stored at @ktime_addr and return it in nanoseconds.
 *
 * When ktime_t has a "tv64" member the 64-bit value is read directly.
 * Otherwise the value is stored as two 32-bit halves (sec and nsec) whose
 * order depends on the byte order reported by check_endian(): sec first
 * on big endian, nsec first on little endian.
 *
 * Every read uses the size of its destination, so a 64-bit host no longer
 * writes past the 32-bit locals and a 32-bit host reads the whole tv64.
 */
static inline uint64_t
ktime_to_ns(ulong ktime_addr)
{
	uint64_t ret = 0;
	uint32_t sec = 0, nsec = 0;
	ulong sec_addr, nsec_addr;

	if (MEMBER_EXISTS("ktime_t", "tv64")) {
		/* if member tv64 exists in ktime_t */
		readmem(ktime_addr, KVADDR, &ret, sizeof(ret),
			"ktime", FAULT_ON_ERROR);
		return ret;
	}

	if (check_endian()) {
		sec_addr = ktime_addr;
		nsec_addr = ktime_addr + sizeof(uint32_t);
	} else {
		nsec_addr = ktime_addr;
		sec_addr = ktime_addr + sizeof(uint32_t);
	}

	readmem(sec_addr, KVADDR, &sec, sizeof(sec),
		"ktime sec", FAULT_ON_ERROR);
	readmem(nsec_addr, KVADDR, &nsec, sizeof(nsec),
		"ktime nsec", FAULT_ON_ERROR);
	ret = NSEC_PER_SEC * sec + nsec;

	return ret;
}

/*
 * Append one run of bit numbers, @rbot..@rtop, to the list string in @buf
 * and return the new length of the string.
 *
 *   rtop == rbot      ->  "N"
 *   rtop >  rbot + 1  ->  "A-B"
 *   otherwise         ->  "A,B"
 *
 * @len is zero for the first run; any other value makes the run start
 * with a ',' separator.  @buf must already hold a NUL-terminated string
 * and be large enough for the addition - no bound is passed in, so none
 * can be enforced here.
 *
 * The new text is written at the end of the existing string rather than
 * by formatting @buf into itself, which has undefined behaviour.
 */
static inline int
bitstr_edit(char *buf, int rbot, int rtop, int len)
{
	char *tail = buf + strlen(buf);
	const char *sep = (len == 0) ? "" : ",";

	if (rtop == rbot)
		sprintf(tail, "%s%d", sep, rtop);
	else if (rtop > rbot + 1)
		sprintf(tail, "%s%d-%d", sep, rbot, rtop);
	else
		sprintf(tail, "%s%d,%d", sep, rbot, rtop);

	return (int)strlen(buf);
}

/*
 * Format a device major or minor number into @str.
 * The all-ones value is the wildcard and is written as "*"; any other
 * value is written in decimal.
 */
static inline void
set_majmin(char *str, unsigned m)
{
	if (m != ~0U) {
		sprintf(str, "%u", m);
		return;
	}

	str[0] = '*';
	str[1] = '\0';
}

/*
 * Decode the access bits in @access into a string of flag letters in @acc:
 * 'r' for ACC_READ, 'w' for ACC_WRITE, 'm' for ACC_MKNOD, in that order.
 * @acc must provide ACCLEN bytes; it is cleared first, so the result is
 * always NUL-terminated.
 */
static inline void
set_access(char *acc, short access)
{
	static const struct {
		short bit;
		char letter;
	} flags[] = {
		{ ACC_READ,  'r' },
		{ ACC_WRITE, 'w' },
		{ ACC_MKNOD, 'm' },
	};
	char *out = acc;
	size_t k;

	memset(acc, 0, ACCLEN);
	for (k = 0; k < sizeof(flags) / sizeof(flags[0]); k++) {
		if (access & flags[k].bit)
			*out++ = flags[k].letter;
	}
}

/*
 * Map a device type to its one-letter code: 'a' for DEV_ALL, 'c' for
 * DEV_CHAR, 'b' for DEV_BLOCK, and 'X' for anything else.
 */
static inline char
type_to_char(short type)
{
	switch (type) {
	case DEV_ALL:
		return 'a';
	case DEV_CHAR:
		return 'c';
	case DEV_BLOCK:
		return 'b';
	default:
		return 'X';
	}
}

/*
 * Format a huge page size given in bytes as a short string in @buf:
 * "<n>GB" from 1 GiB up, "<n>MB" from 1 MiB up, "<n>KB" below that.
 * The quotient is truncated.  Returns @buf.
 *
 * The value is printed as unsigned long long so that the format matches
 * the 64-bit argument on 32-bit hosts as well.
 */
static char *
hugepage_fmt(char *buf, uint64_t size)
{
	if (size >= (1ULL << 30))
		sprintf(buf, "%lluGB", (unsigned long long)(size >> 30));
	else if (size >= (1ULL << 20))
		sprintf(buf, "%lluMB", (unsigned long long)(size >> 20));
	else
		sprintf(buf, "%lluKB", (unsigned long long)(size >> 10));
	return buf;
}

/*
 * Return 1 when @mem_addr equals the pointer stored in the kernel's
 * root_mem_cgroup variable, 0 otherwise.  When the root_mem_cgroup symbol
 * does not exist, every address is reported as the root (returns 1).
 */
static inline int
is_root_mem_cgroup(ulong mem_addr)
{
	ulong root;

	if (!symbol_exists("root_mem_cgroup"))
		return 1;

	readmem(symbol_value("root_mem_cgroup"), KVADDR, &root,
		sizeof(ulong), "root_mem_cgroup", FAULT_ON_ERROR);

	return (mem_addr == root) ? 1 : 0;
}

/*
 * Copy the name of the dentry at @dentry_addr into @path, a buffer of
 * @len bytes, and return @path.  Returns NULL if @len is not positive or
 * a read fails.
 *
 * The name length stored in the dentry's qstr is clamped so that it fits
 * both the local buffer and @path with room for the terminating NUL; an
 * over-long name is truncated instead of overflowing either buffer.
 */
static inline char *
get_dentry_path(ulong dentry_addr, char *path, int len)
{
	char buf[FILENAME_MAX] = {0};
	unsigned int qstr_len = 0;
	ulong name_addr;

	if (len <= 0)
		return NULL;
	memset(path, 0, len);

	if (!readmem(dentry_addr + offset_table.dentry_d_name + offset_table.qstr_len,
		     KVADDR, &qstr_len, sizeof(unsigned int),
		     "qstr_len", FAULT_ON_ERROR))
		return NULL;
	if (!readmem(dentry_addr + offset_table.dentry_d_name + offset_table.qstr_name,
		     KVADDR, &name_addr, sizeof(char *), "name_addr", FAULT_ON_ERROR))
		return NULL;

	/* Keep one byte free for the NUL in buf[] and in path[]. */
	if (qstr_len > sizeof(buf) - 1)
		qstr_len = sizeof(buf) - 1;
	if (qstr_len > (unsigned int)len - 1)
		qstr_len = (unsigned int)len - 1;

	if (!readmem(name_addr, KVADDR, buf, qstr_len, "qstr_name", FAULT_ON_ERROR))
		return NULL;
	/* path[] was zeroed above and qstr_len < len, so it stays terminated. */
	strncpy(path, buf, qstr_len);
	return path;
}

/*
 * Print one device whitelist entry as
 *     "<subsys>.list: <type> <major>:<minor> <access>"
 * where <subsys> comes from @group_list and the other fields are read
 * from the dev_whitelist_item at @whitelist_addr.
 *
 * Returns 0 on success, -1 when @whitelist_addr is zero or a read fails.
 */
static int
read_whitelist(struct cgroup_spec *group_list, ulong whitelist_addr)
{
	char major_str[MAJMINLEN], minor_str[MAJMINLEN], access_str[ACCLEN];
	uint32_t dev_major, dev_minor;
	short dev_type, dev_access;
	ulong field;

	if (whitelist_addr == 0)
		return -1;

	field = whitelist_addr + MEMBER_OFFSET("dev_whitelist_item", "major");
	if (!readmem(field, KVADDR, &dev_major, sizeof(uint32_t),
		     "whitelist_item_major", FAULT_ON_ERROR))
		return -1;
	set_majmin(major_str, dev_major);

	field = whitelist_addr + MEMBER_OFFSET("dev_whitelist_item", "minor");
	if (!readmem(field, KVADDR, &dev_minor, sizeof(uint32_t),
		     "whitelist_item_minor", FAULT_ON_ERROR))
		return -1;
	set_majmin(minor_str, dev_minor);

	field = whitelist_addr + MEMBER_OFFSET("dev_whitelist_item", "access");
	if (!readmem(field, KVADDR, &dev_access, sizeof(short),
		     "whitelist_item_access", FAULT_ON_ERROR))
		return -1;
	set_access(access_str, dev_access);

	field = whitelist_addr + MEMBER_OFFSET("dev_whitelist_item", "type");
	if (!readmem(field, KVADDR, &dev_type, sizeof(short),
		     "whitelist_item_type", FAULT_ON_ERROR))
		return -1;

	fprintf(fp, "%s.list: %c %s:%s %s\n", group_list->subsys_str,
		type_to_char(dev_type), major_str, minor_str, access_str);

	return 0;
}

/*
 * Read 64 bits from kernel virtual address @ptr and return them.
 * Only the width of the member matters here; interpreting the value
 * (signedness, meaning) is left to the caller.  @str labels the read
 * in error reports.
 */
static int64_t
read_member_64(ulong ptr, char *str)
{
	int64_t value = 0;

	readmem(ptr, KVADDR, &value, sizeof(value), str, FAULT_ON_ERROR);

	return value;
}

/*
 * Read 32 bits from kernel virtual address @ptr and return them.
 * Only the width of the member matters here; interpreting the value
 * (signedness, meaning) is left to the caller.  @str labels the read
 * in error reports.
 */
static int32_t
read_member_32(ulong ptr, char *str)
{
	int32_t value = 0;

	readmem(ptr, KVADDR, &value, sizeof(value), str, FAULT_ON_ERROR);

	return value;
}

/*
 * Read one long from kernel virtual address @ptr and return it.
 * Only the width of the member matters here; interpreting the value
 * is left to the caller.  The read is labelled "member value".
 */
static long
read_member_long(ulong ptr)
{
	long value = 0;

	readmem(ptr, KVADDR, &value, sizeof(value),
		"member value", FAULT_ON_ERROR);

	return value;
}

/*
 * Read the 64-bit resource counter field at @counter_addr + @off.
 *
 * When @param is not NULL the value is also printed as "<param>: <value>".
 * Returns the value read, or 0 without reading when either @counter_addr
 * or @off is -1 (the "not available" marker).
 *
 * The value is printed as unsigned long long so that the format matches
 * the 64-bit argument on 32-bit hosts as well.
 */
static uint64_t
read_res_counter(ulong counter_addr, long off, char *param)
{
	uint64_t val = 0;

	if (counter_addr == (ulong)-1 || off == -1)
		return 0;

	val = (uint64_t)read_member_64(counter_addr + off, "res_counter");
	if (param)
		fprintf(fp, "%s: %llu\n", param, (unsigned long long)val);
	return val;
}

/*
 * Print the two values of cpuacct.stat ("user" and "system") for the
 * cpuacct structure at @subsys_addr.  The first line is printed without
 * indentation, the second with a leading tab.
 *
 * When struct kernel_cpustat exists, cpustat is a per-cpu pointer: the
 * counters of every possible CPU are summed (user = USER + NICE,
 * system = SYSTEM + IRQ + SOFTIRQ).  Otherwise cpustat is read as two
 * consecutive percpu_counter structures and their "count" members are
 * used.  Both sums are converted with jiffies_64_to_clock_t().
 */
static void
cpuacct_print_stat(ulong subsys_addr)
{
	int i;
	ulong cpuacct_stat_addr, stat_ptr, tmp;
	int64_t userval = 0, systemval = 0;
	uint64_t result;
	enum cpuacct_stat_index {
		CPUACCT_STAT_USER,
		CPUACCT_STAT_SYSTEM,
		CPUACCT_STAT_NSTATS,
	};
	enum cpu_usage_stat {
		CPUTIME_USER,
		CPUTIME_NICE,
		CPUTIME_SYSTEM,
		CPUTIME_SOFTIRQ,
		CPUTIME_IRQ,
		CPUTIME_IDLE,
		CPUTIME_IOWAIT,
		CPUTIME_STEAL,
		CPUTIME_GUEST,
		CPUTIME_GUEST_NICE,
		NR_STATS,
	};
	static const char *cpuacct_stat_desc[] = {
		"user",
		"system",
	};

	/* get params of cpuacct.stat */
	cpuacct_stat_addr = subsys_addr +
			    cpuacct_offset_table.cpuacct_cpustat;
	if (STRUCT_EXISTS("kernel_cpustat")) {
		/* cpustat is a percpu variable */
		int64_t stat[NR_STATS] = {0};
		long stat_size = STRUCT_SIZE("kernel_cpustat");

		/* Never read more than stat[] can hold. */
		if (stat_size > (long)sizeof(stat))
			stat_size = (long)sizeof(stat);

		readmem(cpuacct_stat_addr, KVADDR, &tmp, sizeof(ulong),
			"cpuacct cpustat", FAULT_ON_ERROR);
		for_each_possible_cpu(i) {
			/* Without per-cpu offsets the base pointer is used as is. */
			stat_ptr = tmp;
			if (kt->flags & PER_CPU_OFF)
				stat_ptr += kt->__per_cpu_offset[i];
			readmem(stat_ptr, KVADDR, stat, stat_size,
				"kernel_cpustat", FAULT_ON_ERROR);
			userval += stat[CPUTIME_USER];
			userval += stat[CPUTIME_NICE];
			systemval += stat[CPUTIME_SYSTEM];
			systemval += stat[CPUTIME_IRQ];
			systemval += stat[CPUTIME_SOFTIRQ];
		}
	} else {
		readmem(cpuacct_stat_addr +
			MEMBER_OFFSET("percpu_counter", "count"), KVADDR,
			&userval, sizeof(int64_t), "cpuacct_cpustat count",
			FAULT_ON_ERROR);
		readmem(cpuacct_stat_addr + STRUCT_SIZE("percpu_counter") +
			MEMBER_OFFSET("percpu_counter", "count"), KVADDR,
			&systemval, sizeof(int64_t), "cpuacct_cpustat count",
			FAULT_ON_ERROR);
	}

	/* Print as unsigned long long so the format is right on 32-bit hosts. */
	result = jiffies_64_to_clock_t((uint64_t)userval);
	fprintf(fp, "%s %llu\n", cpuacct_stat_desc[CPUACCT_STAT_USER],
		(unsigned long long)result);
	result = jiffies_64_to_clock_t((uint64_t)systemval);
	fprintf(fp, "\t%s %llu\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM],
		(unsigned long long)result);
}

/*
 * Print the per-cpu usage values of the cpuacct structure at @subsys_addr
 * on one line, separated by spaces, and return their sum.
 *
 * The cpuusage member holds a per-cpu pointer.  When struct percpu_data
 * does not exist, the address for each CPU is the pointer plus that CPU's
 * per-cpu offset (or the bare pointer when no per-cpu offsets are in
 * use).  When struct percpu_data exists, the stored pointer is
 * bit-inverted and the CPU's address is fetched from the pointer array
 * found there.
 */
static uint64_t
cpuacct_print_usage_percpu(ulong subsys_addr)
{
	int i;
	uint64_t val, total = 0;
	ulong cpuusage_ptr, tmp;

	/* get params of cpuacct.usage_percpu */
	readmem(subsys_addr + cpuacct_offset_table.cpuacct_cpuusage,
		KVADDR, &tmp, sizeof(uint64_t *),
		"cpuacct_cpuusage", FAULT_ON_ERROR);

	for_each_possible_cpu(i) {
		if (!STRUCT_EXISTS("percpu_data")) {
			cpuusage_ptr = tmp;
			if (kt->flags & PER_CPU_OFF)
				cpuusage_ptr += kt->__per_cpu_offset[i];
		} else {
			cpuusage_ptr = ~tmp;
			/* The array holds pointers: read exactly one pointer. */
			readmem(cpuusage_ptr + i * sizeof(ulong),
				KVADDR, &cpuusage_ptr, sizeof(ulong),
				"percpu_cpuusage", FAULT_ON_ERROR);
		}
		readmem(cpuusage_ptr, KVADDR, &val, sizeof(uint64_t),
			"percpu_cpuusage", FAULT_ON_ERROR);
		total += val;
		/* unsigned long long keeps the format right on 32-bit hosts */
		fprintf(fp, "%llu ", (unsigned long long)val);
	}
	fprintf(fp, "\n");

	return total;
}

/*
 * Print the three cpu.stat values of the cfs_bandwidth structure at
 * @cfs_bandwidth_ptr, one per line and tab-indented: nr_periods and
 * nr_throttled (read as 32-bit values) and throttled_time (64-bit).
 *
 * throttled_time is printed as unsigned long long so that the format
 * matches the 64-bit argument on 32-bit hosts as well.
 */
static void
cpu_print_stat(ulong cfs_bandwidth_ptr)
{
	int val;
	uint64_t time;
	static const char *stat_str[] = {
		"nr_periods",
		"nr_throttled",
		"throttled_time",
	};

	val = read_member_32(cfs_bandwidth_ptr +
			     MEMBER_OFFSET("cfs_bandwidth", "nr_periods"),
			     "cfs_bandwidth nr_periods");
	fprintf(fp, "\t%s: %d\n", stat_str[0], val);
	val = read_member_32(cfs_bandwidth_ptr +
			     MEMBER_OFFSET("cfs_bandwidth", "nr_throttled"),
			     "cfs_bandwidth nr_throttled");
	fprintf(fp, "\t%s: %d\n", stat_str[1], val);
	time = (uint64_t)read_member_64(cfs_bandwidth_ptr +
			 MEMBER_OFFSET("cfs_bandwidth", "throttled_time"),
			 "cfs_bandwidth throttled_time");
	fprintf(fp, "\t%s: %llu\n", stat_str[2], (unsigned long long)time);
}

/*
 * Print one bandwidth parameter as "<param>: <microseconds>".
 *
 * The value is read from @bandwidth_ptr + @off, either as a ktime
 * (@ktime non-zero) or as a plain 64-bit nanosecond count.  An all-ones
 * value is printed as -1; anything else is converted from nanoseconds to
 * microseconds.  Nothing is printed when @bandwidth_ptr or @off is -1.
 *
 * The value is printed as long long so that the format matches the
 * 64-bit argument on 32-bit hosts as well.
 */
static void
cpu_print_bandwidth(ulong bandwidth_ptr, long off, int ktime, char *param)
{
	int64_t val;

	if (bandwidth_ptr == (ulong)-1 || off == -1)
		return;

	if (ktime)
		val = ktime_to_ns(bandwidth_ptr + off);
	else
		val = read_member_64(bandwidth_ptr + off, "ktime");

	/* all bits set: reported as -1 */
	if ((uint64_t)val == ~0ULL)
		val = -1;
	else
		val = val / NSEC_PER_USEC;

	fprintf(fp, "%s: %lld\n", param, (long long)val);
}

/*
 * Return the per-zone count with index @idx for node @nid, zone @zid of
 * the mem_cgroup at @mem_addr.
 *
 * The per-node pointer is fetched from the memory_info array of the
 * mem_cgroup; the count is then read from the perzone_count array of the
 * @zid-th mem_cgroup_per_zone behind that pointer.
 */
static ulong
get_mz_lru_val(ulong mem_addr, int nid, int zid, int idx)
{
	ulong node_info, slot, count = 0;

	slot = mem_addr + memory_offset_table.memory_info +
	       nid * sizeof(void *);
	readmem(slot, KVADDR, &node_info, sizeof(ulong),
		"lruinfo_nodeinfo", FAULT_ON_ERROR);

	slot = node_info + zid * STRUCT_SIZE("mem_cgroup_per_zone") +
	       memory_offset_table.perzone_count;
	slot = slot + idx * sizeof(ulong);
	readmem(slot, KVADDR, &count, sizeof(ulong),
		"per_zone_value", FAULT_ON_ERROR);

	return count;
}

/*
 * Sum the per-zone count with index @idx over every node (vt->numnodes)
 * and every zone (vt->nr_zones) of the mem_cgroup at @mem_addr.
 */
static uint64_t
mem_cgroup_read_local_zonestat(ulong mem_addr, int idx)
{
	uint64_t sum = 0;
	int node, zone;

	for (node = 0; node < vt->numnodes; node++)
		for (zone = 0; zone < vt->nr_zones; zone++)
			sum += get_mz_lru_val(mem_addr, node, zone, idx);

	return sum;
}

/*
 * Pick one statistic out of a CPU's copy of struct mem_cgroup_stat_cpu.
 *
 * The layout of mem_cgroup_stat_cpu changed repeatedly between kernels
 * 2.6.25 and 3.6, so the slot that holds a given statistic depends on the
 * kernel version.  The version is inferred from the number of 64-bit
 * elements in the structure.
 *
 * @stat:    buffer holding one mem_cgroup_stat_cpu per CPU, back to back.
 * @idx:     statistic wanted (MCS_CACHE .. MCS_PGMAJFAULT).
 * @cpu:     which CPU's copy to read.
 * @elem_nr: how many sizeof(int64_t) in struct mem_cgroup_stat_cpu.
 *
 * Returns the value, -1 when the statistic does not exist for this
 * layout, or 0 for an @idx this function does not handle.
 */
static int64_t
get_value_acc_ver(int64_t *stat, int idx, int cpu, int elem_nr)
{
	/* step to the requested CPU's copy of the structure */
	stat = (int64_t *)((ulong)stat +
			   cpu * STRUCT_SIZE("mem_cgroup_stat_cpu"));

	switch (idx)
	{
	case MCS_CACHE:
	case MCS_RSS:
		/* the first 2 stats are same for every kernel version */
		return stat[idx];
	case MCS_FILE_MAPPED:
		if (elem_nr > 4)
			/* kernel version 2.6.31--3.6 */
			return stat[idx];
		return -1;
	case MCS_SWAP:
		switch (elem_nr)
		{
		/* kernel version 2.6.34 */
		case 7:
			return stat[5];
		/* kernel version 2.6.32--2.6.33, 2.6.35--2.6.38 */
		case 8:
			/* two 8-element layouts exist; tell them apart by
			 * which structure members are present */
			if (MEMBER_EXISTS("res_counter", "soft_limit"))
				return stat[6];
			else if (MEMBER_EXISTS("mem_cgroup_stat_cpu",
						"nocpu_base"))
				return stat[5];
			else
				return -1;
		/* kernel version 2.6.39--3.6 */
		case 11:
		case 12:
		case 13:
		case 14:
			return stat[3];
		default:
			return -1;
		}
	case MCS_PGPGIN:
	case MCS_PGPGOUT:
		/* the slot is idx shifted by a layout-dependent amount */
		switch (elem_nr)
		{
		/* kernel version 2.6.26--2.6.30 */
		case 4:
			return stat[idx - 2];
		/* kernel version 2.6.31--2.6.38 */
		case 7:
		case 8:
			return stat[idx - 1];
		/* kernel version 2.6.39, 3.0--3.3 */
		case 11:
		case 14:
			return stat[idx + 2];
		/* kernel version 3.5 */
		case 12:
			return stat[idx];
		/* kernel version 3.4 */
		case 13:
			return stat[idx + 1];
		default:
			return -1;
		}
	case MCS_PGFAULT:
	case MCS_PGMAJFAULT:
		/* only the 12-, 13- and 14-element layouts carry these */
		switch (elem_nr)
		{
		/* kernel version 3.5 */
		case 12:
			return stat[idx];
		/* kernel version 3.4 */
		case 13:
			return stat[idx + 2];
		/* kernel version 3.0--3.3 */
		case 14:
			return stat[idx + 3];
		default:
			return -1;
		}
	default:
		return 0;
	}
}

static int64_t
mem_cgroup_read_stat(void *stat, int idx)
{
	int cpu, elem_nr;
	int64_t ret = 0;

	elem_nr = STRUCT_SIZE("mem_cgroup_stat_cpu") / sizeof(int64_t);
	for_each_possible_cpu(cpu)
		ret += get_value_acc_ver((int64_t *)stat, idx, cpu, elem_nr);

	return ret;

}

/*
 * Copy the per-cpu mem_cgroup_stat_cpu structures into @src_stat,
 * one after another in cpu order.
 * @stat_addr: address of the pointer to the per-cpu statistics
 * @src_stat: destination buffer
 */
static void
get_mem_percpu_stats(ulong stat_addr, void *src_stat)
{
	ulong base, addr;
	int c;

	readmem(stat_addr, KVADDR, &base, sizeof(ulong),
		"mem_cgroup_stat", FAULT_ON_ERROR);

	addr = base;
	for_each_possible_cpu(c) {
		/* relocate the base pointer when per-cpu offsets are in use */
		if (kt->flags & PER_CPU_OFF)
			addr = base + kt->__per_cpu_offset[c];

		readmem(addr, KVADDR,
			src_stat + c * STRUCT_SIZE("mem_cgroup_stat_cpu"),
			STRUCT_SIZE("mem_cgroup_stat_cpu"),
			"mem_cgroup_stat", FAULT_ON_ERROR);
	}
}

/*
 * Collect the statistics of a single mem_cgroup.
 * @mem_addr: address of the mem_cgroup
 * @stats: output array indexed by MCS_*
 * @do_swap_account: when 0 the MCS_SWAP slot is left untouched
 *
 * Returns 0 on success, -1 if the scratch buffer can't be allocated.
 */
static int
get_mem_local_stats(ulong mem_addr, int64_t *stats, int do_swap_account)
{
	void *percpu_copy;
	ulong stat_addr;
	int64_t sum = 0;
	int idx;

	stat_addr = mem_addr + memory_offset_table.memory_stat;

	/* scratch space for one mem_cgroup_stat_cpu per cpu */
	percpu_copy = calloc(kt->cpus, STRUCT_SIZE("mem_cgroup_stat_cpu"));
	if (percpu_copy == NULL)
		return -1;

	if (MEMBER_EXISTS("mem_cgroup", "thresholds")) {
		/* stat of mem_cgroup is a percpu variable */
		get_mem_percpu_stats(stat_addr, percpu_copy);
	} else {
		readmem(stat_addr, KVADDR, percpu_copy,
			kt->cpus * STRUCT_SIZE("mem_cgroup_stat_cpu"),
			"mem_cgroup_stat", FAULT_ON_ERROR);
	}

	/* cpu stat */
	for (idx = MCS_CACHE; idx <= MCS_PGMAJFAULT; idx++) {
		if (idx == MCS_SWAP && !do_swap_account)
			continue;

		sum = mem_cgroup_read_stat(percpu_copy, idx);
		if (sum < 0)
			stats[idx] = -1;
		else if (idx > MCS_SWAP)
			/* entries after MCS_SWAP are stored unscaled */
			stats[idx] = sum;
		else
			stats[idx] = sum * PAGE_SIZE;
	}

	/* per zone stat */
	for (idx = LRU_INACTIVE_ANON; idx < NR_LRU_LISTS; idx++) {
		if (MEMBER_EXISTS("mem_cgroup_per_zone", "active_list") &&
		    idx > LRU_ACTIVE_ANON)
			break;
		sum = mem_cgroup_read_local_zonestat(mem_addr, idx);
		stats[idx + MCS_INACTIVE_ANON] = sum * PAGE_SIZE;
	}

	free(percpu_copy);
	return 0;
}

/*
 * Read a 16-bit member of the css_id attached to a cgroup_subsys_state.
 * @css_addr: address of the cgroup_subsys_state
 * @member: "id", "depth" or "stack"
 * @depth: index into stack[]; only used when @member is "stack"
 */
static int
css_member(ulong css_addr, char *member, int depth)
{
	ulong css_id_addr, target;
	int result = 0;

	/* follow cgroup_subsys_state.id to the css_id */
	readmem(css_addr + MEMBER_OFFSET("cgroup_subsys_state", "id"), KVADDR,
		&css_id_addr, sizeof(ulong), "cgroup_subsys_state css_id",
		FAULT_ON_ERROR);

	target = css_id_addr + MEMBER_OFFSET("css_id", member);
	/* member "stack" is an array, pick the element at @depth */
	if (strcmp(member, "stack") == 0)
		target += depth * sizeof(short);

	readmem(target, KVADDR, &result, sizeof(short),
		"member of css_id", FAULT_ON_ERROR);

	return result;
}

/*
 * find last (most significant) set bit in word.
 * @x: the word to search
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The least significant bit is at
 * position 1, the most significant bit is at position 32.
 */
static int
fls(int x)
{
	/* work on the unsigned bit pattern so that shifting is well defined */
	unsigned int bits = (unsigned int)x;
	int pos = 0;

	while (bits != 0) {
		pos++;
		bits >>= 1;
	}

	return pos;
}

/*
 * Find the next populated slot of an idr tree in the dump.
 * @idp: address whose first word is read as the pointer to the top
 *       idr_layer
 * @nextid: in: the search starts at *nextid + 1;
 *          out: id of the entry that was found (left untouched when
 *          nothing is found)
 *
 * Returns the pointer stored in the slot, or 0 when there's no entry
 * at or above the starting id.
 */
static ulong
idr_get_next(ulong idp, int *nextid)
{
	int id = *nextid + 1;
	int n, max, idr_bits, idr_mask, layer;
	ulong p = 0, pa[7];
	ulong *paa = &pa[0];

	/* 5 index bits per layer on 32-bit, 6 on 64-bit */
	if (BITS_PER_LONG == 32)
		idr_bits = 5;
	else
		idr_bits = 6;
	idr_mask = (1 << idr_bits) - 1;

	/* find first ent */
	readmem(idp, KVADDR, &p, sizeof(ulong), "idr top", FAULT_ON_ERROR);
	if (!p)
		return 0;
	readmem(p + MEMBER_OFFSET("idr_layer", "layer"), KVADDR, &layer,
		sizeof(int), "idr_layer layer", FAULT_ON_ERROR);
	n = (layer + 1) * idr_bits;
	max = 1 << n;

	while (id < max) {
		/* walk down, remembering the path in pa[] */
		while (n > 0 && p) {
			n -= idr_bits;
			*paa++ = p;
			readmem(p + MEMBER_OFFSET("idr_layer", "ary") +
				sizeof(ulong) * ((id >> n) & idr_mask),
				KVADDR, &p, sizeof(ulong), "idr_layer ary",
				FAULT_ON_ERROR);
		}

		if (p) {
			*nextid = id;
			return p;
		}

		/*
		 * The slot covering id is empty. id isn't guaranteed to be
		 * aligned to that slot, so adding 1 << n could step over
		 * the first ids of the next slot; round up to the start of
		 * the next slot instead.
		 */
		id = (id | ((1 << n) - 1)) + 1;

		/*
		 * climb back until the current layer covers the new id;
		 * never pop more than what has been pushed on pa[].
		 */
		while (n < fls(id) && paa > pa) {
			n += idr_bits;
			p = *--paa;
		}
	}
	return 0;
}

/*
 * Tell whether a walk that starts at @root may visit its descendants.
 * Follows the rule of the kernel's own iterator: descendants are only
 * visited for the root mem_cgroup or when use_hierarchy is set.
 * NOTE(review): reads use_hierarchy as a single byte - confirm against
 * the kernels this module supports.
 */
static int
mem_cgroup_iter_walks_tree(ulong root)
{
	unsigned char use_hierarchy = 0;

	if (is_root_mem_cgroup(root))
		return 1;
	if (!MEMBER_EXISTS("mem_cgroup", "use_hierarchy"))
		return 1;

	readmem(root + MEMBER_OFFSET("mem_cgroup", "use_hierarchy"), KVADDR,
		&use_hierarchy, sizeof(use_hierarchy),
		"mem_cgroup use_hierarchy", FAULT_ON_ERROR);

	return use_hierarchy != 0;
}

/*
 * Iterate over the mem_cgroups in the subtree of @root, in css id order.
 * @root: css address of the subtree root
 * @prev: css address returned by the previous call, 0 to start
 *
 * Returns the css address of the next member, or 0 when the walk is
 * over. If struct css_id doesn't exist, @root is returned every time,
 * so the caller has to stop by itself.
 */
static ulong
mem_cgroup_iter(ulong root, ulong prev)
{
	ulong idp, css_id_addr, css = 0;
	int depth, ancestor, tmpid, id = 0,
	    rootid, rootdepth;

	if (!STRUCT_EXISTS("css_id"))
		return root;

	/* without hierarchy the walk consists of @root only */
	if (!mem_cgroup_iter_walks_tree(root))
		return prev ? 0 : root;

	if (prev)
		id = css_member(prev, "id", 0);

	rootid = css_member(root, "id", 0);
	rootdepth = css_member(root, "depth", 0);
	tmpid = id;
	idp = symbol_value("mem_cgroup_subsys") + MEMBER_OFFSET("cgroup_subsys", "idr");
	while (1) {
		/*
		 * scan next css_id entry from bitmap. idr_get_next() starts
		 * at tmpid + 1 and stores the id it found in tmpid, so tmpid
		 * must not be advanced here or an id would be skipped.
		 */
		css_id_addr = idr_get_next(idp, &tmpid);
		if (!css_id_addr)
			return 0;

		/* address of css is at the beginning of struct css_id */
		readmem(css_id_addr, KVADDR, &css, sizeof(ulong),
			"css_id css", FAULT_ON_ERROR);
		if (!css)
			/* no css attached to this id, try the next one */
			continue;

		depth = css_member(css, "depth", 0);
		if (depth < rootdepth)
			continue;

		/*
		 * stack[] holds the ids of the ancestors and, at index
		 * "depth", the id of the entry itself. The entry belongs to
		 * the subtree if its ancestor at the depth of @root is @root.
		 */
		ancestor = css_member(css, "stack", rootdepth);
		if (ancestor == rootid)
			return css;
	}
}

/*
 * Collect the statistics of a mem_cgroup together with those of the
 * mem_cgroups returned by mem_cgroup_iter() for it.
 * @mem_addr: address of the mem_cgroup
 * @stats: output array indexed by MCS_*. When struct css_id exists the
 *         values are added to what @stats already holds, so the caller
 *         has to clear it first.
 * @do_swap_account: passed on to get_mem_local_stats()
 *
 * Returns 0 on success, -1 if the local statistics can't be collected.
 */
static int
get_mem_total_stats(ulong mem_addr, int64_t *stats, int do_swap_account)
{
	int i;
	ulong root;
	int64_t val[NR_MCS_STAT];

	if (!STRUCT_EXISTS("css_id"))
		return get_mem_local_stats(mem_addr, stats, do_swap_account);

	/*
	 * when hierarchy is enabled, walking through the tree
	 * and add every single val to get the total stats.
	 */
	root = mem_addr;
	while (mem_addr) {
		/*
		 * get_mem_local_stats() doesn't fill every slot (e.g. swap
		 * is skipped without swap accounting), so start from zero
		 * to avoid adding stack garbage.
		 */
		memset(val, 0, sizeof(val));
		if (get_mem_local_stats(mem_addr, val, do_swap_account) < 0)
			return -1;

		for (i = MCS_CACHE; i < NR_MCS_STAT; i++) {
			/* -1 marks a statistic this kernel doesn't have */
			if (val[i] == -1)
				stats[i] = -1;
			else
				stats[i] += val[i];
		}
		mem_addr = mem_cgroup_iter(root, mem_addr);
	}

	return 0;
}

/*
 * Find the subsystem state that belongs to the parent cgroup.
 * @subsys_addr: address of a subsystem state; its first word is read as
 *               the pointer to the owning cgroup
 * @subsys_id: index into the parent's subsys array
 *
 * Returns the address stored in the parent's slot, 0 if the cgroup has
 * no parent.
 */
static ulong
get_subsys_parent(ulong subsys_addr, int subsys_id)
{
	ulong cgrp, parent_cgrp = 0, parent_css = 0;

	readmem(subsys_addr, KVADDR, &cgrp, sizeof(ulong),
		"subsys_css_cgroup", FAULT_ON_ERROR);
	readmem(cgrp + cgroup_offset_table.cgroup_parent, KVADDR,
		&parent_cgrp, sizeof(ulong), "cgroup_parent",
		FAULT_ON_ERROR);

	if (!parent_cgrp)
		return 0;

	readmem(parent_cgrp + cgroup_offset_table.cgroup_subsys +
		subsys_id * sizeof(ulong), KVADDR, &parent_css,
		sizeof(ulong), "cgroup_subsys", FAULT_ON_ERROR);

	return parent_css;
}

/*
 * Compute the effective limits of a mem_cgroup: the smallest limit
 * found on the way from the mem_cgroup up to the top of its hierarchy.
 * @memcg_addr: address of the mem_cgroup to start from
 * @mem_limit: out, smallest limit of the "res" counters
 * @memsw_limit: out, smallest limit of the "memsw" counters
 * @subsys_id: subsystem index used to find the parents
 */
static void
get_mem_hierarchical_limit(ulong memcg_addr, uint64_t *mem_limit,
			   uint64_t *memsw_limit, int subsys_id)
{
	uint64_t lowest = ~0ULL, lowest_sw = ~0ULL, cur = 0;
	ulong memcg = memcg_addr;

	do {
		cur = read_res_counter(memcg + memory_offset_table.memory_res,
				       memory_offset_table.counter_limit,
				       NULL);
		if (cur < lowest)
			lowest = cur;

		cur = read_res_counter(memcg + memory_offset_table.memory_memsw,
				       memory_offset_table.counter_limit,
				       NULL);
		if (cur < lowest_sw)
			lowest_sw = cur;

		/* go one level up, 0 means the top has been passed */
		memcg = get_subsys_parent(memcg, subsys_id);
	} while (memcg != 0);

	*mem_limit = lowest;
	*memsw_limit = lowest_sw;
}

/*
 * Get the memory usage of a mem_cgroup.
 * @memcg_addr: address of the mem_cgroup
 * @swap: nonzero to ask for the memory+swap usage
 *
 * For a non-root mem_cgroup the usage is read from its res_counter.
 * For the root mem_cgroup it is the sum of cache and rss (plus swap if
 * @swap is set) over the mem_cgroups returned by mem_cgroup_iter().
 */
static uint64_t
get_mem_usage(ulong memcg_addr, int swap)
{
	int64_t stats[NR_MCS_STAT];
	uint64_t sum = 0;
	ulong cur;

	if (!is_root_mem_cgroup(memcg_addr)) {
		if (swap)
			return read_res_counter(memcg_addr +
					memory_offset_table.memory_memsw,
					memory_offset_table.counter_usage,
					NULL);
		return read_res_counter(memcg_addr +
					memory_offset_table.memory_res,
					memory_offset_table.counter_usage,
					NULL);
	}

	/* Go through the mem_cgroup tree */
	cur = mem_cgroup_iter(memcg_addr, 0);
	while (cur != 0) {
		/* Get all the stats */
		memset(stats, 0, sizeof(stats));
		get_mem_local_stats(cur, stats, swap);

		/* cache and rss always count, swap only on request */
		sum += stats[MCS_CACHE] + stats[MCS_RSS];
		if (swap)
			sum += stats[MCS_SWAP];

		/* mem_cgroup_iter() can't advance without css_id */
		if (!STRUCT_EXISTS("css_id"))
			break;
		cur = mem_cgroup_iter(memcg_addr, cur);
	}

	return sum;
}

/*
 * Print the oom_kill_disable and under_oom lines of a mem_cgroup.
 * @subsys_addr: address of the mem_cgroup
 */
static void
mem_print_oom_ctrl(ulong subsys_addr)
{
	int disabled, under;

	disabled = (int)read_member_32(subsys_addr +
			memory_offset_table.memory_oom_kill_disable,
			"oom_kill_disable");
	fprintf(fp, "\toom_kill_disable %d\n", disabled);

	under = (int)read_member_32(subsys_addr +
			memory_offset_table.memory_under_oom,
			"under_oom");
	/* under_oom is reported as a flag: 1 for any positive value */
	fprintf(fp, "\tunder_oom %d\n", under > 0);
}

/*
 * Print the swappiness of a mem_cgroup as "<buf>: <value>".
 * @subsys_addr: address of the mem_cgroup
 * @buf: name to print in front of the value
 *
 * The root mem_cgroup uses the global vm_swappiness, any other one its
 * own swappiness member. Nothing is printed if the source is missing.
 */
static void
mem_print_swap(ulong subsys_addr, char *buf)
{
	int swappiness = 0;
	ulong src;
	char *what;

	if (is_root_mem_cgroup(subsys_addr)) {
		if (symbol_value("vm_swappiness") == -1)
			return;
		src = symbol_value("vm_swappiness");
		what = "vm_swappiness";
	} else {
		if (memory_offset_table.memory_swappiness == -1)
			return;
		src = subsys_addr + memory_offset_table.memory_swappiness;
		what = "mem_cgroup swappiness";
	}

	readmem(src, KVADDR, &swappiness, sizeof(int), what, FAULT_ON_ERROR);
	fprintf(fp, "%s: %d\n", buf, swappiness);
}

/*
 * Print the per-node and the overall LRU counts of a mem_cgroup.
 * @mem_addr: address of the mem_cgroup, handed to get_mz_lru_val()
 * @lstats: array indexed by MCS_*; its five LRU entries, divided by
 *          PAGE_SIZE, make up the final "total" line
 */
static void
mem_print_numa_stat(ulong mem_addr, uint64_t *lstats)
{
	uint64_t anon_node, file_node, unevictable_node, val,
		 anon_nr = 0, file_nr = 0, unevictable_nr = 0;
	int nid, zid;

	for (nid = 0; nid < vt->numnodes; nid++) {
		file_node = 0, anon_node = 0, unevictable_node = 0;
		for (zid = 0; zid < vt->nr_zones; zid++) {
			/* values per node */
			file_node += get_mz_lru_val(mem_addr, nid, zid,
						    LRU_INACTIVE_FILE);
			file_node += get_mz_lru_val(mem_addr, nid, zid,
						    LRU_ACTIVE_FILE);
			anon_node += get_mz_lru_val(mem_addr, nid, zid,
						    LRU_INACTIVE_ANON);
			anon_node += get_mz_lru_val(mem_addr, nid, zid,
						    LRU_ACTIVE_ANON);
			unevictable_node += get_mz_lru_val(mem_addr, nid, zid,
							   LRU_UNEVICTABLE);
		}
		val = file_node + anon_node + unevictable_node;
		/* values all nodes */
		file_nr += file_node;
		anon_nr += anon_node;
		unevictable_nr += unevictable_node;
		/*
		 * uint64_t is not unsigned long on 32-bit hosts, so print
		 * through unsigned long long.
		 */
		fprintf(fp, "\tfile_N%d=%llu\n", nid,
			(unsigned long long)file_node);
		fprintf(fp, "\tanon_N%d=%llu\n", nid,
			(unsigned long long)anon_node);
		fprintf(fp, "\tunevictable_N%d=%llu\n", nid,
			(unsigned long long)unevictable_node);
		fprintf(fp, "\ttotal_N%d=%llu\n", nid,
			(unsigned long long)val);
	}
	fprintf(fp, "\tfile=%llu\n", (unsigned long long)file_nr);
	fprintf(fp, "\tanon=%llu\n", (unsigned long long)anon_nr);
	fprintf(fp, "\tunevictable=%llu\n",
		(unsigned long long)unevictable_nr);
	val = (lstats[MCS_INACTIVE_ANON] + lstats[MCS_ACTIVE_ANON] +
		lstats[MCS_INACTIVE_FILE] + lstats[MCS_ACTIVE_FILE] +
		lstats[MCS_UNEVICTABLE]) / PAGE_SIZE;
	fprintf(fp, "\ttotal=%llu\n", (unsigned long long)val);
}

/*
 * Print one per-cpu statistic of a blkio_group, summed over all cpus.
 * @blkg_addr: address of the blkio_group
 * @type: which statistic; BLKIO_STAT_SECTORS is a single counter, any
 *        other value selects a row of the per-cpu stat array
 * @dev: device number, printed as major:minor
 * @buf: not used
 *
 * Returns the sector count for BLKIO_STAT_SECTORS, otherwise the sum
 * of the Read and Write values.
 */
static uint64_t
blkio_get_stat_cpu(ulong blkg_addr, enum stat_type type,
		   uint32_t dev, char *buf)
{
	uint64_t total = 0, val = 0, tval,
		 stats_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL];
	char tmp[FILENAME_MAX] = {0};
	enum stat_sub_type sub_type;
	ulong stats_cpu_addr, stats_cpu_ptr;
	int i;
	static const char *sub_type_str[] = {
		"Read",
		"Write",
		"Sync",
		"Async",
		"Total",
	};

	readmem(blkg_addr + blkio_offset_table.blkg_stats_cpu,
		KVADDR, &stats_cpu_addr, sizeof(ulong),
		"blkio_group stats_cpu", FAULT_ON_ERROR);
	/*
	 * Without per-cpu offsets the pointer is used as it is; it must
	 * not be left uninitialized in that case.
	 */
	stats_cpu_ptr = stats_cpu_addr;

	if (type == BLKIO_STAT_SECTORS) {
		for_each_possible_cpu(i) {
			if (kt->flags & PER_CPU_OFF)
				stats_cpu_ptr = stats_cpu_addr +
						kt->__per_cpu_offset[i];
			readmem(stats_cpu_ptr, KVADDR, &tval, sizeof(uint64_t),
				"blkio_group_stats_cpu_sectors",
				FAULT_ON_ERROR);
			val += tval;
		}
		fprintf(fp, "\t%d:%d %llu\n", MAJOR(dev), MINOR(dev),
			(unsigned long long)val);
		return val;
	}

	/* the device prefix is the same on every line */
	snprintf(tmp, sizeof(tmp), "%d:%d", MAJOR(dev), MINOR(dev));

	for (sub_type = BLKIO_STAT_READ; sub_type <= BLKIO_STAT_TOTAL; sub_type++) {
		val = 0;
		for_each_possible_cpu(i) {
			/* "Total" is computed below, not read from the dump */
			if (sub_type == BLKIO_STAT_TOTAL)
				break;
			if (kt->flags & PER_CPU_OFF)
				stats_cpu_ptr = stats_cpu_addr +
						kt->__per_cpu_offset[i];
			/* the stat array follows the 64-bit sectors counter */
			readmem(stats_cpu_ptr + sizeof(uint64_t), KVADDR,
				stats_arr_cpu, sizeof(uint64_t) * BLKIO_STAT_CPU_NR *
				BLKIO_STAT_TOTAL, "blkio_group_stats_cpu_stats_arr",
				FAULT_ON_ERROR);
			tval = stats_arr_cpu[type][sub_type];
			val += tval;
		}
		switch (sub_type)
		{
		case BLKIO_STAT_READ:
		case BLKIO_STAT_WRITE:
			/* only Read and Write make up the total */
			total += val;
			fprintf(fp, "\t%s %s %llu\n", tmp,
				sub_type_str[sub_type],
				(unsigned long long)val);
			break;
		case BLKIO_STAT_SYNC:
		case BLKIO_STAT_ASYNC:
			fprintf(fp, "\t%s %s %llu\n", tmp,
				sub_type_str[sub_type],
				(unsigned long long)val);
			break;
		case BLKIO_STAT_TOTAL:
			fprintf(fp, "\t%s %s %llu\n", tmp,
				sub_type_str[sub_type],
				(unsigned long long)total);
			break;
		default:
			break;
		}
	}

	return total;
}

/*
 * Print one statistic of a blkio_group that is not kept per cpu.
 * @blkg_addr: address of the blkio_group
 * @type: BLKIO_STAT_TIME and BLKIO_STAT_SECTORS are single counters,
 *        any other value selects a row of stat_arr
 * @dev: device number, printed as major:minor
 * @buf: not used
 *
 * Returns the printed counter for time and sectors, otherwise the sum
 * of the Read and Write values.
 */
static uint64_t
blkio_get_stat(ulong blkg_addr, enum stat_type type, uint32_t dev, char *buf)
{
	uint64_t time, sectors, total = 0,
		 stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
	char tmp[FILENAME_MAX] = {0};
	enum stat_sub_type sub_type;
	static const char *sub_type_str[] = {
		"Read",
		"Write",
		"Sync",
		"Async",
		"Total",
	};

	if (type == BLKIO_STAT_TIME) {
		/* no sub_type */
		readmem(blkg_addr + blkio_offset_table.blkg_stats, KVADDR, &time,
			sizeof(uint64_t), "blkio_group_stat_time",
			FAULT_ON_ERROR);
		fprintf(fp, "\t%d:%d %llu\n", MAJOR(dev), MINOR(dev),
			(unsigned long long)time);
		return time;
	} else if (type == BLKIO_STAT_SECTORS) {
		/* no sub_type; sectors is read right after time */
		readmem(blkg_addr + blkio_offset_table.blkg_stats +
			sizeof(uint64_t), KVADDR, &sectors,
			sizeof(uint64_t), "blkio_group_stat sectors",
			FAULT_ON_ERROR);
		fprintf(fp, "\t%d:%d %llu\n", MAJOR(dev), MINOR(dev),
			(unsigned long long)sectors);
		return sectors;
	}

	/* get values */
	readmem(blkg_addr + blkio_offset_table.blkg_stats +
		MEMBER_OFFSET("blkio_group_stats", "stat_arr"),
		KVADDR, stat_arr, sizeof(stat_arr),
		"blkio_group_stat_stat_arr", FAULT_ON_ERROR);

	/* the device prefix is the same on every line */
	snprintf(tmp, sizeof(tmp), "%d:%d", MAJOR(dev), MINOR(dev));

	for (sub_type = BLKIO_STAT_READ; sub_type <= BLKIO_STAT_TOTAL; sub_type++) {
		switch (sub_type)
		{
		case BLKIO_STAT_READ:
		case BLKIO_STAT_WRITE:
		case BLKIO_STAT_SYNC:
		case BLKIO_STAT_ASYNC:
			fprintf(fp, "\t%s %s %llu\n", tmp, sub_type_str[sub_type],
				(unsigned long long)stat_arr[type][sub_type]);
			break;
		case BLKIO_STAT_TOTAL:
			/* only Read and Write make up the total */
			total = stat_arr[type][BLKIO_STAT_READ] +
				stat_arr[type][BLKIO_STAT_WRITE];
			fprintf(fp, "\t%s %s %llu\n", tmp, sub_type_str[sub_type],
				(unsigned long long)total);
			break;
		default:
			break;
		}
	}

	return total;
}

/*
 * print out the string with value.
 * @dev: the major and minor of a device, already formatted as a string
 * @val: value to be printed
 *
 * Returns @val.
 */
static uint64_t
print_u64(char *dev, uint64_t val)
{
	/* uint64_t is not unsigned long on 32-bit hosts */
	fprintf(fp, "\t%s %llu\n", dev, (unsigned long long)val);

	return val;
}

/*
 * print out one line per sub type, followed by the total.
 * @dev: the major and minor of a device, already formatted as a string
 * @val: BLKIO_STAT_TOTAL values, one per sub type
 *       (Read, Write, Sync, Async)
 *
 * Returns the printed total, which is Read + Write.
 */
static uint64_t
print_u64_rw(char *dev, uint64_t *val)
{
	uint64_t total;
	int i;
	static const char *blkio_rw_strs[] = {
		"Read",
		"Write",
		"Sync",
		"Async",
		"Total",
	};

	/* uint64_t is not unsigned long on 32-bit hosts */
	for (i = 0; i < BLKIO_STAT_TOTAL; i++)
		fprintf(fp, "\t%s %s %llu\n", dev, blkio_rw_strs[i],
			(unsigned long long)val[i]);

	total = val[BLKIO_STAT_READ] + val[BLKIO_STAT_WRITE];
	fprintf(fp, "\t%s %s %llu\n", dev, blkio_rw_strs[BLKIO_STAT_TOTAL],
		(unsigned long long)total);

	return total;
}

/*
 * Sum a per-cpu group of four counters over all cpus and print it.
 * @dev: device name to print in front of every line
 * @st_addr: address of the per-cpu statistics
 * @type: BLKIO_THROTL_io_service_bytes reads the counters at the start
 *        of the statistics, any other value reads the ones located
 *        STRUCT_SIZE("blkg_rwstat") further
 *
 * Returns the total printed by print_u64_rw().
 */
static uint64_t
print_thro_u64_rw(char *dev, ulong st_addr, int type)
{
	uint64_t val[4] = {0}, tval[4];
	int i, j;
	ulong stptr;

	/*
	 * Without per-cpu offsets the address is used as it is; it must
	 * not be left uninitialized in that case.
	 */
	stptr = st_addr;

	for_each_possible_cpu(i) {
		if (kt->flags & PER_CPU_OFF)
			stptr = st_addr + kt->__per_cpu_offset[i];
		if (type == BLKIO_THROTL_io_service_bytes)
			readmem(stptr, KVADDR, tval, sizeof(uint64_t) * 4,
				"tg_stat_cpu service_bytes",
				FAULT_ON_ERROR);
		else
			readmem(stptr + STRUCT_SIZE("blkg_rwstat"),
				KVADDR, tval, sizeof(uint64_t) * 4,
				"tg_stat_cpu serviced",
				FAULT_ON_ERROR);
		for (j = 0; j < 4; j++)
			val[j] += tval[j];
	}

	return print_u64_rw(dev, val);
}

/*
 * Get the name of the device behind a blkcg_gq.
 * @blkg_addr: address of the blkcg_gq; its first word is read as the
 *             pointer to the request_queue
 * @buf: receives the NUL-terminated name; at most FILENAME_MAX bytes
 *       are written
 *
 * device.init_name is preferred; if it isn't set, the first word of
 * device.kobj is taken as the name pointer.
 */
static void
dev_name(ulong blkg_addr, char *buf)
{
	char tmp[FILENAME_MAX] = {0};
	ulong q_addr, dev_addr, name_addr;
	char *what = "init_name";

	readmem(blkg_addr, KVADDR, &q_addr,
		sizeof(ulong), "blkcg_gq q", FAULT_ON_ERROR);
	readmem(q_addr + MEMBER_OFFSET("request_queue", "backing_dev_info") +
		MEMBER_OFFSET("backing_dev_info", "dev"), KVADDR, &dev_addr,
		sizeof(ulong), "dev", FAULT_ON_ERROR);
	/* Use the init name until the kobject becomes available */
	readmem(dev_addr + MEMBER_OFFSET("device", "init_name"), KVADDR,
		&name_addr, sizeof(ulong), "device init_name",
		FAULT_ON_ERROR);
	if (!name_addr) {
		readmem(dev_addr + MEMBER_OFFSET("device", "kobj"), KVADDR,
			&name_addr, sizeof(ulong), "kobject name ptr",
			FAULT_ON_ERROR);
		what = "kobject name";
	}

	/*
	 * We don't know the length of string, so just read enough
	 * and copy to destination
	 */
	readmem(name_addr, KVADDR, tmp, FILENAME_MAX, what, FAULT_ON_ERROR);
	/* the bytes read may not contain a terminator at all */
	tmp[FILENAME_MAX - 1] = '\0';
	/* copy the terminator too, so @buf doesn't have to be pre-cleared */
	memcpy(buf, tmp, strlen(tmp) + 1);
}

/*
 * Check whether a blkcg policy is switched on for the request queue
 * of a blkcg_gq.
 * @blkg_addr: address of the blkcg_gq; its first word is read as the
 *             pointer to the request_queue
 * @policy: BLKCG_POLICY_PROP selects the cfq policy, any other value
 *          the throttle policy
 *
 * Returns nonzero if the policy symbol has a value and bit
 * (@policy - 2) of request_queue.blkcg_pols is set.
 */
static int
test_policy(ulong blkg_addr, int policy)
{
	ulong queue, enabled_pols, pol_sym = 0;

	pol_sym = symbol_value(policy == BLKCG_POLICY_PROP ?
			       "blkcg_policy_cfq" : "blkcg_policy_throtl");

	readmem(blkg_addr, KVADDR, &queue,
		sizeof(ulong), "blkcg_gq q", FAULT_ON_ERROR);
	/* there're 2 bits for pols, so only one ulong is enough */
	readmem(queue + MEMBER_OFFSET("request_queue", "blkcg_pols"),
		KVADDR, &enabled_pols, sizeof(ulong),
		"request_queue blkcg_pols", FAULT_ON_ERROR);

	return pol_sym && test_bit(policy - 2, enabled_pols);
}

/*
 * Walk the list of blkcg_gq and print one cfq_group attribute for
 * every group that has the proportional weight policy enabled.
 * @blkg_addr: address of the first blkcg_gq
 * @attr_id: BLKIO_PROP_* attribute to print
 * @off: offset of the attribute from the start of the policy data
 * @data_size: number of bytes to read; at most four 64-bit values
 *
 * Returns the sum of the totals that were printed.
 */
static uint64_t
print_cfq_group(ulong blkg_addr, int attr_id, long off, size_t data_size)
{
	uint64_t val[4], total = 0;
	ulong pd_addr;
	char buf[FILENAME_MAX] = {0};

	/* never read more than val[] can hold */
	if (data_size > sizeof(val))
		data_size = sizeof(val);

	do {
		memset(buf, 0, FILENAME_MAX);
		dev_name(blkg_addr, buf);
		readmem(blkg_addr + blkio_offset_table.blkg_pd +
			(BLKCG_POLICY_PROP - 2) * sizeof(ulong),
			KVADDR, &pd_addr, sizeof(ulong),
			"blkcg_gq pd", FAULT_ON_ERROR);
		/*
		 * blkg_policy_data is at the beginning of
		 * throtl_grp or cfq_group
		 */
		if (pd_addr && test_policy(blkg_addr, BLKCG_POLICY_PROP)) {
			/*
			 * Short reads fill only a part of val[0]; clear the
			 * array so the rest isn't stack garbage.
			 */
			memset(val, 0, sizeof(val));
			readmem(pd_addr + off, KVADDR, val, data_size,
				"policy_data member", FAULT_ON_ERROR);
			if (data_size == sizeof(uint)) {
				/* widen a 32-bit member independent of byte order */
				uint narrow;

				memcpy(&narrow, val, sizeof(narrow));
				val[0] = narrow;
			}
			switch (attr_id)
			{
			case BLKIO_PROP_weight_device:
				/* a device weight of 0 is not printed */
				if (val[0])
					total += print_u64(buf, val[0]);
				break;
			case BLKIO_PROP_weight:
				break;
			case BLKIO_PROP_time:
			case BLKIO_PROP_sectors:
				total += print_u64(buf, val[0]);
				break;
			case BLKIO_PROP_io_service_bytes:
			case BLKIO_PROP_io_serviced:
			case BLKIO_PROP_io_service_time:
			case BLKIO_PROP_io_wait_time:
			case BLKIO_PROP_io_merged:
			case BLKIO_PROP_io_queued:
				total += print_u64_rw(buf, val);
				break;
			default:
				break;
			}
		}
		blkg_addr = (ulong)list_next(NULL, (void *)blkg_addr,
				      blkio_offset_table.blkg_blkcg_node);
	} while (0 != blkg_addr + blkio_offset_table.blkg_blkcg_node);

	return total;
}

/*
 * Walk the list of blkcg_gq and print one throtl_grp attribute for
 * every group that has the throttle policy enabled.
 * @blkg_addr: address of the first blkcg_gq
 * @attr_id: BLKIO_THROTL_* attribute to print
 * @off: offset of the attribute from the start of the policy data
 * @data_size: number of bytes to read, at most 8
 *
 * Returns the sum of the values that were printed.
 */
static uint64_t
print_throtl_grp(ulong blkg_addr, int attr_id, long off, size_t data_size)
{
	uint64_t val, total = 0;
	ulong pd_addr;
	char buf[FILENAME_MAX] = {0};

	/* never read more than val can hold */
	if (data_size > sizeof(val))
		data_size = sizeof(val);

	do {
		memset(buf, 0, FILENAME_MAX);
		dev_name(blkg_addr, buf);
		readmem(blkg_addr + blkio_offset_table.blkg_pd +
			(BLKCG_POLICY_THROTL - 2) * sizeof(ulong),
			KVADDR, &pd_addr, sizeof(ulong),
			"blkcg_gq pd", FAULT_ON_ERROR);
		/*
		 * blkg_policy_data is at the beginning of
		 * throtl_grp or cfq_group
		 */
		if (pd_addr && test_policy(blkg_addr, BLKCG_POLICY_THROTL)) {
			/*
			 * Short reads fill only a part of val; clear it so
			 * the rest isn't stack garbage.
			 */
			val = 0;
			readmem(pd_addr + off , KVADDR, &val, data_size,
				"policy_data member", FAULT_ON_ERROR);
			if (data_size == sizeof(uint32_t)) {
				/* widen a 32-bit member independent of byte order */
				uint32_t narrow;

				memcpy(&narrow, &val, sizeof(narrow));
				val = narrow;
			}
			switch (attr_id)
			{
			case BLKIO_THROTL_io_service_bytes:
			case BLKIO_THROTL_io_serviced:
				/* here val is the address of the per-cpu stats */
				total += print_thro_u64_rw(buf, val, attr_id);
				break;
			case BLKIO_THROTL_read_bps_device:
			case BLKIO_THROTL_write_bps_device:
				/* all bits set: nothing to print for this device */
				if (val != -1)
					total += print_u64(buf, val);
				break;
			case BLKIO_THROTL_read_iops_device:
			case BLKIO_THROTL_write_iops_device:
				if ((uint)val != -1)
					total += print_u64(buf, val);
				break;
			default:
				break;
			}
		}
		blkg_addr = (ulong)list_next(NULL, (void *)blkg_addr,
				      blkio_offset_table.blkg_blkcg_node);
	} while (0 != blkg_addr + blkio_offset_table.blkg_blkcg_node);

	return total;
}

/*
 * Print one blkio parameter by reading it from the policy data
 * (cfq_group or throtl_grp) of every blkcg_gq of a blkcg.
 * @blkcg_addr: address of the blkcg
 * @plid: BLKCG_POLICY_PROP or BLKCG_POLICY_THROTL
 * @attr_id: BLKIO_PROP_* or BLKIO_THROTL_* attribute
 * @str: prefix printed in front of the parameter name
 *
 * If the blkcg has no group, blkio_print_param_no_group() does the
 * printing. Always returns 0 otherwise.
 */
static int
read_policy_group(ulong blkcg_addr, int plid, int attr_id, char *str)
{
	uint64_t val, total = 0;
	ulong blkg_addr, hlist_head;
	long off = 0;
	size_t data_size = 0;
	int show_total = 0;
	/* cleared for attributes with no known location in the policy data */
	int supported = 1;

	readmem(blkcg_addr + blkio_offset_table.blkio_blkg_list, KVADDR,
		&hlist_head, sizeof(ulong), "blkio_cgroup blkg_list",
		FAULT_ON_ERROR);
	if (hlist_head == 0 || hlist_head == ~0UL)
		return blkio_print_param_no_group(blkcg_addr, plid,
						  attr_id, str);
	blkg_addr = hlist_head - blkio_offset_table.blkg_blkcg_node;

	fprintf(fp, "%s.", str);
	switch (plid)
	{
	case BLKCG_POLICY_PROP:
		/* target struct is cfq_group. */
		switch (attr_id)
		{
		case BLKIO_PROP_weight:
			/* the weight belongs to the blkcg, not to a group */
			readmem(blkcg_addr + blkio_offset_table.blkio_weight,
				KVADDR, &val, sizeof(uint64_t),
				"blkcg weight", FAULT_ON_ERROR);
			fprintf(fp, "%s: %llu\n", blkio_prop_strs[attr_id - 1],
				(unsigned long long)val);
			return 0;
		case BLKIO_PROP_weight_device:
			off = MEMBER_OFFSET("cfq_group", "dev_weight");
			data_size = sizeof(uint);
			break;
		case BLKIO_PROP_io_service_bytes:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_service_bytes +
			      MEMBER_OFFSET("blkg_rwstat", "cnt");
			data_size = sizeof(uint64_t) * 4;
			show_total = 1;
			break;
		case BLKIO_PROP_io_serviced:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_serviced +
			      MEMBER_OFFSET("blkg_rwstat", "cnt");
			data_size = sizeof(uint64_t) * 4;
			show_total = 1;
			break;
		case BLKIO_PROP_time:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_time +
			      MEMBER_OFFSET("blkg_stat", "cnt");
			data_size = sizeof(uint64_t);
			break;
		case BLKIO_PROP_sectors:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_sectors +
			      MEMBER_OFFSET("blkg_stat", "cnt");
			data_size = sizeof(uint64_t);
			break;
		case BLKIO_PROP_io_service_time:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_service_time +
			      MEMBER_OFFSET("blkg_rwstat", "cnt");
			data_size = sizeof(uint64_t) * 4;
			show_total = 1;
			break;
		case BLKIO_PROP_io_wait_time:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_wait_time +
			      MEMBER_OFFSET("blkg_rwstat", "cnt");
			data_size = sizeof(uint64_t) * 4;
			show_total = 1;
			break;
		case BLKIO_PROP_io_merged:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_merged +
			      MEMBER_OFFSET("blkg_rwstat", "cnt");
			data_size = sizeof(uint64_t) * 4;
			show_total = 1;
			break;
		case BLKIO_PROP_io_queued:
			off = blkio_offset_table.cfq_group_stats +
			      blkio_offset_table.cfqg_stats_queued +
			      MEMBER_OFFSET("blkg_rwstat", "cnt");
			data_size = sizeof(uint64_t) * 4;
			show_total = 1;
			break;
		default:
			/* doesn't support debug */
			supported = 0;
			break;
		}
		fprintf(fp, "%s: \n", blkio_prop_strs[attr_id - 1]);
		/* without a known offset there is nothing to read */
		if (supported)
			total += print_cfq_group(blkg_addr, attr_id, off,
						 data_size);
		break;
	case BLKCG_POLICY_THROTL:
		/* target struct is throtl_grp. */
		switch (attr_id)
		{
		case BLKIO_THROTL_io_service_bytes:
		case BLKIO_THROTL_io_serviced:
			/* only the pointer to the per-cpu stats is read here */
			off = MEMBER_OFFSET("throtl_grp", "stats_cpu");
			data_size = sizeof(ulong);
			show_total = 1;
			break;
		case BLKIO_THROTL_read_bps_device:
			off = MEMBER_OFFSET("throtl_grp", "bps");
			data_size = sizeof(uint64_t);
			break;
		case BLKIO_THROTL_write_bps_device:
			/* the write value follows the read value */
			off = sizeof(uint64_t) + MEMBER_OFFSET("throtl_grp", "bps");
			data_size = sizeof(uint64_t);
			break;
		case BLKIO_THROTL_read_iops_device:
			off = MEMBER_OFFSET("throtl_grp", "iops");
			data_size = sizeof(int);
			break;
		case BLKIO_THROTL_write_iops_device:
			/* the write value follows the read value */
			off = sizeof(int) + MEMBER_OFFSET("throtl_grp", "iops");
			data_size = sizeof(int);
			break;
		default:
			supported = 0;
			break;
		}
		fprintf(fp, "%s: \n", blkio_thro_strs[attr_id]);
		/* without a known offset there is nothing to read */
		if (supported)
			total += print_throtl_grp(blkg_addr, attr_id, off,
						  data_size);
		break;
	default:
		break;
	}

	if (show_total)
		fprintf(fp, "\tTotal %llu\n", (unsigned long long)total);

	return 0;
}

/*
 * for kernel version 2.6.35, 2.6.36
 *
 * Walk the list of blkio_group and print one statistic for every group
 * that has a device.
 * @blkg_addr: address of the first blkio_group
 * @type: statistic, passed on to blkio_get_stat()
 * @show_total: nonzero to print the grand total at the end
 * @buf: passed on to blkio_get_stat()
 */
static void
blkio_read_each_blkg_for35(ulong blkg_addr, int type,
			   int show_total, char *buf)
{
	uint64_t total = 0;
	uint32_t dev;

	do {
		dev = (uint32_t)read_member_32(blkg_addr +
				blkio_offset_table.blkg_dev, "blkg_dev");
		if (dev)
			total += blkio_get_stat(blkg_addr, type, dev, buf);
		blkg_addr = (ulong)list_next(NULL, (void *)blkg_addr,
				   blkio_offset_table.blkg_blkcg_node);
	} while (0 != blkg_addr + blkio_offset_table.blkg_blkcg_node);

	/* uint64_t is not unsigned long on 32-bit hosts */
	if (show_total)
		fprintf(fp, "\tTotal %llu\n", (unsigned long long)total);
}

/*
 * for kernel version 2.6.37 -- 2.6.39
 *
 * Walk the list of blkio_group and print one statistic for every group
 * that has a device and belongs to the given policy.
 * @blkg_addr: address of the first blkio_group
 * @type: statistic, passed on to blkio_get_stat()
 * @plid: policy id the groups have to match
 * @show_total: nonzero to print the grand total at the end
 * @buf: passed on to blkio_get_stat()
 */
static void
blkio_read_each_blkg_for37(ulong blkg_addr, int type, int plid,
			   int show_total, char *buf)
{
	int blkg_plid;
	uint64_t total = 0;
	uint32_t dev;

	do {
		dev = (uint32_t)read_member_32(blkg_addr +
				blkio_offset_table.blkg_dev, "blkg_dev");
		blkg_plid = (int)read_member_32(blkg_addr +
				 blkio_offset_table.blkg_plid, "blkg_plid");
		if (dev && plid == blkg_plid)
			total += blkio_get_stat(blkg_addr, type, dev, buf);
		blkg_addr = (ulong)list_next(NULL, (void *)blkg_addr,
				   blkio_offset_table.blkg_blkcg_node);
	} while (0 != blkg_addr + blkio_offset_table.blkg_blkcg_node);

	/* uint64_t is not unsigned long on 32-bit hosts */
	if (show_total)
		fprintf(fp, "\tTotal %llu\n", (unsigned long long)total);
}

/*
 * for kernel version 3.0 -- 3.4
 *
 * Walk the list of blkio_group and print one statistic for every group
 * that has a device and belongs to the given policy.
 * @blkg_addr: address of the first blkio_group
 * @type: statistic as numbered by this file; it is translated to the
 *        numbering of the dumped kernel first
 * @plid: policy id the groups have to match
 * @show_total: nonzero to print the grand total at the end
 * @pcpu: nonzero if the statistic is kept per cpu
 * @buf: passed on to the stat printers
 */
static void
blkio_read_each_blkg_for300(ulong blkg_addr, int type, int plid,
			   int show_total, int pcpu, char *buf)
{
	int blkg_plid;
	uint64_t total = 0;
	uint32_t dev;

	/* type defined here is different from kernel v3.0 -- v3.4 */
	if (type > BLKIO_STAT_SERVICED &&
	    type < BLKIO_STAT_TIME)
		type -= 2;
	else if (type == BLKIO_STAT_SERVICE_BYTES)
		type = BLKIO_STAT_CPU_SERVICE_BYTES;
	else if (type == BLKIO_STAT_SERVICED)
		type = BLKIO_STAT_CPU_SERVICED;

	/*
	 * there is a BLKIO_STAT_MERGED before BLKIO_STAT_QUEUED in 6.3GA,
	 * while BLKIO_STAT_MERGED doesn't exist in kernel v3.0 -- v3.4
	 */
	if ((12 == MEMBER_SIZE("blkio_group_stats", "stat_arr") /
		   sizeof(uint64_t))) {
		if (type == BLKIO_STAT_QUEUED -2)
			type -= 1;
		else if(type == BLKIO_STAT_MERGED - 2) {
			/* merged is a per-cpu statistic on these kernels */
			type = BLKIO_STAT_CPU_MERGED;
			pcpu = 1;
		}
	}

	do {
		dev = (uint32_t)read_member_32(blkg_addr +
				blkio_offset_table.blkg_dev, "blkg_dev");
		blkg_plid = (int)read_member_32(blkg_addr +
				 blkio_offset_table.blkg_plid, "blkg_plid");
		if (dev && plid == blkg_plid) {
			if (pcpu)
				total += blkio_get_stat_cpu(blkg_addr, type, dev, buf);
			else
				total += blkio_get_stat(blkg_addr, type, dev, buf);
		}
		blkg_addr = (ulong)list_next(NULL, (void *)blkg_addr,
				   blkio_offset_table.blkg_blkcg_node);
	} while (0 != blkg_addr + blkio_offset_table.blkg_blkcg_node);

	/* uint64_t is not unsigned long on 32-bit hosts */
	if (show_total)
		fprintf(fp, "\tTotal %llu\n", (unsigned long long)total);
}

/*
 * Print one blkio statistic with the reader that matches the layout
 * of the dumped kernel. Always returns 0.
 */
static int
read_blkcg_stat(ulong blkg_addr, enum stat_type type, int plid, int show_total, int pcpu, char *buf)
{
	if (blkio_offset_table.blkg_stats_cpu != -1) {
		/* kernel version 3.0 -- 3.4 */
		blkio_read_each_blkg_for300(blkg_addr, type, plid,
					    show_total, pcpu, buf);
		return 0;
	}

	if (MEMBER_EXISTS("blkio_policy_node", "val")) {
		/* kernel version 2.6.37 -- 2.6.39 */
		blkio_read_each_blkg_for37(blkg_addr, type, plid,
					   show_total, buf);
	} else {
		/* kernel version 2.6.35, 2.6.36 */
		blkio_read_each_blkg_for35(blkg_addr, type,
					   show_total, buf);
	}

	return 0;
}

/*
 * Print a per-device rule (weight, bps or iops) from the policy list
 * of a blkio_cgroup.
 * @blkcg_addr: address of the blkio_cgroup
 * @plid: policy id the nodes have to match
 * @attr_id: file id the nodes have to match
 * @buf: text printed in front of every value, and on a line of its
 *       own once the end of the list is reached
 *
 * Always returns 0.
 * NOTE(review): the device number of a node is not printed here -
 * confirm that this is intended.
 */
static int
read_policy_node(ulong blkcg_addr, int plid, int attr_id, char *buf)
{
	ulong head;
	int policy_id, fileid;
	uint weight, iops;
	uint64_t bps;

	head = blkcg_addr + blkio_offset_table.blkio_policy_list;
	while (1) {
		/* back at the list head: the walk is over */
		if ((head = (ulong)list_next((void *)head, NULL, 0)) ==
			(blkcg_addr + blkio_offset_table.blkio_policy_list)) {
			fprintf(fp, "%s\n", buf);
			break;
		}
		policy_id = read_member_32(head +
				blkio_offset_table.blkp_plid, "blkp_plid");
		fileid = read_member_32(head + blkio_offset_table.blkp_fileid,
					"blkp_fileid");
		if (plid != policy_id || attr_id != fileid)
			continue;
		switch (plid)
		{
		case BLKIO_POLICY_PROP:
			if (fileid == BLKIO_PROP_weight_device) {
				weight = (uint)read_member_32(head +
						blkio_offset_table.blkp_weight,
						"blkp_weight");
				fprintf(fp, "%s%u\n", buf, weight);
			}
			break;
		case BLKIO_POLICY_THROTL:
			switch (attr_id)
			{
			case BLKIO_THROTL_read_bps_device:
			case BLKIO_THROTL_write_bps_device:
				/* bps is read right after the weight */
				readmem(head + blkio_offset_table.blkp_weight +
					sizeof(uint),
					KVADDR, &bps, sizeof(uint64_t),
					"blkio_policy_node val.bps",
					FAULT_ON_ERROR);
				/* uint64_t is not unsigned long on 32-bit hosts */
				fprintf(fp, "%s%llu\n", buf,
					(unsigned long long)bps);
				break;
			case BLKIO_THROTL_read_iops_device:
			case BLKIO_THROTL_write_iops_device:
				/* iops is read right after bps */
				readmem(head+ blkio_offset_table.blkp_weight +
					sizeof(uint) + sizeof(uint64_t),
					KVADDR, &iops, sizeof(uint),
					"blkio_policy_node val.iops",
					FAULT_ON_ERROR);
				fprintf(fp, "%s%u\n", buf, iops);
				break;
			default:
				break;
			}
			break;
		default:
			break;
		}
	}
	return 0;
}

/*
 * Print the time or sectors counter of every blkio_group that has a
 * device, for kernels that keep both directly in blkio_group.
 * @blkg_addr: address of the first blkio_group
 * @type: BLKIO_STAT_TIME for time, any other value for sectors
 * @buf: header line printed before the values
 *
 * Returns the last value that was read, 0 if no group has a device.
 */
static ulong
read_blkcg_stat_old(ulong blkg_addr, int type, char *buf)
{
	/* stays 0 if no group with a device is found */
	ulong val = 0;
	uint32_t dev;

	fprintf(fp, "%s\n", buf);
	do {
		/* get dev */
		readmem(blkg_addr + blkio_offset_table.blkg_dev,
			KVADDR, &dev, sizeof(uint32_t),
			"blkio_group dev", FAULT_ON_ERROR);
		if (dev) {
			if (type == BLKIO_STAT_TIME)
				readmem(blkg_addr +
					MEMBER_OFFSET("blkio_group", "time"),
					KVADDR, &val, sizeof(ulong),
					"blkio_group time", FAULT_ON_ERROR);
			else
				readmem(blkg_addr +
					MEMBER_OFFSET("blkio_group", "sectors"),
					KVADDR, &val, sizeof(ulong),
					"blkio_group sectors", FAULT_ON_ERROR);
			fprintf(fp, "\t%u:%u %lu\n",
				MAJOR(dev), MINOR(dev), val);
		}
		blkg_addr = (ulong)list_next(NULL, (void *)blkg_addr,
				   blkio_offset_table.blkg_blkcg_node);
	} while (0 != blkg_addr + blkio_offset_table.blkg_blkcg_node);

	return val;
}

/*
 * Print weight, time and sectors of a blkio_cgroup for kernels that
 * keep time and sectors directly in blkio_group.
 * @blkcg_addr: address of the blkio_cgroup
 */
static void
blkio_print_param_old(ulong blkcg_addr)
{
	char header[FILENAME_MAX] = {0};
	ulong first_node, first_blkg;
	uint weight;

	readmem(blkcg_addr + blkio_offset_table.blkio_blkg_list, KVADDR,
		&first_node, sizeof(ulong), "blkio_cgroup blkg_list",
		FAULT_ON_ERROR);

	/* print weight */
	readmem(blkcg_addr + blkio_offset_table.blkio_weight,
		KVADDR, &weight, sizeof(uint),
		"blkio_cgroup weight", FAULT_ON_ERROR);
	fprintf(fp, "blkio.weight: %u\n", weight);

	/* no group at all: both counters are printed as 0 */
	if (first_node == 0 || first_node == ~0UL) {
		fprintf(fp, "blkio.time: 0\n");
		fprintf(fp, "blkio.sectors: 0\n");
		return;
	}

	/* the list node is embedded in the group, step back to its start */
	first_blkg = first_node - blkio_offset_table.blkg_blkcg_node;

	/* print time */
	strcpy(header, "blkio.time:");
	read_blkcg_stat_old(first_blkg, BLKIO_STAT_TIME, header);

	/* print sectors*/
	strcpy(header, "blkio.sectors:");
	read_blkcg_stat_old(first_blkg, BLKIO_STAT_CPU_SECTORS, header);
}

/*
 * All the params except weight are associated with blkio_group.
 * So if no blkio_group exists, only the weight is read from the dump;
 * the rw statistics are printed as "Total 0" and the remaining params
 * with an empty value.
 * @blkcg_addr: address of the blkio_cgroup, used to read the weight
 * @plid: policy id
 * @attr_id: attribute of the policy to print
 * @str: prefix printed in front of the param name
 *
 * Always returns 0.
 */
static int
blkio_print_param_no_group(ulong blkcg_addr, int plid, int attr_id, char *str)
{
	char label[FILENAME_MAX] = {0};
	uint weight;

	fprintf(fp, "%s.", str);

	if (plid == BLKIO_POLICY_PROP || plid == BLKCG_POLICY_PROP) {
		sprintf(label, "%s: ", blkio_prop_strs[attr_id - 1]);
		switch (attr_id)
		{
		case BLKIO_PROP_weight:
			weight = (uint)read_member_32(blkcg_addr +
					blkio_offset_table.blkio_weight,
					"blkio_weight");
			fprintf(fp, "%s%u\n", label, weight);
			break;
		case BLKIO_PROP_weight_device:
		case BLKIO_PROP_time:
		case BLKIO_PROP_sectors:
			fprintf(fp, "%s\n", label);
			break;
		case BLKIO_PROP_io_service_bytes:
		case BLKIO_PROP_io_serviced:
		case BLKIO_PROP_io_service_time:
		case BLKIO_PROP_io_wait_time:
		case BLKIO_PROP_io_merged:
		case BLKIO_PROP_io_queued:
			fprintf(fp, "%sTotal 0\n", label);
			break;
		default:
			/* doesn't support debug */
			break;
		}
		return 0;
	}

	if (plid == BLKIO_POLICY_THROTL || plid == BLKCG_POLICY_THROTL) {
		sprintf(label, "%s: ", blkio_thro_strs[attr_id]);
		switch (attr_id)
		{
		case BLKIO_THROTL_io_service_bytes:
		case BLKIO_THROTL_io_serviced:
			if (MEMBER_EXISTS("blkio_policy_node", "val")) {
				fprintf(fp, "%sTotal 0\n", label);
			} else {
				fprintf(fp, "%s\n", label);
			}
			break;
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			fprintf(fp, "%s\n", label);
			break;
		default:
			break;
		}
		return 0;
	}

	/* unknown policy: only the prefix has been printed */
	return 0;
}

/*
 * Print one blkio parameter of a cgroup (kernels using blkio_cgroup).
 *
 * The first entry of the cgroup's blkg_list is located; when the list
 * is empty the work is handed to blkio_print_param_no_group().
 * Otherwise the "<str>.<param>: " label is built and the attribute is
 * dispatched to the matching reader.
 *
 * Returns the reader's result, or 0 when nothing had to be read.
 */
static int
blkio_read_map(ulong blkcg_addr, int plid, int attr_id, char *str)
{
	/* proportional-weight attributes backed by a blkio_group statistic */
	const struct {
		int attr;
		int stat;
		int flag_a;
		int flag_b;
	} prop_stats[] = {
		{ BLKIO_PROP_io_service_bytes, BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1 },
		{ BLKIO_PROP_io_serviced,      BLKIO_STAT_CPU_SERVICED,      1, 1 },
		{ BLKIO_PROP_time,             BLKIO_STAT_TIME,              0, 0 },
		{ BLKIO_PROP_sectors,          BLKIO_STAT_SECTORS,           0, 1 },
		{ BLKIO_PROP_io_service_time,  BLKIO_STAT_SERVICE_TIME,      1, 0 },
		{ BLKIO_PROP_io_wait_time,     BLKIO_STAT_WAIT_TIME,         1, 0 },
		{ BLKIO_PROP_io_merged,        BLKIO_STAT_MERGED,            1, 0 },
		{ BLKIO_PROP_io_queued,        BLKIO_STAT_QUEUED,            1, 0 },
	};
	size_t n;
	uint prop_weight;
	ulong first_node, group_addr;
	char label[FILENAME_MAX] = {0};

	readmem(blkcg_addr + blkio_offset_table.blkio_blkg_list, KVADDR,
		&first_node, sizeof(ulong), "blkio_cgroup blkg_list",
		FAULT_ON_ERROR);
	if (first_node == 0 || first_node == ~0UL)
		return blkio_print_param_no_group(blkcg_addr, plid,
						  attr_id, str);

	/* step back from the embedded list node to its blkio_group */
	group_addr = first_node - blkio_offset_table.blkg_blkcg_node;

	if (plid == BLKIO_POLICY_PROP) {
		sprintf(label, "%s.%s: ", str, blkio_prop_strs[attr_id - 1]);

		if (attr_id == BLKIO_PROP_weight) {
			prop_weight = (uint)read_member_32(blkcg_addr +
					blkio_offset_table.blkio_weight,
					"blkio_weight");
			fprintf(fp, "%s%u\n", label, prop_weight);
			return 0;
		}
		if (attr_id == BLKIO_PROP_weight_device)
			return read_policy_node(blkcg_addr, plid, attr_id, label);

		for (n = 0; n < sizeof(prop_stats) / sizeof(prop_stats[0]); n++) {
			if (prop_stats[n].attr != attr_id)
				continue;
			fprintf(fp, "%s\n", label);
			return read_blkcg_stat(group_addr, prop_stats[n].stat,
					       plid, prop_stats[n].flag_a,
					       prop_stats[n].flag_b, label);
		}
		/* doesn't support debug */
		return 0;
	}

	if (plid == BLKIO_POLICY_THROTL) {
		sprintf(label, "%s.%s: ", str, blkio_thro_strs[attr_id]);

		switch (attr_id)
		{
		case BLKIO_THROTL_io_service_bytes:
		case BLKIO_THROTL_io_serviced:
			if (!MEMBER_EXISTS("blkio_policy_node", "val"))
				return 0;
			fprintf(fp, "%s\n", label);
			if (attr_id == BLKIO_THROTL_io_service_bytes)
				return read_blkcg_stat(group_addr,
						       BLKIO_STAT_SERVICE_BYTES,
						       plid, 1, 1, label);
			return read_blkcg_stat(group_addr, BLKIO_STAT_SERVICED,
					       plid, 1, 1, label);
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			if (!MEMBER_EXISTS("blkio_policy_node", "val"))
				return 0;
			return read_policy_node(blkcg_addr, plid, attr_id, label);
		default:
			return 0;
		}
	}

	return 0;
}

/*
 * Entry point of the "cgget" command.
 *
 * Accepted forms:
 *   -g <controller>:<path>   combined spec, no PATH arguments allowed
 *   -g <controller> PATH...  controller list applied to every PATH
 *   -a PATH...               all controllers for every PATH
 *   PATH...                  same as -a
 *
 * The parsed specs are collected in group_list and handed to
 * print_cgroup_list().  Everything allocated here (and by the spec
 * builders) is released before returning; the usage text is shown when
 * an argument error was detected.
 */
void
cmd_cgget(void)
{
	int c;
	int ret = 0;
	int i = 0, j = 0, k = 0;
	int dis_all_param = 0, group_flag = 0;
	struct cgroup_spec *group_list[CGROUP_HIER_MAX] = {NULL};
	char *subsys_str[CGROUP_HIER_MAX], *path[CGROUP_HIER_MAX];


	if (!is_cgroup_supported) {
		command_not_supported();
		return;
	}
	while ((c = getopt_long(argcnt, args, "hg:a", 
		long_options, NULL)) != EOF) {
		switch(c)
		{
		case 'g':
			if (strchr(optarg, ':') != NULL) {
				group_flag |= MODE_COMBINE_PATH;
				ret = parse_cgroup_spec(&group_list[i], optarg, i,
							CGROUP_HIER_MAX);
				if (ret != 0) {
					/* never leave a stale pointer in the slot */
					if (i < CGROUP_HIER_MAX)
						group_list[i] = NULL;
					argerrs++;
					goto err;
				}
				i++;
			} else {
				/* subsys_str[] holds CGROUP_HIER_MAX entries */
				if (j >= CGROUP_HIER_MAX) {
					fprintf(stderr, "Max allowed hierarchies "
						"%d reached.\n", CGROUP_HIER_MAX);
					argerrs++;
					goto err;
				}
				group_flag |= MODE_SEPARATE_PATH;
				subsys_str[j] = strdup(optarg);
				if (!subsys_str[j]) {
					fprintf(stderr, "strdup error.\n");
					goto err;
				}
				j++;
			}
			break;
		case 'a':
			dis_all_param = 1;
			break;
		case 'h':
		default:
			argerrs++;
			break;
		}
	}

	/* -h or an unknown option: show the usage, do not run the command */
	if (argerrs)
		goto err;

	/*
	 * A combined "controller:path" spec excludes PATH arguments, -a
	 * and plain "-g controller" options; every other form needs at
	 * least one PATH.
	 */
	if (((group_flag & MODE_COMBINE_PATH) && args[optind]) || 
	    (!(group_flag & MODE_COMBINE_PATH) && !args[optind]) || 
	    ((group_flag & MODE_COMBINE_PATH) && dis_all_param) ||
	    ((group_flag & MODE_COMBINE_PATH) &&
	     (group_flag & MODE_SEPARATE_PATH))) {
		argerrs++;
		goto err;
	}

	/* read the list of path */
	while (optind < argcnt) {
		/* path[] holds CGROUP_HIER_MAX entries */
		if (k >= CGROUP_HIER_MAX) {
			fprintf(stderr, "Max allowed hierarchies %d reached.\n",
				CGROUP_HIER_MAX);
			argerrs++;
			goto err;
		}
		path[k] = strdup(args[optind]);
		if (!path[k]) {
			fprintf(stderr, "strdup error.\n");
			goto err;
		}
		k++;
		optind++;
	}

	/* if only PATH is specified, treat it as -a is specified. */
	if (!group_flag)
		dis_all_param = 1;

	/*
	 * Build the spec list exactly once.  The "all" builder already
	 * includes the controllers named with -g, so running both builders
	 * would only overwrite (and leak) the first result.
	 * NOTE(review): the "all" builder appends controller names to
	 * subsys_str[] and fills group_list[] without knowing their
	 * capacity; that cannot be bounded from here.
	 */
	if (dis_all_param) {
		i = make_all_cgroup_spec(group_list, subsys_str, path, j, k);
	} else if (group_flag & MODE_SEPARATE_PATH) {
		/* one spec per (controller, path) pair must fit group_list[] */
		if ((long)j * (long)k > (long)CGROUP_HIER_MAX) {
			fprintf(stderr, "Max allowed hierarchies %d reached.\n",
				CGROUP_HIER_MAX);
			goto err;
		}
		i = make_cgroup_spec(group_list, subsys_str, path, j, k);
	}
	if (i < 1)
		goto err;

	print_cgroup_list(args[0], group_list, i, dis_all_param);

err:
	/*
	 * A negative count means a builder failed part-way.  group_list
	 * started out all NULL and is filled from index 0, so the leading
	 * non-NULL slots are the ones still to be released.
	 */
	if (i < 0) {
		for (i = 0; i < CGROUP_HIER_MAX && group_list[i]; i++)
			;
	}
	while (--i >= 0)
		free(group_list[i]);
	while (--j >= 0)
		free(subsys_str[j]);
	while (--k >= 0)
		free(path[k]);
	if (argerrs)
		cmd_usage(pc->curcmd, SYNOPSIS);

}

/*
 * Build one cgroup_spec for every (controller, path) combination.
 *
 * group_list: output array, filled from index 0.  Its capacity is not
 *             passed in, so the caller must make sure that
 *             str_num * path_num entries fit.
 * subsys_str: controller names (str_num entries).
 * path:       cgroup paths (path_num entries).
 *
 * Returns the number of specs created, or -1 on failure.  On failure
 * every spec created by this call is freed again and its slot reset to
 * NULL, so nothing is leaked and no stale pointer is left behind.
 */
static int
make_cgroup_spec(struct cgroup_spec **group_list, char *subsys_str[],
			char *path[], int str_num, int path_num)
{
	int j, k, i = 0;
	struct cgroup_spec *spec;

	for (j = 0; j < str_num; j++) {
		for (k = 0; k < path_num; k++) {
			spec = calloc(1, sizeof(*spec));
			if (!spec) {
				fprintf(stderr, "calloc error.\n");
				goto failed;
			}
			/*
			 * Both members are embedded arrays of the zeroed
			 * spec; refuse anything that would not fit together
			 * with its terminating NUL.
			 */
			if (strlen(subsys_str[j]) >= sizeof(spec->subsys_str) ||
			    strlen(path[k]) >= sizeof(spec->path)) {
				fprintf(stderr, "cgroup spec is too long.\n");
				free(spec);
				goto failed;
			}
			strcpy(spec->subsys_str, subsys_str[j]);
			strcpy(spec->path, path[k]);
			group_list[i++] = spec;
		}
	}

	return i;

failed:
	while (--i >= 0) {
		free(group_list[i]);
		group_list[i] = NULL;
	}
	return -1;
}

/*
 * Build specs for all known controllers combined with every path.
 *
 * The controller names found in cgroup_subsys_table that the user did
 * not already pass in subsys_str[0..str_num-1] are appended to
 * subsys_str, then make_cgroup_spec() creates the specs.
 *
 * The appended names are private to this call: make_cgroup_spec()
 * copies them into the specs, and the caller only knows about its own
 * str_num entries, so they are freed again before returning.
 *
 * NOTE(review): the capacity of subsys_str is not passed in; the caller
 * must leave room for one entry per table slot.
 *
 * Returns what make_cgroup_spec() returns, or -1 when a name could not
 * be duplicated.
 */
static int
make_all_cgroup_spec(struct cgroup_spec **group_list, char *subsys_str[],
		     char *path[], int str_num, int path_num)
{
	int i, j, k = 0;
	int user_num = str_num;	/* entries owned by the caller */
	const char *name;

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		name = cgroup_subsys_table[i].subsys_str;
		/* skip unused table slots before touching the string */
		if (name == NULL || *name == '\0' || *name == ' ')
			continue;

		for (j = 0; j < str_num; j++) {
			/* if matched string is found, it must be
			 * specified by user, jump out. */
			if (0 == strcmp(name, subsys_str[j]))
				break;
		}
		/* "j == str_num" means no matched string is found,
		 * so add it to subsys_str. */
		if (j != str_num)
			continue;

		subsys_str[str_num] = strdup(name);
		if (!subsys_str[str_num]) {
			fprintf(stderr, "strdup error.\n");
			k = -1;
			goto out;
		}
		str_num++;
	}

	k = make_cgroup_spec(group_list, subsys_str, path, str_num, path_num);

out:
	/* release the names appended above */
	while (str_num > user_num)
		free(subsys_str[--str_num]);

	return k;
}

/*
 * Fill cgroup_subsys_table with the (subsys_id, name) pairs found in
 * the kernel's "subsys" array and record whether cgroups are supported
 * at all.
 *
 * The scan stops at the first empty array element, at the end of the
 * kernel array, or when the local table (CGROUP_SUBSYS_MAX entries) is
 * full.  cgroup_subsys_num receives the number of entries stored.
 */
static void
cgroup_subsys_table_init()
{
	int subsys_num, subsys_id, i;
	ulong subsys_sym, cg_subsys_addr, strptr;
	char buf[CGROUP_STR_LEN] = {0};

	/* This is the total number. There may be empty element in the array. */
	subsys_num = get_array_length("subsys", NULL, 0);
	if (!subsys_num) {
		fprintf(fp, "Warning: cgroup is not supported by this OS.\n");
		is_cgroup_supported = CGROUP_NOT_SUPPORT;
		return;
	}
	is_cgroup_supported = CGROUP_SUPPORTED;
	memset(cgroup_subsys_table, 0, sizeof(struct cgroup_subsys_table) *
	       CGROUP_SUBSYS_MAX);

	/* never write past the end of the local table */
	if (subsys_num > CGROUP_SUBSYS_MAX)
		subsys_num = CGROUP_SUBSYS_MAX;

	subsys_sym = symbol_value("subsys");

	/* get subsys_id and name pairs from kernel */
	for (i = 0; i < subsys_num; i++) {
		if (!readmem(subsys_sym + i * sizeof(ulong),
			     KVADDR, &cg_subsys_addr,
			     sizeof(ulong), "subsys", FAULT_ON_ERROR))
			return;
		if (!cg_subsys_addr)
			break;
		readmem(cg_subsys_addr + MEMBER_OFFSET("cgroup_subsys", "name"),
			KVADDR, &strptr, sizeof(ulong), "cgroup_subsys name",
			FAULT_ON_ERROR);
		readmem(strptr, KVADDR, buf, CGROUP_STR_LEN, "name", FAULT_ON_ERROR);
		/* the raw read copies a fixed byte count; force termination */
		buf[CGROUP_STR_LEN - 1] = '\0';
		readmem(cg_subsys_addr + MEMBER_OFFSET("cgroup_subsys", "subsys_id"),
			KVADDR, &subsys_id, sizeof(int), "cgroup_subsys subsys_id",
			FAULT_ON_ERROR);
		cgroup_subsys_table[i].subsys_id = subsys_id;
		/* the entry was zeroed above, so size - 1 keeps it terminated */
		strncpy(cgroup_subsys_table[i].subsys_str, buf,
			sizeof(cgroup_subsys_table[i].subsys_str) - 1);
	}

	/* this is the real number */
	cgroup_subsys_num = i;
}

/*
 * Parse a "<controller>:<path>" option argument into a new cgroup_spec.
 *
 * group_list: slot that receives the newly allocated spec.
 * optarg:     option text; it is split in place by strtok().
 * index:      position of the slot in the caller's array.
 * capacity:   number of slots in the caller's array.
 *
 * Returns 0 on success.  Returns -1 when the array is full, the text
 * does not contain both parts, a part does not fit into the spec, or
 * memory is exhausted.  When the slot is valid it is left NULL on
 * failure, so the caller never sees a pointer to freed memory.
 */
static int
parse_cgroup_spec(struct cgroup_spec **group_list, char *optarg, int index, int capacity)
{
	char *gctrlptr, *pathptr;
	struct cgroup_spec *spec;

	if (!(index < capacity)) {
		fprintf(stderr, "Max allowed hierarchies %d reached.\n", capacity);
		return -1;
	}

	*group_list = NULL;

	gctrlptr = strtok(optarg, ":");
	pathptr = strtok(NULL, ":");
	if ( gctrlptr == NULL || pathptr == NULL)
		return -1;

	spec = calloc(1, sizeof(struct cgroup_spec));
	if (!spec) {
		fprintf(stderr, "calloc error.\n");
		return -1;
	}

	/* both members are embedded arrays; reject what would not fit */
	if (strlen(gctrlptr) >= sizeof(spec->subsys_str) ||
	    strlen(pathptr) >= sizeof(spec->path)) {
		fprintf(stderr, "cgroup spec is too long.\n");
		free(spec);
		return -1;
	}
	strcpy(spec->subsys_str, gctrlptr);
	strcpy(spec->path, pathptr);

	*group_list = spec;
	return 0;
}

/*
 * Walk the cgroup tree to find the cgroup that corresponds to "path".
 *
 * start:  cgroup currently being examined.
 * end:    top cgroup of the hierarchy, used to stop sibling traversal.
 * srcptr: result slot; a non-zero value means the search already
 *         succeeded and is returned unchanged.
 * path:   remaining path, components separated by '/'.
 *
 * The first component of "path" is compared with the dentry name of
 * "start".  On a match the search continues with the children and the
 * rest of the path, or ends when no path is left; otherwise the next
 * sibling is tried with the same path.
 *
 * NOTE(review): the comparison only covers the first find_len
 * characters, so a cgroup whose name merely starts with the component
 * also matches - confirm whether an exact match is intended.
 *
 * Returns *srcptr (0 when nothing was found).
 */
static ulong
retrieve_path(ulong start, ulong end, ulong *srcptr, const char *path)
{
	char buf[FILENAME_MAX];
	char *p, *pos;
	const char *name;
	ulong children_addr, sibling_addr, parent_addr, dtrp, find_len;

	/* already match */
	if (*srcptr)
		return *srcptr;

	p = calloc(1, strlen(path) + 1);
	if (!p) {
		fprintf(stderr, "calloc error.\n");
		return *srcptr;
	}
	strcpy(p, path);
	pos = strchr(p, '/');
	if (pos) {
		find_len = pos - p;
		pos++;
	} else {
		/* last component: compare all of it */
		find_len = strlen(p);
	}

	readmem(start + cgroup_offset_table.cgroup_dentry, KVADDR,
		&dtrp, sizeof(ulong), "cgroup dentry",
		FAULT_ON_ERROR);
	name = get_dentry_path(dtrp, buf, FILENAME_MAX);

	/* if match */
	if (0 == strcmp("/", name) ||
	    0 == strncmp(p, name, find_len)) {
		if ((pos != NULL) && (*pos != '\0')) {
			/* continue with children */
			readmem(start + cgroup_offset_table.cgroup_children, KVADDR,
				&children_addr, sizeof(struct list_head *),
				"cgroup children", FAULT_ON_ERROR);
			/*
			 * "children.next" of parent is pointing to
			 * the struct "sibling" of children cgroup.
			 * Except there's not any child.
			 */
			if (start != (children_addr - cgroup_offset_table.cgroup_children)) {
				start = (ulong)list_next(NULL, (void *)start,
							 cgroup_offset_table.cgroup_children);
				start = start + cgroup_offset_table.cgroup_children -
						cgroup_offset_table.cgroup_sibling;
				retrieve_path(start, end, srcptr, pos);
			}
		} else {
			*srcptr = start;
			free(p);
			return *srcptr;
		}
	} else {
		/* no match, continue with sibling */
		readmem(start + cgroup_offset_table.cgroup_sibling, KVADDR,
			&sibling_addr, sizeof(struct list_head *), "cgroup sibling",
			FAULT_ON_ERROR);
		readmem(start + cgroup_offset_table.cgroup_parent, KVADDR,
			&parent_addr, sizeof(struct list_head *), "cgroup parent",
			FAULT_ON_ERROR);
		/*
		 * "sibling.next" of one cgroup is pointing to the struct "sibling" of
		 * its sibling cgroup. Except this is the last child of its parent.
		 * "sibling.next" of the last child is pointing to the struct 
		 * "children" of its parent.
		 */
		if ((end != (sibling_addr - cgroup_offset_table.cgroup_children)) &&
		    (parent_addr != (sibling_addr - cgroup_offset_table.cgroup_children))) {
			start = (ulong)list_next(NULL, (void *)start,
						 cgroup_offset_table.cgroup_sibling);
			retrieve_path(start, end, srcptr, p);
		}
	}

	free(p);
	return *srcptr;
}

/*
 * Normalise a user supplied cgroup path.
 *
 * A NULL or empty str_in leaves str_out untouched.  "/" and "." both
 * become "/".  Any other path is copied and a single trailing '/' is
 * removed.  str_out must be large enough for str_in including its
 * terminating NUL.
 */
static void
format_path_str(const char *str_in, char *str_out)
{
	size_t n;

	if (str_in == NULL || *str_in == '\0')
		return;

	/* the root can be spelled "/" or "." */
	if (!strcmp(str_in, "/") || !strcmp(str_in, ".")) {
		str_out[0] = '/';
		str_out[1] = '\0';
		return;
	}

	/* copy including the terminator, then drop one trailing '/' */
	n = strlen(str_in);
	memcpy(str_out, str_in, n + 1);
	if (str_out[n - 1] == '/')
		str_out[n - 1] = '\0';
}

/*
 * Resolve the per-subsystem state address of a cgroup.
 *
 * The path of the spec is normalised and looked up below the top
 * cgroup of the hierarchy at root_addr; the pointer stored for
 * subsys_id in that cgroup's "subsys" array is then read.
 *
 * Returns that pointer, or 0 when the cgroup was not found or the
 * pointer could not be read.
 */
static ulong
get_css_addr(struct cgroup_spec *group_list, int subsys_id, ulong root_addr)
{
	char norm_path[FILENAME_MAX] = {0};
	ulong found = 0;
	ulong css_ptr;
	ulong root_cgroup =
		root_addr + cgroupfs_root_offset_table.cgroupfs_root_top_cgroup;

	format_path_str(group_list->path, norm_path);
	found = retrieve_path(root_cgroup, root_cgroup, &found, norm_path);
	if (!found)
		return 0;

	/* struct css is at the beginning of struct used by each subsystem */
	if (!readmem(found + cgroup_offset_table.cgroup_subsys +
		     subsys_id * sizeof(ulong), KVADDR,
		     &css_ptr, sizeof(ulong), "cgroup subsys",
		     FAULT_ON_ERROR))
		return 0;

	return css_ptr;
}

/*
 * get the address of next entry from an embedded list.
 */
static struct list_head *
list_next(void *head, void *struct_entry, long offset)
{
	ulong entry_addr;

	if (struct_entry) {
		head = (void *)((ulong)struct_entry + offset);
	} else if (head) {
		/* do nothing */
	} else {
		return NULL;
	}

	if (!readmem((ulong)head, KVADDR, &entry_addr, sizeof(struct list_head *),
		     "list_head->next", FAULT_ON_ERROR))
		return NULL;
	return (struct list_head *)(entry_addr - offset);
}

/*
 * Look up the subsystem id that belongs to the controller name of the
 * given spec in cgroup_subsys_table.
 *
 * Returns the id, or -1 when the name is not in the table.
 */
static int
get_subsys_id(struct cgroup_spec *group_list)
{
	int idx;

	for (idx = 0; idx < CGROUP_SUBSYS_COUNT; idx++) {
		if (!strcmp(cgroup_subsys_table[idx].subsys_str,
			    group_list->subsys_str))
			return cgroup_subsys_table[idx].subsys_id;
	}

	return -1;
}

/*
 * Print the parameters of one controller for one cgroup.
 *
 * The subsystem state of the cgroup is resolved first.  When it cannot
 * be found a message is printed unless disp_flag is set.  Otherwise the
 * controller name selects the matching print routine; "ns" and "perf"
 * have nothing to print.
 */
static void
print_cgroup(char *cmd_name, struct cgroup_spec *group_list, int subsys_id,
	     ulong root_addr, int disp_flag)
{
	const char *ctrl = group_list->subsys_str;
	ulong state_addr = get_css_addr(group_list, subsys_id, root_addr);

	if (!state_addr) {
		if (!disp_flag)
			fprintf(fp, "%s: can not find controller '%s' "
				"in group '%s'\n", cmd_name,
				group_list->subsys_str,
				group_list->path);
		return;
	}

	if (!strcmp(ctrl, subsys_name[cpuset_subsys_id])) {
		print_cpuset(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[ns_subsys_id])) {
		/* nothing to be printed */
	} else if (!strcmp(ctrl, subsys_name[cpu_cgroup_subsys_id])) {
		print_cpu(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[cpuacct_subsys_id])) {
		print_cpuacct(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[hugetlb_subsys_id])) {
		print_hugetlb(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[mem_cgroup_subsys_id])) {
		print_memory(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[devices_subsys_id])) {
		print_devices(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[freezer_subsys_id])) {
		print_freezer(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[net_cls_subsys_id])) {
		print_net_cls(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[blkio_subsys_id])) {
		print_blkio(group_list, subsys_id, state_addr);
	} else if (!strcmp(ctrl, subsys_name[perf_subsys_id])) {
		/* nothing to be printed */
	} else if (!strcmp(ctrl, subsys_name[net_prio_subsys_id])) {
		print_net_prio(group_list, subsys_id, state_addr);
	} else {
		fprintf(fp, "not supported controller %s.\n",
			group_list->subsys_str);
	}
}

/*
 * Print the cpuset parameters of a cgroup.
 *
 * Every parameter from CS_CPU_EXCLUSIVE to CS_CPUS is printed as
 * "<subsys>.<param>: <value>".  The flag parameters are single bits of
 * the cpuset flags word, the numeric ones are plain ints, and the
 * memory node / cpu masks are printed as range lists.  The relax
 * domain level is skipped when its offset is recorded as -1.
 */
static void
print_cpuset(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	ulong flag_word, mask_ptr, mask_src,
	      node_mask[BITS_TO_LONGS(vt->numnodes)],
	      cpu_mask[BITS_TO_LONGS(kt->cpus)];
	int param, ival;
	char text[FILENAME_MAX] = {0};

	fprintf(fp, "%s:\n", group_list->path);

	for (param = CS_CPU_EXCLUSIVE; param <= CS_CPUS; param++) {
		if (param == CS_SHED_RELAX_DOMAIN_LEVEL &&
		    cpuset_offset_table.cpuset_shed_relax_domain_level == -1)
			continue;

		fprintf(fp, "%s.%s: ", group_list->subsys_str,
			cpuset_params[param]);

		switch (param)
		{
		case CS_CPU_EXCLUSIVE:
		case CS_MEM_EXCLUSIVE:
		case CS_MEM_HARDWALL:
		case CS_MEMORY_MIGRATE:
		case CS_SCHED_LOAD_BALANCE:
		case CS_SPREAD_PAGE:
		case CS_SPREAD_SLAB:
			/* the parameter index doubles as the bit number */
			readmem(subsys_addr + cpuset_offset_table.cpuset_flags,
				KVADDR, &flag_word, sizeof(ulong),
				"cpuset flags", FAULT_ON_ERROR);
			fprintf(fp, "%d\n", test_bit(param, flag_word));
			break;

		case CS_MEM_PRESSURE_ENABLE:
			readmem(symbol_value("cpuset_memory_pressure_enabled"),
				KVADDR, &ival, sizeof(int),
				"cpuset_memory_pressure_enabled", FAULT_ON_ERROR);
			fprintf(fp, "%d\n", ival);
			break;

		case CS_MEM_PRESSURE:
			readmem(subsys_addr + cpuset_offset_table.cpuset_fmeter +
				MEMBER_OFFSET("fmeter", "val"), KVADDR,
				&ival, sizeof(int),
				"cpuset_memory_pressure", FAULT_ON_ERROR);
			fprintf(fp, "%d\n", ival);
			break;

		case CS_SHED_RELAX_DOMAIN_LEVEL:
			readmem(subsys_addr +
				cpuset_offset_table.cpuset_shed_relax_domain_level,
				KVADDR, &ival, sizeof(int),
				"shed_relax_domain_level", FAULT_ON_ERROR);
			fprintf(fp, "%d\n", ival);
			break;

		case CS_MEMS:
			readmem(subsys_addr +
				cpuset_offset_table.cpuset_mems_allowed,
				KVADDR, node_mask,
				BITS_TO_LONGS(vt->numnodes) * sizeof(long),
				"cpuset_mems", FAULT_ON_ERROR);
			bitmap_scnlistprintf(text, FILENAME_MAX,
					     node_mask, vt->numnodes);
			fprintf(fp, "%s\n", text);
			break;

		case CS_CPUS:
			readmem(subsys_addr +
				cpuset_offset_table.cpuset_cpus_allowed,
				KVADDR, &mask_ptr, sizeof(ulong),
				"cpuset_cpus address", FAULT_ON_ERROR);
			/*
			 * Without alloc_cpumask_var the mask is stored inside
			 * the cpuset; otherwise the member holds a pointer to
			 * the mask.
			 */
			if (symbol_exists("alloc_cpumask_var"))
				mask_src = mask_ptr;
			else
				mask_src = subsys_addr +
					   cpuset_offset_table.cpuset_cpus_allowed;
			readmem(mask_src, KVADDR, cpu_mask,
				BITS_TO_LONGS(kt->cpus) * sizeof(long),
				"cpuset_cpus", FAULT_ON_ERROR);
			bitmap_scnlistprintf(text, FILENAME_MAX, cpu_mask, kt->cpus);
			fprintf(fp, "%s\n", text);
			break;
		}
	}
}

/*
 * Print the cpu controller parameters of a cgroup.
 *
 * Every parameter from CPU_RT_PERIOD up to CPU_NR_PARAMS is printed as
 * "<subsys>.<param>...".  Parameters that depend on rt_bandwidth or
 * cfs_bandwidth are skipped when the corresponding offset is recorded
 * as -1, i.e. the kernel's task_group does not contain that member.
 */
static void
print_cpu(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	ulong rt_bandwidth_addr, cfs_bandwidth_addr;
	long i, off;
	ulong shares;	/* matches the "%lu" conversion used below */
	char buf[FILENAME_MAX];

	fprintf(fp, "%s:\n", group_list->path);

	rt_bandwidth_addr = subsys_addr + tg_offset_table.tg_rt_bandwidth;
	cfs_bandwidth_addr = subsys_addr + tg_offset_table.tg_cfs_bandwidth;

	for (i = CPU_RT_PERIOD; i < CPU_NR_PARAMS; i++) {
		/*
		 * when cfs_bandwidth or rt_bandwidth is not included,
		 * do not output that item.
		 */
		if ((tg_offset_table.tg_rt_bandwidth == -1 &&
		     (i == CPU_RT_PERIOD || i == CPU_RT_RUNTIME)) ||
		    (tg_offset_table.tg_cfs_bandwidth == -1 &&
		     (i > CPU_RT_RUNTIME && i < CPU_SHARES)))
			continue;

		fprintf(fp, "%s.", group_list->subsys_str);
		/* format string of the parameter */
		memset(buf, 0, FILENAME_MAX);
		strcpy(buf, cpu_params[i]);
		switch (i)
		{
		case CPU_RT_PERIOD:
			if (tg_offset_table.tg_rt_bandwidth == -1)
				continue;
			off = MEMBER_OFFSET("rt_bandwidth", "rt_period");
			cpu_print_bandwidth(rt_bandwidth_addr, off, 1, buf);
			break;
		case CPU_RT_RUNTIME:
			if (tg_offset_table.tg_rt_bandwidth == -1)
				continue;
			off = MEMBER_OFFSET("rt_bandwidth", "rt_runtime");
			cpu_print_bandwidth(rt_bandwidth_addr, off, 0, buf);
			break;
		case CPU_STAT:
			fprintf(fp, "%s: \n", buf);
			if (tg_offset_table.tg_cfs_bandwidth == -1)
				continue;
			cpu_print_stat(cfs_bandwidth_addr);
			break;
		case CPU_CFS_PERIOD:
			if (tg_offset_table.tg_cfs_bandwidth == -1)
				continue;
			off = MEMBER_OFFSET("cfs_bandwidth", "period");
			cpu_print_bandwidth(cfs_bandwidth_addr, off, 1, buf);
			break;
		case CPU_CFS_QUOTA:
			if (tg_offset_table.tg_cfs_bandwidth == -1)
				continue;
			off = MEMBER_OFFSET("cfs_bandwidth", "quota");
			cpu_print_bandwidth(cfs_bandwidth_addr, off, 0, buf);
			break;
		case CPU_SHARES:
			shares = (ulong)read_member_long(subsys_addr +
							 tg_offset_table.tg_shares);
			fprintf(fp, "%s: %lu\n", buf, shares);
			break;
		default:
			break;
		}
	}
}

/*
 * Print the cpuacct parameters of a cgroup.
 *
 * Every parameter from CPUACCT_STAT up to CPUACCT_NR_PARAMS is printed
 * as "<subsys>.<param>: ...".  The stat entry is skipped when the
 * cpustat offset is recorded as -1.  The usage total is the value
 * returned by cpuacct_print_usage_percpu(); it starts at 0 so that a
 * defined value is printed even if the usage entry were reached before
 * the per-cpu one.
 */
static void
print_cpuacct(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	int i = 0;
	uint64_t total = 0;
	char buf[FILENAME_MAX];

	fprintf(fp, "%s:\n", group_list->path);


	for (i = CPUACCT_STAT; i < CPUACCT_NR_PARAMS; i++) {
		if (cpuacct_offset_table.cpuacct_cpustat == -1 &&
		    i == CPUACCT_STAT)
			continue;
		/* format string of the parameter */
		memset(buf, 0, FILENAME_MAX);
		strcpy(buf, cpuacct_params[i]);
		fprintf(fp, "%s.%s: ", group_list->subsys_str, buf);
		switch (i)
		{
		case CPUACCT_STAT:
			cpuacct_print_stat(subsys_addr);
			break;
		case CPUACCT_USAGE_PERCPU:
			total = cpuacct_print_usage_percpu(subsys_addr);
			break;
		case CPUACCT_USAGE:
			/* 64-bit value: print it with a conversion of that width */
			fprintf(fp, "%llu\n", (unsigned long long)total);
			break;
		default:
			break;
		}
	}
}

/*
 * Print the hugetlb parameters of a cgroup.
 *
 * The page order is read from the start of the kernel's "hstates"
 * symbol and formatted into the parameter name by hugepage_fmt().
 * The failcnt, max_usage, usage and limit values of the res_counter at
 * the hugepage offset are then printed.
 */
static void
print_hugetlb(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	ulong hugepage_addr;
	uint order;
	uint64_t val;
	char buf[FILENAME_MAX] = {0};

	fprintf(fp, "%s:\n", group_list->path);

	hugepage_addr = subsys_addr + hugetlb_offset_table.hugetlb_hugepage;

	readmem(symbol_value("hstates") + MEMBER_OFFSET("hstate", "order"), KVADDR,
		&order, sizeof(uint), "hstate_order", FAULT_ON_ERROR);
	hugepage_fmt(buf, PAGE_SIZE << order);

	/* the counters are 64-bit: print them with a conversion of that width */
	val = read_res_counter(hugepage_addr, MEMBER_OFFSET("res_counter",
				"failcnt"), NULL);
	fprintf(fp, "%s.%s.failcnt: %llu\n", group_list->subsys_str, buf,
		(unsigned long long)val);
	val = read_res_counter(hugepage_addr, MEMBER_OFFSET("res_counter",
				"max_usage"), NULL);
	fprintf(fp, "%s.%s.max_usage_in_bytes: %llu\n",
		group_list->subsys_str, buf, (unsigned long long)val);
	val = read_res_counter(hugepage_addr, MEMBER_OFFSET("res_counter",
				"usage"), NULL);
	fprintf(fp, "%s.%s.usage_in_bytes: %llu\n",
		group_list->subsys_str, buf, (unsigned long long)val);
	val = read_res_counter(hugepage_addr, MEMBER_OFFSET("res_counter",
				"limit"), NULL);
	fprintf(fp, "%s.%s.limit_in_bytes: %llu\n",
		group_list->subsys_str, buf, (unsigned long long)val);
}

/*
 * Print the memory controller parameters of a cgroup.
 *
 * The local and total statistics are collected first; if either step
 * fails, "get parameters failed." is printed and nothing else.  Then
 * every parameter from MEM_MEMSW_FAILCNT up to MEM_NR_PARAMS is printed
 * as "<subsys>.<param>...".  Parameters whose offset is recorded as -1
 * are skipped.
 *
 * The hierarchical limits are only printed when they were actually
 * computed, so an uninitialised value is never shown.
 */
static void
print_memory(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	ulong mem_res_addr, mem_memsw_addr, ptr;
	uint64_t val64 = 0, usage = 0, mem_limit = 0, memsw_limit = 0;
	int i, j, val32 = 0, do_swap_account = 0, have_limits = 0;
	int64_t lstats[NR_MCS_STAT] = {0}, tstats[NR_MCS_STAT] = {0};
	char buf[FILENAME_MAX];

	fprintf(fp, "%s:\n", group_list->path);

	/* check if do swap account */
	if (symbol_exists("do_swap_account"))
		readmem(symbol_value("do_swap_account"), KVADDR, &do_swap_account,
			sizeof(int), "do_swap_account", FAULT_ON_ERROR);

	/* get local memory stat values */
	if ( 0 != get_mem_local_stats(subsys_addr, lstats, do_swap_account)) {
		fprintf(fp, "get parameters failed.\n");
		return;
	}
	/* get total memory stat values */
	if (0 != get_mem_total_stats(subsys_addr, tstats, do_swap_account)) {
		fprintf(fp, "get parameters failed.\n");
		return;
	}

	/* address of memsw and res */
	if(memory_offset_table.memory_memsw != -1)
		mem_memsw_addr = subsys_addr + memory_offset_table.memory_memsw;
	else
		mem_memsw_addr = -1;
	mem_res_addr = subsys_addr + memory_offset_table.memory_res;

	for (i = MEM_MEMSW_FAILCNT; i < MEM_NR_PARAMS; i++) {
		/* format string of the parameter */
		memset(buf, 0, FILENAME_MAX);
		sprintf(buf, "%s.%s", group_list->subsys_str,
			memory_params[i]);
		switch (i)
		{
		case MEM_MEMSW_FAILCNT:
			read_res_counter(mem_memsw_addr,
					 memory_offset_table.counter_failcnt,
					 buf);
			break;
		case MEM_MEMSW_LIMIT:
			read_res_counter(mem_memsw_addr,
					 memory_offset_table.counter_limit,
					 buf);
			break;
		case MEM_MEMSW_MAX_USAGE:
			read_res_counter(mem_memsw_addr,
					 memory_offset_table.counter_max_usage,
					 buf);
			break;
		case MEM_MEMSW_USAGE:
			usage = get_mem_usage(subsys_addr, do_swap_account);
			fprintf(fp, "%s: %llu\n", buf,
				(unsigned long long)usage);
			break;
		case MEM_NUMA_STAT:
			/* when numa is not configured, just break; */
			if (!MEMBER_EXISTS("mem_cgroup", "scan_nodes"))
				break;
			fprintf(fp, "%s:\n", buf);
			mem_print_numa_stat(subsys_addr, (uint64_t *)lstats);
			break;
		case MEM_OOM_CTRL:
			if (memory_offset_table.memory_oom_kill_disable != -1) {
				fprintf(fp, "%s:\n", buf);
				mem_print_oom_ctrl(subsys_addr);
			}
			break;
		case MEM_MCAI:
			if (memory_offset_table.memory_mcai != -1) {
				ptr = subsys_addr +
				      memory_offset_table.memory_mcai;
				val64 = (ulong)read_member_long(ptr);
				fprintf(fp, "%s: %llu\n", buf,
					(unsigned long long)val64);
			}
			break;
		case MEM_SWAP:
			mem_print_swap(subsys_addr, buf);
			break;
		case MEM_USE_HIER:
			if (memory_offset_table.memory_use_hierarchy != -1) {
				ptr = subsys_addr +
				      memory_offset_table.memory_use_hierarchy;
				val32 = (int)read_member_32(ptr, "use_hier");
				fprintf(fp, "%s: %d\n", buf, val32);
			}
			break;
		case MEM_FORCE_EMPTY:
			/* nothing to print out */
			fprintf(fp, "%s: \n", buf);
			break;
		case MEM_STAT:
			/* output the local stats */
			for (j = 0; j < NR_MCS_STAT; j++) {
				if (j > MCS_ACTIVE_ANON &&
				    MEMBER_EXISTS("mem_cgroup_per_zone",
						  "active_list"))
					break;
				if ((j == MCS_SWAP && !do_swap_account) ||
				     lstats[j] == -1)
					continue;
				if (j == 0)
					fprintf(fp, "%s: ", buf);
				else
					fprintf(fp, "\t");
				fprintf(fp, "%s %lld\n",
					memcg_stat_strings[j].local_name,
					(long long)lstats[j]);
			}
			if (memory_offset_table.memory_use_hierarchy != -1) {
				/* output the hierarchical memory limit */
				get_mem_hierarchical_limit(subsys_addr, &mem_limit,
							   &memsw_limit, subsys_id);
				have_limits = 1;
				fprintf(fp, "\thierarchical_memory_limit %llu\n",
					(unsigned long long)mem_limit);
			}
			/* memsw_limit only holds a value once it was computed */
			if (do_swap_account && have_limits)
				fprintf(fp, "\thierarchical_memsw_limit %llu\n",
					(unsigned long long)memsw_limit);
			/* output the total stats */
			for (j = 0; j < NR_MCS_STAT; j++) {
				if (j > MCS_ACTIVE_ANON &&
				    MEMBER_EXISTS("mem_cgroup_per_zone",
						  "active_list"))
					break;
				if ((j == MCS_SWAP && !do_swap_account) ||
				     tstats[j] == -1)
					continue;
				fprintf(fp, "\t%s %lld\n",
					memcg_stat_strings[j].total_name,
					(long long)tstats[j]);
			}
			break;
		case MEM_FAILCNT:
			read_res_counter(mem_res_addr,
					 memory_offset_table.counter_failcnt,
					 buf);
			break;
		case MEM_SOFT_LIMIT:
			read_res_counter(mem_res_addr,
					 memory_offset_table.counter_soft_limit,
					 buf);
			break;
		case MEM_LIMIT:
			read_res_counter(mem_res_addr,
					 memory_offset_table.counter_limit,
					 buf);
			break;
		case MEM_MAX_USAGE:
			read_res_counter(mem_res_addr,
					 memory_offset_table.counter_max_usage,
					 buf);
			break;
		case MEM_USAGE:
			usage = get_mem_usage(subsys_addr, 0);
			fprintf(fp, "%s: %llu\n", buf,
				(unsigned long long)usage);
			break;
		default:
			break;
		}
	}
}

/*
 * Print the devices controller parameters of a cgroup.
 *
 * The whitelist hanging off the devices cgroup is walked and every
 * item is handed to read_whitelist(); the (always empty) deny and
 * allow lines follow each item.
 *
 * An empty whitelist is detected up front: in that case the list head
 * points back to itself and must not be interpreted as an item.  Only
 * the deny and allow lines are printed then.
 */
static void
print_devices(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	ulong whitelist_addr, list_head;
	long list_off = MEMBER_OFFSET("dev_whitelist_item", "list");

	fprintf(fp, "%s:\n", group_list->path);

	list_head = subsys_addr + devices_offset_table.devices_whitelist;
	whitelist_addr = (ulong)list_next((void *)list_head, NULL, list_off);

	/* empty list: "next" of the head leads back to the head itself */
	if (list_head == whitelist_addr + list_off) {
		fprintf(fp, "%s.deny: \n", group_list->subsys_str);
		fprintf(fp, "%s.allow: \n", group_list->subsys_str);
		return;
	}

	do {
		if (0 != read_whitelist(group_list, whitelist_addr)) {
			fprintf(fp, "get parameters failed.\n");
			return;
		}
		fprintf(fp, "%s.deny: \n", group_list->subsys_str);
		fprintf(fp, "%s.allow: \n", group_list->subsys_str);
		whitelist_addr = (ulong)list_next(NULL, (void *)whitelist_addr,
						  list_off);
	} while (list_head != whitelist_addr + list_off);
}

/*
 * Print the freezer state of a cgroup.
 *
 * Nothing is printed for a cgroup without a parent (the '/' directory).
 * An unknown state value is reported instead of being used as an index
 * into the state name table.
 */
static void
print_freezer(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	int state;

	/* there's nothing to be printed for '/' directory */
	if (!get_subsys_parent(subsys_addr, subsys_id))
		return;

	fprintf(fp, "%s:\n", group_list->path);
	readmem(subsys_addr + freezer_offset_table.freezer_state , KVADDR,
		&state, sizeof(enum freezer_state), "freezer_state",
		FAULT_ON_ERROR);

	if (state != CGROUP_THAWED &&
	    state != CGROUP_FREEZING &&
	    state != CGROUP_FROZEN) {
		fprintf(fp, "wrong value of freezer state.\n");
		return;
	}

	fprintf(fp, "%s.state: %s\n", group_list->subsys_str,
		freezer_state_strs[state]);
}

/*
 * Print the net_cls class id of a cgroup.
 *
 * The class id is read as an unsigned 32-bit value and printed as
 * such, so ids above INT_MAX do not show up as negative numbers.
 */
static void
print_net_cls(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	uint32_t classid;

	fprintf(fp, "%s:\n", group_list->path);

	readmem(subsys_addr + cls_offset_table.cls_classid , KVADDR,
		&classid, sizeof(uint32_t), "cls_classid",
		FAULT_ON_ERROR);
	fprintf(fp, "%s.classid: %u\n", group_list->subsys_str,
		(unsigned int)classid);
}

/*
 * Print the net_prio index of a cgroup.
 *
 * The index is read as an unsigned 32-bit value and printed as such.
 */
static void
print_net_prio(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	uint32_t prioidx;

	fprintf(fp, "%s:\n", group_list->path);

	readmem(subsys_addr + netprio_offset_table.netprio_prioidx , KVADDR,
		&prioidx, sizeof(uint32_t), "netprio_prioidx",
		FAULT_ON_ERROR);
	fprintf(fp, "%s.prioidx: %u\n", group_list->subsys_str,
		(unsigned int)prioidx);
}

/*
 * Print the blkio parameters of a cgroup.
 *
 * Three kernel layouts are handled:
 *  - struct blkcg exists: every attribute of the throttle and
 *    proportional-weight policies goes through read_policy_group();
 *  - the blkg_stats offset is recorded as -1 (kernel 2.6.33/2.6.34):
 *    blkio_print_param_old() prints everything;
 *  - otherwise every attribute goes through blkio_read_map().
 *
 * Except in the old-kernel case an empty "reset_stats" line is printed
 * last.
 */
static void
print_blkio(struct cgroup_spec *group_list, int subsys_id, ulong subsys_addr)
{
	int plid, atid;

	fprintf(fp, "%s:\n", group_list->path);

	if (STRUCT_EXISTS("blkcg")) {
		for (plid = BLKCG_POLICY_THROTL; plid <= BLKCG_POLICY_PROP; plid++) {
			if (plid == BLKCG_POLICY_PROP)
				for (atid = 1; atid <= BLKIO_PROP_io_queued; atid++) {
					read_policy_group(subsys_addr,
							  plid, atid,
							  group_list->subsys_str);
				}
			else
				for (atid = BLKIO_THROTL_read_bps_device;
					atid <= BLKIO_THROTL_io_serviced; atid++) {
					read_policy_group(subsys_addr,
							  plid, atid,
							  group_list->subsys_str);
				}
		}
		/* there should be nothing to be displayed for reset_stats */
		fprintf(fp, "%s.reset_stats: \n", group_list->subsys_str);
		return;
	}

	if (blkio_offset_table.blkg_stats == -1) {
		/* for kernel version 2.6.33, 2.6.34 */
		blkio_print_param_old(subsys_addr);
		return;
	}

	for (plid = 0; plid <= BLKIO_POLICY_THROTL; plid++) {
		if (plid == BLKIO_POLICY_PROP)
			for (atid = 1; atid <= BLKIO_PROP_io_queued; atid++) {
				blkio_read_map(subsys_addr, plid, atid,
						group_list->subsys_str);
			}
		else
			for (atid = BLKIO_THROTL_read_bps_device;
				atid <= BLKIO_THROTL_io_serviced; atid++) {
				blkio_read_map(subsys_addr, plid, atid,
						group_list->subsys_str);
			}
	}
	/* there should be nothing to be displayed for reset_stats */
	fprintf(fp, "%s.reset_stats: \n", group_list->subsys_str);
}

/*
 * Print every spec of group_list.
 *
 * For each spec the subsystem id is looked up, then the kernel's list
 * of hierarchy roots is walked.  Each root whose top cgroup has a
 * non-NULL state pointer for that subsystem is printed through
 * print_cgroup().  When no root carries the controller a message is
 * printed unless disp_flag is set.
 *
 * root_count is read into an int because exactly sizeof(int) bytes are
 * fetched; a wider variable would hold a wrong value on big-endian
 * hosts.
 */
static void
print_cgroup_list(char *cmd_name, struct cgroup_spec *group_list[], int num, int disp_flag)
{
	ulong subsys_addr, css_addr;
	int root_count = 0;
	int subsys_id;
	int i, j, found;
	struct list_head *list_head, *pos;

	list_head = (struct list_head *)symbol_value("roots");
	if (!readmem(symbol_value("root_count"), KVADDR, &root_count,
		     sizeof(int), "root_count", FAULT_ON_ERROR))
		return;

	for (j = 0; j < num; j++) {
		found = 0;
		pos = list_head;
		subsys_id = get_subsys_id(group_list[j]);
		if (subsys_id < 0) {
			fprintf(fp, "Subsys %s does not exist.\n",
				group_list[j]->subsys_str);
			continue;
		}

		for (i = 2; i <= root_count; i++) {
			/* first element is only the head of this list, skip it. */
			if (i == 2) {
				pos = list_next(pos, NULL,
						cgroupfs_root_offset_table.cgroupfs_root_root_list);
			} else {
				pos = list_next(NULL, pos,
						cgroupfs_root_offset_table.cgroupfs_root_root_list);
			}
			if (pos)
				css_addr = cgroupfs_root_offset_table.cgroupfs_root_top_cgroup +
					   cgroup_offset_table.cgroup_subsys + (ulong)pos;
			else
				continue;
			if (!readmem((css_addr + subsys_id * sizeof(ulong)), KVADDR,
				      &subsys_addr, sizeof(ulong), "cgroup subsys",
				      FAULT_ON_ERROR))
				continue;
			if (subsys_addr != 0) {
				found = 1;
				print_cgroup(cmd_name, group_list[j],
					     subsys_id, (ulong)pos,
					     disp_flag);
			}
		}

		if (!found && !disp_flag) {
			fprintf(fp, "%s: can not find controller '%s' "
				"in group '%s'\n", cmd_name,
				group_list[j]->subsys_str,
				group_list[j]->path);
		}
	}
}

/*
 * Return 1 when the bit selected by NUM_TO_BIT(nr) is set in flags,
 * 0 otherwise.
 */
static inline int
test_bit(int nr, ulong flags)
{
	return (NUM_TO_BIT(nr) & flags) ? 1 : 0;
}

/*
 * Format the set bits of a bitmap as a list of ranges.
 *
 * buf:       output buffer, cleared first.
 * buflen:    size of buf; 0 makes the function return 0 immediately.
 * maskp:     bitmap to format.
 * nmaskbits: number of valid bits in maskp.
 *
 * Every run of consecutive set bits [rbot, rtop] is appended through
 * bitstr_edit().  Only bits below nmaskbits are examined, so the word
 * after the bitmap is never read and rtop is always defined.
 *
 * NOTE(review): bitstr_edit() is not given buflen, so the output is
 * not bounded here.
 *
 * Returns the length reported by the last bitstr_edit() call, or 0
 * when no bit is set.
 */
static inline int
bitmap_scnlistprintf(char *buf, unsigned int buflen, ulong *maskp, int nmaskbits)
{
	int len = 0;
	/* current bit is 'cur', most recently seen range is [rbot, rtop] */
	int cur, rbot, rtop, i;

	if (buflen == 0)
		return 0;
	memset(buf, 0, buflen);

	for (i = 0; i < nmaskbits; i++) {
		if (!NUM_IN_BITMAP(maskp, i))
			continue;

		rbot = cur = i;
		/* extend the range while the next bit is valid and set */
		while (cur + 1 < nmaskbits && NUM_IN_BITMAP(maskp, cur + 1))
			cur++;
		rtop = cur;

		len = bitstr_edit(buf, rbot, rtop, len);
		/* bit cur + 1 is known to be clear (or out of range) */
		i = cur + 1;
	}
	return len;
}

/*
 * Initialise all offset tables used by cgget by calling the
 * per-structure init routines one after another.
 */
static void
cgget_offset_table_init(void)
{
	cgroupfs_root_offset_table_init();
	cgroup_offset_table_init();
	cpuset_offset_table_init();
	tg_offset_table_init();
	cpuacct_offset_table_init();
	hugetlb_offset_table_init();
	memory_offset_table_init();
	devices_offset_table_init();
	freezer_offset_table_init();
	cls_offset_table_init();
	blkio_offset_table_init();
	netprio_offset_table_init();
}

/*
 * Fill cgroupfs_root_offset_table: one CGGET_MEMBER_OFFSET_INIT() invocation
 * per entry, for the "top_cgroup", "number_of_cgroups" and "root_list"
 * members of "cgroupfs_root".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
cgroupfs_root_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(cgroupfs_root_offset_table, cgroupfs_root_top_cgroup,
				 "cgroupfs_root", "top_cgroup");
	CGGET_MEMBER_OFFSET_INIT(cgroupfs_root_offset_table,
				 cgroupfs_root_number_of_cgroups,
				 "cgroupfs_root", "number_of_cgroups");
	CGGET_MEMBER_OFFSET_INIT(cgroupfs_root_offset_table, cgroupfs_root_root_list,
				 "cgroupfs_root", "root_list");
}

/*
 * Fill cgroup_offset_table: one CGGET_MEMBER_OFFSET_INIT() invocation per
 * entry, for the "sibling", "children", "parent", "dentry" and "subsys"
 * members of "cgroup".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
cgroup_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(cgroup_offset_table, cgroup_sibling,
				 "cgroup", "sibling");
	CGGET_MEMBER_OFFSET_INIT(cgroup_offset_table, cgroup_children,
				 "cgroup", "children");
	CGGET_MEMBER_OFFSET_INIT(cgroup_offset_table, cgroup_parent,
				 "cgroup", "parent");
	CGGET_MEMBER_OFFSET_INIT(cgroup_offset_table, cgroup_dentry,
				 "cgroup", "dentry");
	CGGET_MEMBER_OFFSET_INIT(cgroup_offset_table, cgroup_subsys,
				 "cgroup", "subsys");
}

/*
 * Fill cpuset_offset_table: one CGGET_MEMBER_OFFSET_INIT() invocation per
 * entry, for the "flags", "cpus_allowed", "mems_allowed", "fmeter" and
 * "relax_domain_level" members of "cpuset".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
cpuset_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(cpuset_offset_table, cpuset_flags,
				 "cpuset", "flags");
	CGGET_MEMBER_OFFSET_INIT(cpuset_offset_table, cpuset_cpus_allowed,
				 "cpuset", "cpus_allowed");
	CGGET_MEMBER_OFFSET_INIT(cpuset_offset_table, cpuset_mems_allowed,
				 "cpuset", "mems_allowed");
	CGGET_MEMBER_OFFSET_INIT(cpuset_offset_table, cpuset_fmeter,
				 "cpuset", "fmeter");
	/*
	 * NOTE(review): the table field is spelled "shed" while the member is
	 * "relax_domain_level"; presumably "sched" was intended.  The spelling
	 * must match the table declaration, which is outside this view.
	 */
	CGGET_MEMBER_OFFSET_INIT(cpuset_offset_table, cpuset_shed_relax_domain_level,
				 "cpuset", "relax_domain_level");
}

/*
 * Fill tg_offset_table: one CGGET_MEMBER_OFFSET_INIT() invocation per entry,
 * for the "shares", "rt_bandwidth" and "cfs_bandwidth" members of
 * "task_group".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
tg_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(tg_offset_table, tg_shares,
				 "task_group", "shares");
	CGGET_MEMBER_OFFSET_INIT(tg_offset_table, tg_rt_bandwidth,
				 "task_group", "rt_bandwidth");
	CGGET_MEMBER_OFFSET_INIT(tg_offset_table, tg_cfs_bandwidth,
				 "task_group", "cfs_bandwidth");
}

/*
 * Fill cpuacct_offset_table: one CGGET_MEMBER_OFFSET_INIT() invocation per
 * entry, for the "cpuusage" and "cpustat" members of "cpuacct".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
cpuacct_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(cpuacct_offset_table, cpuacct_cpuusage,
				 "cpuacct", "cpuusage");
	CGGET_MEMBER_OFFSET_INIT(cpuacct_offset_table, cpuacct_cpustat,
				 "cpuacct", "cpustat");
}

/*
 * Fill hugetlb_offset_table: a single CGGET_MEMBER_OFFSET_INIT() invocation
 * for the "hugepage" member of "hugetlb_cgroup".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
hugetlb_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(hugetlb_offset_table, hugetlb_hugepage,
				 "hugetlb_cgroup", "hugepage");
}

/*
 * Fill memory_offset_table with entries for "mem_cgroup", "res_counter" and
 * "mem_cgroup_per_zone".
 *
 * Three entries depend on what the target provides:
 *   - memory_memsw:     looked up as "memsw" unless "mem_cgroup" has a
 *                       "tcp_mem" member, in which case it is computed as
 *                       memory_res plus the size of "res_counter";
 *   - memory_under_oom: taken from "under_oom" when present, otherwise from
 *                       "oom_lock";
 *   - perzone_count:    taken from "count", and looked up again as
 *                       "lru_size" when the first attempt left -1.
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
memory_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_res,
		"mem_cgroup", "res");

	if (!(MEMBER_EXISTS("mem_cgroup", "tcp_mem"))) {
		CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_memsw,
			"mem_cgroup", "memsw");
	} else {
		/* memsw is taken to start right behind the "res" counter */
		memory_offset_table.memory_memsw =
			memory_offset_table.memory_res + STRUCT_SIZE("res_counter");
	}

	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_info,
		"mem_cgroup", "info");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_stat,
		"mem_cgroup", "stat");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_oom_kill_disable,
		"mem_cgroup", "oom_kill_disable");

	if (MEMBER_EXISTS("mem_cgroup", "under_oom")) {
		CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_under_oom,
			"mem_cgroup", "under_oom");
	} else {
		CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_under_oom,
			"mem_cgroup", "oom_lock");
	}

	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_mcai,
		"mem_cgroup", "move_charge_at_immigrate");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_swappiness,
		"mem_cgroup", "swappiness");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, memory_use_hierarchy,
		"mem_cgroup", "use_hierarchy");

	/* members of the embedded resource counters */
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, counter_usage,
		"res_counter", "usage");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, counter_max_usage,
		"res_counter", "max_usage");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, counter_limit,
		"res_counter", "limit");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, counter_soft_limit,
		"res_counter", "soft_limit");
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, counter_failcnt,
		"res_counter", "failcnt");

	/* try "count" first; fall back to "lru_size" if that left -1 */
	CGGET_MEMBER_OFFSET_INIT(memory_offset_table, perzone_count,
		"mem_cgroup_per_zone", "count");
	if (memory_offset_table.perzone_count == -1) {
		CGGET_MEMBER_OFFSET_INIT(memory_offset_table, perzone_count,
			"mem_cgroup_per_zone", "lru_size");
	}
}

/*
 * Fill devices_offset_table: a single CGGET_MEMBER_OFFSET_INIT() invocation
 * for the "whitelist" member of "dev_cgroup".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
devices_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(devices_offset_table, devices_whitelist,
				 "dev_cgroup", "whitelist");
}

/*
 * Fill freezer_offset_table: a single CGGET_MEMBER_OFFSET_INIT() invocation
 * for the "state" member of "freezer".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
freezer_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(freezer_offset_table, freezer_state,
				 "freezer", "state");
}

/*
 * Fill cls_offset_table: a single CGGET_MEMBER_OFFSET_INIT() invocation for
 * the "classid" member of "cgroup_cls_state".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
cls_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(cls_offset_table, cls_classid,
				 "cgroup_cls_state", "classid");
}

/*
 * Fill blkio_offset_table.
 *
 * Which type names are consulted for the first group of entries depends on
 * STRUCT_EXISTS("blkcg"):
 *   - present: "blkcg", "blkcg_gq", "cfq_group" and "cfqg_stats" are used;
 *   - absent:  "blkio_cgroup" and "blkio_group" are used, and
 *              blkio_policy_list is set as well.
 * The "blkio_group" and "blkio_policy_node" entries that follow are set in
 * both cases.  blkp_weight comes from the "val" member of
 * "blkio_policy_node" when it exists, otherwise from "weight".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
blkio_offset_table_init()
{
	if (STRUCT_EXISTS("blkcg")) {
		/* layout built around "blkcg" / "blkcg_gq" */
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkio_blkg_list,
			"blkcg", "blkg_list");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkg_blkcg_node,
			"blkcg_gq", "blkcg_node");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkio_weight,
			"blkcg", "cfq_weight");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkg_pd,
			"blkcg_gq", "pd");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfq_group_stats,
			"cfq_group", "stats");

		/* individual counters inside "cfqg_stats" */
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_service_bytes,
			"cfqg_stats", "service_bytes");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_serviced,
			"cfqg_stats", "serviced");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_merged,
			"cfqg_stats", "merged");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_queued,
			"cfqg_stats", "queued");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_service_time,
			"cfqg_stats", "service_time");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_wait_time,
			"cfqg_stats", "wait_time");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_sectors,
			"cfqg_stats", "sectors");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, cfqg_stats_time,
			"cfqg_stats", "time");
	} else {
		/* layout built around "blkio_cgroup" / "blkio_group" */
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkio_blkg_list,
			"blkio_cgroup", "blkg_list");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkio_policy_list,
			"blkio_cgroup", "policy_list");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkio_weight,
			"blkio_cgroup", "weight");
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkg_blkcg_node,
			"blkio_group", "blkcg_node");
	}

	/* set regardless of which branch ran above */
	CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkg_dev,
		"blkio_group", "dev");
	CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkg_plid,
		"blkio_group", "plid");
	CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkg_stats,
		"blkio_group", "stats");
	CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkg_stats_cpu,
		"blkio_group", "stats_cpu");

	CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkp_dev,
		"blkio_policy_node", "dev");
	CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkp_plid,
		"blkio_policy_node", "plid");
	CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkp_fileid,
		"blkio_policy_node", "fileid");

	/* the per-node weight lives in "val" when present, else in "weight" */
	if (MEMBER_EXISTS("blkio_policy_node", "val")) {
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkp_weight,
			"blkio_policy_node", "val");
	} else {
		CGGET_MEMBER_OFFSET_INIT(blkio_offset_table, blkp_weight,
			"blkio_policy_node", "weight");
	}
}

/*
 * Fill netprio_offset_table: a single CGGET_MEMBER_OFFSET_INIT() invocation
 * for the "prioidx" member of "cgroup_netprio_state".
 *
 * CGGET_MEMBER_OFFSET_INIT is defined outside this view; it is invoked as
 * (table, table field, type name, member name) and presumably stores the
 * member's offset in table.field -- confirm against the macro definition.
 */
static void
netprio_offset_table_init()
{
	CGGET_MEMBER_OFFSET_INIT(netprio_offset_table, netprio_prioidx,
				 "cgroup_netprio_state", "prioidx");
}

--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux