On Tue, Oct 25, 2022 at 8:39 PM <crash-utility-request@xxxxxxxxxx> wrote: > Date: Tue, 25 Oct 2022 20:38:20 +0800 > From: Tao Liu <ltao@xxxxxxxxxx> > To: crash-utility@xxxxxxxxxx > Subject: [PATCH v2 1/6] Port the maple tree data > structures and main functions > Message-ID: <20221025123825.36421-2-ltao@xxxxxxxxxx> > Content-Type: text/plain; charset="US-ASCII"; x-default=true > > There are 2 ways to iterate vm_area_struct: 1) by rbtree, > aka vma.vm_rb; 2) by linked list, aka vma.vm_prev/next. > However for linux maple tree patch[1][2], vm_rb and vm_prev/next > are removed from vm_area_struct. For memory.c:vm_area_dump > of crash, it mainly uses linked list as a way of vma iteration, > which will not work for this case. So maple tree iteration > needs to be ported to crash. > > For crash, currently it only iteratively reads the maple tree, > no more rcu safe or maple tree modification features > needed. So we only port a subset of kernel maple tree > features. In addition, we need to modify the ported kernel > source code, making it compatible with crash. > > The formal crash way of vmcore struct member resolving is: > > readmem(node, KVADDR, buf, SIZE(buf), "", flag); > return buf + OFFSET(member); > > which is the reimplementation of kernel way of member resolving: > > return node->member; > > The 1st one is arch independent, it uses gdb to resolve the OFFSET > of members, so crash doesn't need to know what the inside of the > struct is, even if the struct changes for new kernel version. The 2nd > one is arch dependent, the struct needs to be ported to crash, and the > OFFSET of members may differ between crash and kernel due to padding/ > alignment or optimization reasons. > > This patch deals with the 2 issues: 1) Porting for_each_vma() macro, and > all its dependencies from kernel source[3] to crash, to enable crash > maple tree vma iteration, 2) adapting the ported code with crash.
> > [1]: https://github.com/oracle/linux-uek/commit/d19703645b80abe35dff1a88449d074b0b5b1bb1 > [2]: https://github.com/oracle/linux-uek/commit/91dee01f1ebb6b6587463b6ee6f7bbc4965f91d5 > [3]: https://github.com/oracle/linux-uek, maple/mainline branch > > Signed-off-by: Tao Liu <ltao@xxxxxxxxxx> > --- > Makefile | 10 +- > defs.h | 19 ++ > maple_tree.c | 833 +++++++++++++++++++++++++++++++++++++++++++++++++++ > maple_tree.h | 214 +++++++++++++ > 4 files changed, 1073 insertions(+), 3 deletions(-) > create mode 100644 maple_tree.c > create mode 100644 maple_tree.h > > diff --git a/Makefile b/Makefile > index 79aef17..6f19b77 100644 > --- a/Makefile > +++ b/Makefile > @@ -59,6 +59,7 @@ IBM_HFILES=ibm_common.h > SADUMP_HFILES=sadump.h > UNWIND_HFILES=unwind.h unwind_i.h rse.h unwind_x86.h unwind_x86_64.h > VMWARE_HFILES=vmware_vmss.h > +MAPLE_TREE_HFILES=maple_tree.h > > CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \ > kernel.c test.c gdb_interface.c configure.c net.c dev.c bpf.c \ > @@ -73,12 +74,12 @@ CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \ > xen_hyper.c xen_hyper_command.c xen_hyper_global_data.c \ > xen_hyper_dump_tables.c kvmdump.c qemu.c qemu-load.c sadump.c ipcs.c \ > ramdump.c vmware_vmss.c vmware_guestdump.c \ > - xen_dom0.c kaslr_helper.c sbitmap.c > + xen_dom0.c kaslr_helper.c sbitmap.c maple_tree.c > > SOURCE_FILES=${CFILES} ${GENERIC_HFILES} ${MCORE_HFILES} \ > ${REDHAT_CFILES} ${REDHAT_HFILES} ${UNWIND_HFILES} \ > ${LKCD_DUMP_HFILES} ${LKCD_TRACE_HFILES} ${LKCD_OBSOLETE_HFILES}\ > - ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES} > + ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES} ${MAPLE_TREE_HFILES} > > OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \ > build_data.o kernel.o test.o gdb_interface.o net.o dev.o bpf.o \ > @@ -93,7 +94,7 @@ OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \ > xen_hyper.o xen_hyper_command.o xen_hyper_global_data.o \ > 
xen_hyper_dump_tables.o kvmdump.o qemu.o qemu-load.o sadump.o ipcs.o \ > ramdump.o vmware_vmss.o vmware_guestdump.o \ > - xen_dom0.o kaslr_helper.o sbitmap.o > + xen_dom0.o kaslr_helper.o sbitmap.o maple_tree.o > > MEMORY_DRIVER_FILES=memory_driver/Makefile memory_driver/crash.c memory_driver/README > > @@ -536,6 +537,9 @@ kaslr_helper.o: ${GENERIC_HFILES} kaslr_helper.c > bpf.o: ${GENERIC_HFILES} bpf.c > ${CC} -c ${CRASH_CFLAGS} bpf.c ${WARNING_OPTIONS} ${WARNING_ERROR} > > +maple_tree.o: ${GENERIC_HFILES} ${MAPLE_TREE_HFILES} maple_tree.c > + ${CC} -c ${CRASH_CFLAGS} maple_tree.c ${WARNING_OPTIONS} ${WARNING_ERROR} > + > ${PROGRAM}: force > @$(MAKE) all > > diff --git a/defs.h b/defs.h > index afdcf6c..2978cec 100644 > --- a/defs.h > +++ b/defs.h > @@ -2181,6 +2181,22 @@ struct offset_table { /* stash of commonly-used offsets */ > long blk_mq_tags_nr_reserved_tags; > long blk_mq_tags_rqs; > long request_queue_hctx_table; > + long mm_struct_mm_mt; > + long maple_tree_ma_root; > + long maple_tree_ma_flags; > + long maple_node_parent; > + long maple_node_ma64; > + long maple_node_mr64; > + long maple_node_slot; > + long maple_arange_64_parent; > + long maple_arange_64_pivot; > + long maple_arange_64_slot; > + long maple_arange_64_meta; > + long maple_range_64_parent; > + long maple_range_64_pivot; > + long maple_range_64_slot; > + long maple_range_64_meta; > + long maple_metadata_end; > }; > > struct size_table { /* stash of commonly-used sizes */ > @@ -2351,6 +2367,8 @@ struct size_table { /* stash of commonly-used sizes */ > long sbitmap_queue; > long sbq_wait_state; > long blk_mq_tags; > + long maple_tree_struct; > + long maple_node_struct; > }; > > struct array_table { > @@ -5557,6 +5575,7 @@ int file_dump(ulong, ulong, ulong, int, int); > int same_file(char *, char *); > int cleanup_memory_driver(void); > > +void maple_init(void); > > /* > * help.c > diff --git a/maple_tree.c b/maple_tree.c > new file mode 100644 > index 0000000..058b769 > --- /dev/null > +++ 
b/maple_tree.c > @@ -0,0 +1,833 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * Maple Tree implementation > + * Copyright (c) 2018-2022 Oracle Corporation > + * Authors: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> > + * Matthew Wilcox <willy@xxxxxxxxxxxxx> > + * > + * The following are copied and modified from lib/maple_tree.c > + */ > + > +#include "maple_tree.h" > +#include "defs.h" > + > +/* Bit 1 indicates the root is a node */ > +#define MAPLE_ROOT_NODE 0x02 > +/* maple_type stored bit 3-6 */ > +#define MAPLE_ENODE_TYPE_SHIFT 0x03 > +/* Bit 2 means a NULL somewhere below */ > +#define MAPLE_ENODE_NULL 0x04 > + > +#define MA_ROOT_PARENT 1 > + > +#define MAPLE_PARENT_ROOT 0x01 > + > +#define MAPLE_PARENT_SLOT_SHIFT 0x03 > +#define MAPLE_PARENT_SLOT_MASK 0xF8 > + > +#define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 > +#define MAPLE_PARENT_16B_SLOT_MASK 0xFC > + > +#define MAPLE_PARENT_RANGE64 0x06 > +#define MAPLE_PARENT_RANGE32 0x04 > +#define MAPLE_PARENT_NOT_RANGE16 0x02 > + > +unsigned char *mt_slots = NULL; > +unsigned char *mt_pivots = NULL; > + > +#define MAPLE_BUFSIZE 512 > + > +#define ma_parent_ptr(x) ((struct maple_pnode *)(x)) The ma_parent_ptr() macro is unused.
> +#define ma_enode_ptr(x) ((struct maple_enode *)(x)) > + > +static inline bool mas_is_ptr(struct ma_state *mas) > +{ > + return mas->node == MAS_ROOT; > +} > + > +static inline bool mas_is_start(struct ma_state *mas) > +{ > + return mas->node == MAS_START; > +} > + > +static inline void *mte_safe_root(const struct maple_enode *node) > +{ > + return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); > +} > + > +static inline struct maple_node *mte_to_node(const struct maple_enode *entry) > +{ > + return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); > +} > + > +static inline enum maple_type mte_node_type(const struct maple_enode *entry) > +{ > + return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & > + MAPLE_NODE_TYPE_MASK; > +} > + > +static inline void *mas_root(struct ma_state *mas) > +{ > + char tree[MAPLE_BUFSIZE]; > + assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE); A failing assert() would make crash itself abort and dump core; could you please replace the assert() with a code block similar to the one below? if (SIZE(maple_tree_struct) > MAPLE_BUFSIZE) error(FATAL, ""); > + > + readmem((ulonglong)(mas->tree), KVADDR, tree, SIZE(maple_tree_struct), > + "mas_root read maple_tree", FAULT_ON_ERROR); > + return *(void **)(tree + OFFSET(maple_tree_ma_root)); > +} > + > +static inline struct maple_enode *mas_start(struct ma_state *mas) > +{ > + if (mas_is_start(mas)) { > + struct maple_enode *root; > + > + mas->node = MAS_NONE; > + mas->min = 0; > + mas->max = ULONG_MAX; > + mas->depth = 0; > + mas->offset = 0; > + > + root = mas_root(mas); > + /* Tree with nodes */ > + if (xa_is_node(root)) { > + mas->node = mte_safe_root(root); > + return NULL; > + } > + > + /* empty tree */ > + if (!root) { > + // mas->offset = MAPLE_NODE_SLOTS; > + mas->offset = mt_slots[maple_dense]; > + return NULL; > + } > + > + /* Single entry tree */ > + mas->node = MAS_ROOT; > + // mas->offset = MAPLE_NODE_SLOTS; > + mas->offset = mt_slots[maple_dense]; > + > + /* Single entry tree.
*/ > + if (mas->index > 0) > + return NULL; > + > + return root; > + } > + > + return NULL; > +} > + > +static inline unsigned long *ma_pivots(struct maple_node *node, > + enum maple_type type) > +{ > + switch (type) { > + case maple_arange_64: > + return (unsigned long *)((char *)node + OFFSET(maple_node_ma64) > + + OFFSET(maple_arange_64_pivot)); > + case maple_range_64: > + case maple_leaf_64: > + return (unsigned long *)((char *)node + OFFSET(maple_node_mr64) > + + OFFSET(maple_range_64_pivot)); > + case maple_dense: > + return NULL; > + } > + return NULL; > +} > + > +static inline struct maple_metadata *ma_meta(struct maple_node *mn, > + enum maple_type mt) > +{ > + switch (mt) { > + case maple_arange_64: > + return (struct maple_metadata *)( > + (char *)mn + OFFSET(maple_node_ma64) > + + OFFSET(maple_arange_64_meta)); > + default: > + return (struct maple_metadata *)( > + (char *)mn + OFFSET(maple_node_mr64) > + + OFFSET(maple_range_64_meta)); > + } > +} > + > +static inline unsigned char ma_meta_end(struct maple_node *mn, > + enum maple_type mt) > +{ > + struct maple_metadata *meta = ma_meta(mn, mt); > + > + return *((char *)meta + OFFSET(maple_metadata_end)); > +} > + > +static inline unsigned char ma_data_end(struct maple_node *node, > + enum maple_type type, > + unsigned long *pivots, > + unsigned long max) > +{ > + unsigned char offset; > + > + if (type == maple_arange_64) > + return ma_meta_end(node, type); > + > + offset = mt_pivots[type] - 1; > + if (!pivots[offset]) > + return ma_meta_end(node, type); > + > + if (pivots[offset] == max) > + return offset; > + > + return mt_pivots[type]; > +} > + > +static inline bool ma_dead_node(const struct maple_node *node, > + const struct maple_node *orig_node) > +{ > + struct maple_node *parent = (struct maple_node *) > + (*(unsigned long *)((char *)node > + + OFFSET(maple_node_parent)) > + & ~MAPLE_NODE_MASK); > + return (parent == orig_node); > +} > + > +static inline void **ma_slots(struct maple_node *mn, 
enum maple_type mt) > +{ > + switch (mt) { > + default: > + case maple_arange_64: > + return (void **)((char *)mn + OFFSET(maple_node_ma64) > + + OFFSET(maple_arange_64_slot)); > + case maple_range_64: > + case maple_leaf_64: > + return (void **)((char *)mn + OFFSET(maple_node_mr64) > + + OFFSET(maple_range_64_slot)); > + case maple_dense: > + return (void **)((char *)mn + OFFSET(maple_node_slot)); > + } > +} > + > +static inline void *mt_slot(const struct maple_tree *mt, > + void **slots, unsigned char offset) > +{ > + return slots[offset]; > +} > + > +static inline bool ma_is_leaf(const enum maple_type type) > +{ > + return type < maple_range_64; > +} > + > +static inline void *mtree_range_walk(struct ma_state *mas) > +{ > + unsigned long *pivots; > + unsigned char offset; > + struct maple_node *node; > + struct maple_enode *next, *last; > + enum maple_type type; > + void **slots; > + unsigned char end; > + unsigned long max, min; > + unsigned long prev_max, prev_min; > + > + char tmp_node[MAPLE_BUFSIZE]; > + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE); Ditto. 
> + > + last = next = mas->node; > + prev_min = min = mas->min; > + max = mas->max; > + do { > + offset = 0; > + last = next; > + node = mte_to_node(next); > + type = mte_node_type(next); > + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct), > + "mtree_range_walk read maple_node", FAULT_ON_ERROR); > + pivots = ma_pivots((struct maple_node *)tmp_node, type); > + end = ma_data_end((struct maple_node *)tmp_node, type, pivots, max); > + if (ma_dead_node((struct maple_node *)tmp_node, node)) > + goto dead_node; > + > + if (pivots[offset] >= mas->index) { > + prev_max = max; > + prev_min = min; > + max = pivots[offset]; > + goto next; > + } > + > + do { > + offset++; > + } while ((offset < end) && (pivots[offset] < mas->index)); > + > + prev_min = min; > + min = pivots[offset - 1] + 1; > + prev_max = max; > + if (offset < end && pivots[offset]) > + max = pivots[offset]; > + > +next: > + slots = ma_slots((struct maple_node *)tmp_node, type); > + next = mt_slot(mas->tree, slots, offset); > + if (ma_dead_node((struct maple_node *)tmp_node, node)) > + goto dead_node; > + } while (!ma_is_leaf(type)); > + > + mas->offset = offset; > + mas->index = min; > + mas->last = max; > + mas->min = prev_min; > + mas->max = prev_max; > + mas->node = last; > + return (void *) next; > + > +dead_node: > + mas_reset(mas); > + return NULL; > +} > + > +static inline void *mas_state_walk(struct ma_state *mas) > +{ > + void *entry; > + > + entry = mas_start(mas); > + if (mas_is_none(mas)) > + return NULL; > + > + if (mas_is_ptr(mas)) > + return entry; > + > + return mtree_range_walk(mas); > +} > + > +static inline bool mas_searchable(struct ma_state *mas) > +{ > + if (mas_is_none(mas)) > + return false; > + > + if (mas_is_ptr(mas)) > + return false; > + > + return true; > +} > + > +static inline struct maple_node *mas_mn(const struct ma_state *mas) > +{ > + return mte_to_node(mas->node); > +} > + > +static inline unsigned long > +mas_safe_min(struct ma_state *mas, unsigned long 
*pivots, unsigned char offset) > +{ > + if (offset) > + return pivots[offset - 1] + 1; > + > + return mas->min; > +} > + > +static inline void *mas_slot(struct ma_state *mas, void **slots, > + unsigned char offset) > +{ > + return mt_slot(mas->tree, slots, offset); > +} > + > +static inline unsigned long > +mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, > + unsigned char piv, enum maple_type type) > +{ > + if (piv >= mt_pivots[type]) > + return mas->max; > + > + return pivots[piv]; > +} > + > +static inline void *mas_next_nentry(struct ma_state *mas, > + struct maple_node *node, unsigned long max, > + enum maple_type type, struct maple_node *orig_node) > +{ > + unsigned char count; > + unsigned long pivot; > + unsigned long *pivots; > + void **slots; > + void *entry; > + > + if (mas->last == mas->max) { > + mas->index = mas->max; > + return NULL; > + } > + > + pivots = ma_pivots(node, type); > + slots = ma_slots(node, type); > + mas->index = mas_safe_min(mas, pivots, mas->offset); > + if (ma_dead_node(node, orig_node)) > + return NULL; > + > + if (mas->index > max) > + return NULL; > + > + count = ma_data_end(node, type, pivots, mas->max); > + if (mas->offset > count) > + return NULL; > + > + while (mas->offset < count) { > + pivot = pivots[mas->offset]; > + entry = mas_slot(mas, slots, mas->offset); > + if (ma_dead_node(node, orig_node)) > + return NULL; > + > + if (entry) > + goto found; > + > + if (pivot >= max) > + return NULL; > + > + mas->index = pivot + 1; > + mas->offset++; > + } > + > + if (mas->index > mas->max) { > + mas->index = mas->last; > + return NULL; > + } > + > + pivot = mas_safe_pivot(mas, pivots, mas->offset, type); > + entry = mas_slot(mas, slots, mas->offset); > + if (ma_dead_node(node, orig_node)) > + return NULL; > + > + if (!pivot) > + return NULL; > + > + if (!entry) > + return NULL; > + > +found: > + mas->last = pivot; > + return entry; > +} > + > +static inline void mas_rewalk(struct ma_state *mas, unsigned long 
index) > +{ > + > +retry: > + mas_set(mas, index); > + mas_state_walk(mas); > + if (mas_is_start(mas)) > + goto retry; > + > + return; The "return" is redundant? > +} > + > +static inline bool ma_is_root(struct maple_node *node) > +{ > + return (*(unsigned long *)((char *)node > + + OFFSET(maple_node_parent)) > + & MA_ROOT_PARENT); > +} > + > +static inline struct maple_node *mte_parent(const struct maple_node *node) > +{ > + return (void *)(*(unsigned long *)((char *)node > + + OFFSET(maple_node_parent)) > + & ~MAPLE_NODE_MASK); > +} > + > +static inline unsigned long mte_parent_slot_mask(unsigned long parent) > +{ > + /* Note bit 1 == 0 means 16B */ > + if (parent & MAPLE_PARENT_NOT_RANGE16) > + return MAPLE_PARENT_SLOT_MASK; > + > + return MAPLE_PARENT_16B_SLOT_MASK; > +} > + > +static inline bool mt_is_alloc(struct maple_tree *mt) > +{ > + return (*(unsigned int *)((char *)mt > + + OFFSET(maple_tree_ma_flags)) > + & MT_FLAGS_ALLOC_RANGE); > +} > + > +static inline > +enum maple_type mte_parent_enum(struct maple_enode *p_enode, > + struct maple_tree *mt) > +{ > + unsigned long p_type; > + char tmp_tree[MAPLE_BUFSIZE]; > + assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE); Ditto. > + > + p_type = (unsigned long)p_enode; > + if (p_type & MAPLE_PARENT_ROOT) > + return 0; /* Validated in the caller. */ Does this indicate that it is the enum maple_type: maple_dense? 
> + > + p_type &= MAPLE_NODE_MASK; > + p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type)); > + > + switch (p_type) { > + case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ > + readmem((ulonglong)mt, KVADDR, tmp_tree, SIZE(maple_tree_struct), > + "mte_parent_enum read maple_tree", FAULT_ON_ERROR); > + if (mt_is_alloc((struct maple_tree *)tmp_tree)) > + return maple_arange_64; > + return maple_range_64; > + } > + > + return 0; > +} > + > +static inline > +enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_node *node) > +{ > + return mte_parent_enum(ma_enode_ptr(*(struct maple_pnode **) > + ((char *)node + OFFSET(maple_node_parent))), > + mas->tree); > +} > + > +static inline unsigned long mte_parent_shift(unsigned long parent) > +{ > + /* Note bit 1 == 0 means 16B */ > + if (parent & MAPLE_PARENT_NOT_RANGE16) > + return MAPLE_PARENT_SLOT_SHIFT; > + > + return MAPLE_PARENT_16B_SLOT_SHIFT; > +} > + > +static inline unsigned int mte_parent_slot(const struct maple_node *node) > +{ > + unsigned long val = *(unsigned long *)((char *)node > + + OFFSET(maple_node_parent)); > + > + /* Root. */ > + if (val & 1) > + return 0; > + > + /* > + * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost > + * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT > + */ > + return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); > +} > + > +static inline struct maple_enode *mt_mk_node(const struct maple_node *node, > + enum maple_type type) > +{ > + return (void *)((unsigned long)node | > + (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); > +} > + > +static inline bool mte_is_root(struct maple_node *node) > +{ > + return ma_is_root(node); > +} > + > +static int mas_ascend(struct ma_state *mas) > +{ > + struct maple_enode *p_enode; /* parent enode. */ > + struct maple_enode *a_enode; /* ancestor enode. */ > + struct maple_node *a_node; /* ancestor node. */ > + struct maple_node *p_node; /* parent node. 
*/ > + unsigned char a_slot; > + enum maple_type a_type; > + unsigned long min, max; > + unsigned long *pivots; > + unsigned char offset; > + bool set_max = false, set_min = false; > + > + char tmp_node[MAPLE_BUFSIZE]; > + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE); Ditto. > + > + a_node = mas_mn(mas); > + readmem((ulonglong)a_node, KVADDR, tmp_node, SIZE(maple_node_struct), > + "mas_ascend read maple_node", FAULT_ON_ERROR); > + if (ma_is_root((struct maple_node *)tmp_node)) { > + mas->offset = 0; > + return 0; > + } > + > + readmem((ulonglong)(mte_to_node(mas->node)), KVADDR, tmp_node, > + SIZE(maple_node_struct), "mas_ascend read maple_node", FAULT_ON_ERROR); > + p_node = mte_parent((struct maple_node *)tmp_node); > + if (a_node == p_node) > + return 1; > + a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node); > + offset = mte_parent_slot((struct maple_node *)tmp_node); > + a_enode = mt_mk_node(p_node, a_type); > + > + /* Check to make sure all parent information is still accurate */ > + if (p_node != mte_parent((struct maple_node *)tmp_node)) > + return 1; > + > + mas->node = a_enode; > + mas->offset = offset; > + > + readmem((ulonglong)(mte_to_node(a_enode)), KVADDR, tmp_node, > + SIZE(maple_node_struct), "mas_ascend read maple_node", FAULT_ON_ERROR); > + if (mte_is_root((struct maple_node *)tmp_node)) { > + mas->max = ULONG_MAX; > + mas->min = 0; > + return 0; > + } > + > + min = 0; > + max = ULONG_MAX; > + do { > + p_enode = a_enode; > + readmem((ulonglong)(mte_to_node(p_enode)), KVADDR, tmp_node, > + SIZE(maple_node_struct), > + "mas_ascend read maple_node", FAULT_ON_ERROR); > + a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node); > + a_node = mte_parent((struct maple_node *)tmp_node); > + a_slot = mte_parent_slot((struct maple_node *)tmp_node); > + readmem((ulonglong)a_node, KVADDR, tmp_node, SIZE(maple_node_struct), > + "mas_ascend read maple_node", FAULT_ON_ERROR); > + pivots = ma_pivots((struct maple_node *)tmp_node, a_type); > + 
a_enode = mt_mk_node((struct maple_node *)tmp_node, a_type); > + > + if (!set_min && a_slot) { > + set_min = true; > + min = pivots[a_slot - 1] + 1; > + } > + > + if (!set_max && a_slot < mt_pivots[a_type]) { > + set_max = true; > + max = pivots[a_slot]; > + } > + > + if (ma_dead_node((struct maple_node *)tmp_node, a_node)) > + return 1; > + > + if (ma_is_root((struct maple_node *)tmp_node)) > + break; > + > + } while (!set_min || !set_max); > + > + mas->max = max; > + mas->min = min; > + return 0; > +} > + > +static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, > + unsigned long max) > +{ > + unsigned long min, pivot; > + unsigned long *pivots; > + struct maple_enode *enode; > + int level = 0; > + unsigned char offset; > + enum maple_type mt; > + void **slots; > + > + char tmp_node[MAPLE_BUFSIZE]; > + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE); Ditto. > + > + if (mas->max >= max) > + goto no_entry; > + > + level = 0; > + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct), > + "mas_next_node read maple_node", FAULT_ON_ERROR); > + do { > + if (ma_is_root((struct maple_node *)tmp_node)) > + goto no_entry; > + > + min = mas->max + 1; > + if (min > max) > + goto no_entry; > + > + if (mas_ascend(mas)) > + return 1; > + > + offset = mas->offset; > + level++; > + node = mas_mn(mas); > + mt = mte_node_type(mas->node); > + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct), > + "mas_next_node read maple_node", FAULT_ON_ERROR); > + pivots = ma_pivots((struct maple_node *)tmp_node, mt); > + } while (offset == ma_data_end((struct maple_node *)tmp_node, mt, pivots, mas->max)); > + > + slots = ma_slots((struct maple_node *)tmp_node, mt); > + pivot = mas_safe_pivot(mas, pivots, ++offset, mt); > + while (level > 1) { > + /* Descend, if necessary */ > + enode = mas_slot(mas, slots, offset); > + if (ma_dead_node((struct maple_node *)tmp_node, node)) > + return 1; > + > + mas->node = enode; > + level--; > + node = 
mas_mn(mas); > + mt = mte_node_type(mas->node); > + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct), > + "mas_next_node read maple_node", FAULT_ON_ERROR); > + slots = ma_slots((struct maple_node *)tmp_node, mt); > + pivots = ma_pivots((struct maple_node *)tmp_node, mt); > + offset = 0; > + pivot = pivots[0]; > + } > + > + enode = mas_slot(mas, slots, offset); > + if (ma_dead_node((struct maple_node *)tmp_node, node)) > + return 1; > + > + mas->node = enode; > + mas->min = min; > + mas->max = pivot; > + return 0; > + > +no_entry: > + if (ma_dead_node((struct maple_node *)tmp_node, node)) > + return 1; > + > + mas->node = MAS_NONE; > + return 0; > +} > + > +static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) > +{ > + void *entry = NULL; > + struct maple_enode *prev_node; > + struct maple_node *node; > + unsigned char offset; > + unsigned long last; > + enum maple_type mt; > + > + char tmp_node[MAPLE_BUFSIZE]; > + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE); Ditto. 
> + > + last = mas->last; > +retry: > + offset = mas->offset; > + prev_node = mas->node; > + node = mas_mn(mas); > + mt = mte_node_type(mas->node); > + mas->offset++; > + if (mas->offset >= mt_slots[mt]) { > + mas->offset = mt_slots[mt] - 1; > + goto next_node; > + } > + > + while (!mas_is_none(mas)) { > + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct), > + "mas_next_entry read maple_node", FAULT_ON_ERROR); > + entry = mas_next_nentry(mas, (struct maple_node *)tmp_node, limit, mt, node); > + if (ma_dead_node((struct maple_node *)tmp_node, node)) { > + mas_rewalk(mas, last); > + goto retry; > + } > + > + if (entry) > + return entry; > + > + if ((mas->index > limit)) > + break; > + > +next_node: > + prev_node = mas->node; > + offset = mas->offset; > + if (mas_next_node(mas, node, limit)) { > + mas_rewalk(mas, last); > + goto retry; > + } > + mas->offset = 0; > + node = mas_mn(mas); > + mt = mte_node_type(mas->node); > + } > + > + mas->index = mas->last = limit; > + mas->offset = offset; > + mas->node = prev_node; > + return NULL; > +} > + > +static void *mas_walk(struct ma_state *mas) > +{ > + void *entry; > + > +retry: > + entry = mas_state_walk(mas); > + if (mas_is_start(mas)) > + goto retry; > + > + if (mas_is_ptr(mas)) { > + if (!mas->index) { > + mas->last = 0; > + } else { > + mas->index = 1; > + mas->last = ULONG_MAX; > + } > + return entry; > + } > + > + if (mas_is_none(mas)) { > + mas->index = 0; > + mas->last = ULONG_MAX; > + } > + > + return entry; > +} > + > +void *mas_find(struct ma_state *mas, unsigned long max) > +{ > + if (mas_is_paused(mas)) { > + if (mas->last == ULONG_MAX) { > + mas->node = MAS_NONE; > + return NULL; > + } > + mas->node = MAS_START; > + mas->index = ++mas->last; > + } > + > + if (mas_is_start(mas)) { > + /* First run or continue */ > + void *entry; > + > + if (mas->index > max) > + return NULL; > + > + entry = mas_walk(mas); > + if (entry) > + return entry; > + } > + > + if (!mas_searchable(mas)) > + return 
NULL; > + > + /* Retries on dead nodes handled by mas_next_entry */ > + return mas_next_entry(mas, max); > +} > + > +/***********************************************/ > +void maple_init(void) > +{ > + int array_len; > + > + STRUCT_SIZE_INIT(maple_tree_struct, "maple_tree"); > + STRUCT_SIZE_INIT(maple_node_struct, "maple_node"); > + > + MEMBER_OFFSET_INIT(maple_tree_ma_root, "maple_tree", "ma_root"); > + MEMBER_OFFSET_INIT(maple_tree_ma_flags, "maple_tree", "ma_flags"); > + > + MEMBER_OFFSET_INIT(maple_node_parent, "maple_node", "parent"); > + MEMBER_OFFSET_INIT(maple_node_ma64, "maple_node", "ma64"); > + MEMBER_OFFSET_INIT(maple_node_mr64, "maple_node", "mr64"); > + MEMBER_OFFSET_INIT(maple_node_slot, "maple_node", "slot"); > + > + MEMBER_OFFSET_INIT(maple_arange_64_parent, "maple_arange_64", "parent"); > + MEMBER_OFFSET_INIT(maple_arange_64_pivot, "maple_arange_64", "pivot"); > + MEMBER_OFFSET_INIT(maple_arange_64_slot, "maple_arange_64", "slot"); > + MEMBER_OFFSET_INIT(maple_arange_64_meta, "maple_arange_64", "meta"); > + > + MEMBER_OFFSET_INIT(maple_range_64_parent, "maple_range_64", "parent"); > + MEMBER_OFFSET_INIT(maple_range_64_pivot, "maple_range_64", "pivot"); > + MEMBER_OFFSET_INIT(maple_range_64_slot, "maple_range_64", "slot"); > + MEMBER_OFFSET_INIT(maple_range_64_meta, "maple_range_64", "meta"); > + > + MEMBER_OFFSET_INIT(maple_metadata_end, "maple_metadata", "end"); > + > + array_len = get_array_length("mt_slots", NULL, sizeof(char)); > + mt_slots = calloc(array_len, sizeof(char)); > + readmem(symbol_value("mt_slots"), KVADDR, mt_slots, > + array_len * sizeof(char), "maple_init read mt_slots", > + FAULT_ON_ERROR); > + > + array_len = get_array_length("mt_pivots", NULL, sizeof(char)); > + mt_pivots = calloc(array_len, sizeof(char)); > + readmem(symbol_value("mt_pivots"), KVADDR, mt_pivots, > + array_len * sizeof(char), "maple_init read mt_pivots", > + FAULT_ON_ERROR); > +} > \ No newline at end of file > diff --git a/maple_tree.h b/maple_tree.h > new 
file mode 100644 > index 0000000..8b36c29 > --- /dev/null > +++ b/maple_tree.h > @@ -0,0 +1,214 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +#ifndef _MAPLE_TREE_H > +#define _MAPLE_TREE_H > +/* > + * Maple Tree - An RCU-safe adaptive tree for storing ranges > + * Copyright (c) 2018-2022 Oracle > + * Authors: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> > + * Matthew Wilcox <willy@xxxxxxxxxxxxx> > + * > + * eXtensible Arrays > + * Copyright (c) 2017 Microsoft Corporation > + * Author: Matthew Wilcox <willy@xxxxxxxxxxxxx> > + * > + * See Documentation/core-api/xarray.rst for how to use the XArray. > + */ > +#include <stdbool.h> > +#include <limits.h> > + > +/* > + * The following are copied and modified from include/linux/maple_tree.h > + * > + * Keep the empty struct names for debug purpose of compiler warning. > + * And can also keep the function prototypes consistent with kernel's > + * corresponding functions. > + */ > +struct maple_tree { }; > +struct maple_metadata { }; > +struct maple_range_64 { }; > +struct maple_arange_64 { }; > +struct maple_alloc { }; > +struct maple_node { }; > + Is it possible to declare these members as "void *" instead? For example: struct ma_state { void *tree; unsigned long index; ... void *node; ... }; The kernel may change these structures or move them out of the structure in the future; in that case crash would not have to change accordingly.
> +struct ma_state { > + struct maple_tree *tree; /* The tree we're operating in */ > + unsigned long index; /* The index we're operating on - range start */ > + unsigned long last; /* The last index we're operating on - range end */ > + struct maple_enode *node; /* The node containing this entry */ > + unsigned long min; /* The minimum index of this node - implied pivot min */ > + unsigned long max; /* The maximum index of this node - implied pivot max */ > + struct maple_alloc *alloc; /* Allocated nodes for this operation */ > + unsigned char depth; /* depth of tree descent during write */ > + unsigned char offset; > + unsigned char mas_flags; > +}; > + > +enum maple_type { > + maple_dense, > + maple_leaf_64, > + maple_range_64, > + maple_arange_64, > +}; > + > +#define MAS_START ((struct maple_enode *)1UL) > +#define MAS_ROOT ((struct maple_enode *)5UL) > +#define MAS_NONE ((struct maple_enode *)9UL) > +#define MAS_PAUSE ((struct maple_enode *)17UL) > +#define MA_ERROR(err) \ > + ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) > + > +#define MA_STATE(name, mt, first, end) \ > + struct ma_state name = { \ > + .tree = mt, \ > + .index = first, \ > + .last = end, \ > + .node = MAS_START, \ > + .min = 0, \ > + .max = ULONG_MAX, \ > + } The above two macros are unused. > + > +#define MAPLE_NODE_MASK 255UL > + > +#define MT_FLAGS_ALLOC_RANGE 0x01 > +#define MT_FLAGS_USE_RCU 0x02 Unused. > +#define MT_FLAGS_HEIGHT_OFFSET 0x02 > +#define MT_FLAGS_HEIGHT_MASK 0x7C > +#define MT_FLAGS_LOCK_MASK 0x300 > +#define MT_FLAGS_LOCK_IRQ 0x100 > +#define MT_FLAGS_LOCK_BH 0x200 > +#define MT_FLAGS_LOCK_EXTERN 0x300 The above four macros are unused. > + > +#define MAPLE_HEIGHT_MAX 31 Unused.
> + > +#define MAPLE_NODE_TYPE_MASK 0x0F > +#define MAPLE_NODE_TYPE_SHIFT 0x03 > + > +#define MAPLE_RESERVED_RANGE 4096 > + > +static inline void mas_reset(struct ma_state *mas) > +{ > + mas->node = MAS_START; > +} > + > +static inline > +void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) > +{ > + mas->index = start; > + mas->last = last; > + mas->node = MAS_START; > +} > + > +static inline void mas_set(struct ma_state *mas, unsigned long index) > +{ > + mas_set_range(mas, index, index); > +} > + > +static inline bool mas_is_none(struct ma_state *mas) > +{ > + return mas->node == MAS_NONE; > +} > + > +static inline bool mas_is_paused(struct ma_state *mas) > +{ > + return mas->node == MAS_PAUSE; > +} > + > +/* > + * The following are copied and modified from include/linux/xarray.h > + */ > + > +#define MAX_ERRNO 4095 The macro MAX_ERRNO is unused because xa_is_err() is never called. > +#define XA_ZERO_ENTRY xa_mk_internal(257) > +#define XA_RETRY_ENTRY xa_mk_internal(256) The macro XA_RETRY_ENTRY is unused because xa_is_advanced() is not called by any other function. > + > +static inline void *xa_mk_internal(unsigned long v) > +{ > + return (void *)((v << 2) | 2); > +} > + > +static inline bool xa_is_internal(const void *entry) > +{ > + return ((unsigned long)entry & 3) == 2; > +} > + > +static inline bool xa_is_err(const void *entry) > +{ > + return xa_is_internal(entry) && > + entry >= xa_mk_internal(-MAX_ERRNO); > +} The xa_is_err() function is unused. > + > +static inline int xa_err(void *entry) > +{ > + /* xa_to_internal() would not do sign extension. */ > + if (xa_is_err(entry)) > + return (long)entry >> 2; > + return 0; > +} Ditto: xa_err() is unused as well.
> + > +static inline bool xa_is_node(const void *entry) > +{ > + return xa_is_internal(entry) && (unsigned long)entry > 4096; > +} > + > +static inline bool xa_is_value(const void *entry) > +{ > + return (unsigned long)entry & 1; > +} > + > +static inline bool xa_is_zero(const void *entry) > +{ > + return entry == XA_ZERO_ENTRY; > +} > + > +static inline unsigned long xa_to_internal(const void *entry) > +{ > + return (unsigned long)entry >> 2; > +} > + > +static inline unsigned long xa_to_value(const void *entry) > +{ > + return (unsigned long)entry >> 1; > +} > + > +static inline bool xa_is_advanced(const void *entry) > +{ > + return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY); > +} Ditto. Thanks. Lianbo > + > +/* > + * The following are copied and modified from include/linux/mm.h > + */ > + > +struct vma_iterator { > + struct ma_state mas; > +}; > + > +#define VMA_ITERATOR(name, mm_mt, addr) \ > + struct vma_iterator name = { \ > + .mas = { \ > + .tree = mm_mt, \ > + .index = addr, \ > + .node = MAS_START, \ > + }, \ > + } > + > +void *mas_find(struct ma_state *, unsigned long); > + > +static void *vma_find(struct vma_iterator *vmi, unsigned long max) > +{ > + return mas_find(&vmi->mas, max); > +} > + > + __attribute__((unused)) static void *vma_next(struct vma_iterator *vmi) > +{ > + /* > + * Uses vma_find() to get the first VMA when the iterator starts. > + * Calling mas_next() could skip the first entry. > + */ > + return vma_find(vmi, ULONG_MAX); > +} > + > +#define for_each_vma(vmi, vma) \ > + while ((vma = (ulong)vma_next(&(vmi))) != 0) > + > +#endif /* _MAPLE_TREE_H */ > \ No newline at end of file > -- > 2.33.1 -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility Contribution Guidelines: https://github.com/crash-utility/crash/wiki