Re: [PATCH v2 1/6] Port the maple tree data structures and main functions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 25, 2022 at 8:39 PM <crash-utility-request@xxxxxxxxxx> wrote:
> Date: Tue, 25 Oct 2022 20:38:20 +0800
> From: Tao Liu <ltao@xxxxxxxxxx>
> To: crash-utility@xxxxxxxxxx
> Subject:  [PATCH v2 1/6] Port the maple tree data
>         structures and main functions
> Message-ID: <20221025123825.36421-2-ltao@xxxxxxxxxx>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> There are 2 ways to iterate vm_area_struct: 1) by rbtree,
> aka vma.vm_rb; 2) by linked list, aka vma.vm_prev/next.
> However for linux maple tree patch[1][2], vm_rb and vm_prev/next
> are removed from vm_area_struct. For memory.c:vm_area_dump
> of crash, it mainly uses linked list as a way of vma iteration,
> which will not work for this case. So maple tree iteration
> need to be ported to crash.
>
> For crash, currently it only iteratively read the maple tree,
> no more rcu safe or maple tree modification features
> needed. So we only port a subset of kernel maple tree
> features. In addition, we need to modify the ported kernel
> source code, making it compatible with crash.
>
> The formal crash way of vmcore struct member resolving is:
>
>     readmem(node, KVADDR, buf, SIZE(buf), "", flag);
>     return buf + OFFSET(member);
>
> which is the reimplementation of kernel way of member resolving:
>
>     return node->member;
>
> The 1st one is arch independent, it uses gdb to resolve the OFFSET
> of members, so crash don't need to know what the inside of the
> struct is, even if the struct changes for new kernel version. The 2nd
> one is arch dependent, the struct need to be ported to crash, and the
> OFFSET of members may differ between crash and kernel due to padding/
> alignment or optimization reasons.
>
> This patch deals with the 2 issues: 1) Poring for_each_vma() macro, and
> all its dependencies from kernel source[3] to crash, to enable crash
> maple tree vma iteration, 2) adapting the ported code with crash.
>
> [1]: https://github.com/oracle/linux-uek/commit/d19703645b80abe35dff1a88449d074b0b5b1bb1
> [2]: https://github.com/oracle/linux-uek/commit/91dee01f1ebb6b6587463b6ee6f7bbc4965f91d5
> [3]: https://github.com/oracle/linux-uek, maple/mainline branch
>
> Signed-off-by: Tao Liu <ltao@xxxxxxxxxx>
> ---
>  Makefile     |  10 +-
>  defs.h       |  19 ++
>  maple_tree.c | 833 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  maple_tree.h | 214 +++++++++++++
>  4 files changed, 1073 insertions(+), 3 deletions(-)
>  create mode 100644 maple_tree.c
>  create mode 100644 maple_tree.h
>
> diff --git a/Makefile b/Makefile
> index 79aef17..6f19b77 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -59,6 +59,7 @@ IBM_HFILES=ibm_common.h
>  SADUMP_HFILES=sadump.h
>  UNWIND_HFILES=unwind.h unwind_i.h rse.h unwind_x86.h unwind_x86_64.h
>  VMWARE_HFILES=vmware_vmss.h
> +MAPLE_TREE_HFILES=maple_tree.h
>
>  CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
>         kernel.c test.c gdb_interface.c configure.c net.c dev.c bpf.c \
> @@ -73,12 +74,12 @@ CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
>         xen_hyper.c xen_hyper_command.c xen_hyper_global_data.c \
>         xen_hyper_dump_tables.c kvmdump.c qemu.c qemu-load.c sadump.c ipcs.c \
>         ramdump.c vmware_vmss.c vmware_guestdump.c \
> -       xen_dom0.c kaslr_helper.c sbitmap.c
> +       xen_dom0.c kaslr_helper.c sbitmap.c maple_tree.c
>
>  SOURCE_FILES=${CFILES} ${GENERIC_HFILES} ${MCORE_HFILES} \
>         ${REDHAT_CFILES} ${REDHAT_HFILES} ${UNWIND_HFILES} \
>         ${LKCD_DUMP_HFILES} ${LKCD_TRACE_HFILES} ${LKCD_OBSOLETE_HFILES}\
> -       ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES}
> +       ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES} ${MAPLE_TREE_HFILES}
>
>  OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
>         build_data.o kernel.o test.o gdb_interface.o net.o dev.o bpf.o \
> @@ -93,7 +94,7 @@ OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
>         xen_hyper.o xen_hyper_command.o xen_hyper_global_data.o \
>         xen_hyper_dump_tables.o kvmdump.o qemu.o qemu-load.o sadump.o ipcs.o \
>         ramdump.o vmware_vmss.o vmware_guestdump.o \
> -       xen_dom0.o kaslr_helper.o sbitmap.o
> +       xen_dom0.o kaslr_helper.o sbitmap.o maple_tree.o
>
>  MEMORY_DRIVER_FILES=memory_driver/Makefile memory_driver/crash.c memory_driver/README
>
> @@ -536,6 +537,9 @@ kaslr_helper.o: ${GENERIC_HFILES} kaslr_helper.c
>  bpf.o: ${GENERIC_HFILES} bpf.c
>         ${CC} -c ${CRASH_CFLAGS} bpf.c ${WARNING_OPTIONS} ${WARNING_ERROR}
>
> +maple_tree.o: ${GENERIC_HFILES} ${MAPLE_TREE_HFILES} maple_tree.c
> +       ${CC} -c ${CRASH_CFLAGS} maple_tree.c ${WARNING_OPTIONS} ${WARNING_ERROR}
> +
>  ${PROGRAM}: force
>         @$(MAKE) all
>
> diff --git a/defs.h b/defs.h
> index afdcf6c..2978cec 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2181,6 +2181,22 @@ struct offset_table {                    /* stash of commonly-used offsets */
>         long blk_mq_tags_nr_reserved_tags;
>         long blk_mq_tags_rqs;
>         long request_queue_hctx_table;
> +       long mm_struct_mm_mt;
> +       long maple_tree_ma_root;
> +       long maple_tree_ma_flags;
> +       long maple_node_parent;
> +       long maple_node_ma64;
> +       long maple_node_mr64;
> +       long maple_node_slot;
> +       long maple_arange_64_parent;
> +       long maple_arange_64_pivot;
> +       long maple_arange_64_slot;
> +       long maple_arange_64_meta;
> +       long maple_range_64_parent;
> +       long maple_range_64_pivot;
> +       long maple_range_64_slot;
> +       long maple_range_64_meta;
> +       long maple_metadata_end;
>  };
>
>  struct size_table {         /* stash of commonly-used sizes */
> @@ -2351,6 +2367,8 @@ struct size_table {         /* stash of commonly-used sizes */
>         long sbitmap_queue;
>         long sbq_wait_state;
>         long blk_mq_tags;
> +       long maple_tree_struct;
> +       long maple_node_struct;
>  };
>
>  struct array_table {
> @@ -5557,6 +5575,7 @@ int file_dump(ulong, ulong, ulong, int, int);
>  int same_file(char *, char *);
>  int cleanup_memory_driver(void);
>
> +void maple_init(void);
>
>  /*
>   *  help.c
> diff --git a/maple_tree.c b/maple_tree.c
> new file mode 100644
> index 0000000..058b769
> --- /dev/null
> +++ b/maple_tree.c
> @@ -0,0 +1,833 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Maple Tree implementation
> + * Copyright (c) 2018-2022 Oracle Corporation
> + * Authors: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>
> + *         Matthew Wilcox <willy@xxxxxxxxxxxxx>
> + *
> + * The following are copied and modified from lib/maple_tree.c
> + */
> +
> +#include "maple_tree.h"
> +#include "defs.h"
> +
> +/* Bit 1 indicates the root is a node */
> +#define MAPLE_ROOT_NODE                        0x02
> +/* maple_type stored bit 3-6 */
> +#define MAPLE_ENODE_TYPE_SHIFT         0x03
> +/* Bit 2 means a NULL somewhere below */
> +#define MAPLE_ENODE_NULL               0x04
> +
> +#define MA_ROOT_PARENT 1
> +
> +#define MAPLE_PARENT_ROOT              0x01
> +
> +#define MAPLE_PARENT_SLOT_SHIFT                0x03
> +#define MAPLE_PARENT_SLOT_MASK         0xF8
> +
> +#define MAPLE_PARENT_16B_SLOT_SHIFT    0x02
> +#define MAPLE_PARENT_16B_SLOT_MASK     0xFC
> +
> +#define MAPLE_PARENT_RANGE64           0x06
> +#define MAPLE_PARENT_RANGE32           0x04
> +#define MAPLE_PARENT_NOT_RANGE16       0x02
> +
> +unsigned char *mt_slots = NULL;
> +unsigned char *mt_pivots = NULL;
> +
> +#define MAPLE_BUFSIZE                  512
> +
> +#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
Unused.

> +#define ma_enode_ptr(x) ((struct maple_enode *)(x))
> +
> +static inline bool mas_is_ptr(struct ma_state *mas)
> +{
> +       return mas->node == MAS_ROOT;
> +}
> +
> +static inline bool mas_is_start(struct ma_state *mas)
> +{
> +       return mas->node == MAS_START;
> +}
> +
> +static inline void *mte_safe_root(const struct maple_enode *node)
> +{
> +       return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
> +}
> +
> +static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
> +{
> +       return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
> +}
> +
> +static inline enum maple_type mte_node_type(const struct maple_enode *entry)
> +{
> +       return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
> +               MAPLE_NODE_TYPE_MASK;
> +}
> +
> +static inline void *mas_root(struct ma_state *mas)
> +{
> +       char tree[MAPLE_BUFSIZE];
> +       assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE);

The assert() may cause the crash coredump, could you please replace
the assert() with the similar code block as below?

if (SIZE(maple_tree_struct) <= MAPLE_BUFSIZE)
    error(FATAL, "");

> +
> +       readmem((ulonglong)(mas->tree), KVADDR, tree, SIZE(maple_tree_struct),
> +               "mas_root read maple_tree", FAULT_ON_ERROR);
> +       return *(void **)(tree + OFFSET(maple_tree_ma_root));
> +}
> +
> +static inline struct maple_enode *mas_start(struct ma_state *mas)
> +{
> +       if (mas_is_start(mas)) {
> +               struct maple_enode *root;
> +
> +               mas->node = MAS_NONE;
> +               mas->min = 0;
> +               mas->max = ULONG_MAX;
> +               mas->depth = 0;
> +               mas->offset = 0;
> +
> +               root = mas_root(mas);
> +               /* Tree with nodes */
> +               if (xa_is_node(root)) {
> +                       mas->node = mte_safe_root(root);
> +                       return NULL;
> +               }
> +
> +               /* empty tree */
> +               if (!root) {
> +                       // mas->offset = MAPLE_NODE_SLOTS;
> +                       mas->offset = mt_slots[maple_dense];
> +                       return NULL;
> +               }
> +
> +               /* Single entry tree */
> +               mas->node = MAS_ROOT;
> +               // mas->offset = MAPLE_NODE_SLOTS;
> +               mas->offset = mt_slots[maple_dense];
> +
> +               /* Single entry tree. */
> +               if (mas->index > 0)
> +                       return NULL;
> +
> +               return root;
> +       }
> +
> +       return NULL;
> +}
> +
> +static inline unsigned long *ma_pivots(struct maple_node *node,
> +                                      enum maple_type type)
> +{
> +       switch (type) {
> +       case maple_arange_64:
> +               return (unsigned long *)((char *)node + OFFSET(maple_node_ma64)
> +                                        + OFFSET(maple_arange_64_pivot));
> +       case maple_range_64:
> +       case maple_leaf_64:
> +               return (unsigned long *)((char *)node + OFFSET(maple_node_mr64)
> +                                        + OFFSET(maple_range_64_pivot));
> +       case maple_dense:
> +               return NULL;
> +       }
> +       return NULL;
> +}
> +
> +static inline struct maple_metadata *ma_meta(struct maple_node *mn,
> +                                            enum maple_type mt)
> +{
> +       switch (mt) {
> +       case maple_arange_64:
> +               return (struct maple_metadata *)(
> +                       (char *)mn + OFFSET(maple_node_ma64)
> +                                  + OFFSET(maple_arange_64_meta));
> +       default:
> +               return (struct maple_metadata *)(
> +                       (char *)mn + OFFSET(maple_node_mr64)
> +                                  + OFFSET(maple_range_64_meta));
> +       }
> +}
> +
> +static inline unsigned char ma_meta_end(struct maple_node *mn,
> +                                       enum maple_type mt)
> +{
> +       struct maple_metadata *meta = ma_meta(mn, mt);
> +
> +       return *((char *)meta + OFFSET(maple_metadata_end));
> +}
> +
> +static inline unsigned char ma_data_end(struct maple_node *node,
> +                                       enum maple_type type,
> +                                       unsigned long *pivots,
> +                                       unsigned long max)
> +{
> +       unsigned char offset;
> +
> +       if (type == maple_arange_64)
> +               return ma_meta_end(node, type);
> +
> +       offset = mt_pivots[type] - 1;
> +       if (!pivots[offset])
> +               return ma_meta_end(node, type);
> +
> +       if (pivots[offset] == max)
> +               return offset;
> +
> +       return mt_pivots[type];
> +}
> +
> +static inline bool ma_dead_node(const struct maple_node *node,
> +                               const struct maple_node *orig_node)
> +{
> +       struct maple_node *parent = (struct maple_node *)
> +                                    (*(unsigned long *)((char *)node
> +                                                        + OFFSET(maple_node_parent))
> +                                       & ~MAPLE_NODE_MASK);
> +       return (parent == orig_node);
> +}
> +
> +static inline void **ma_slots(struct maple_node *mn, enum maple_type mt)
> +{
> +       switch (mt) {
> +       default:
> +       case maple_arange_64:
> +               return (void **)((char *)mn + OFFSET(maple_node_ma64)
> +                                + OFFSET(maple_arange_64_slot));
> +       case maple_range_64:
> +       case maple_leaf_64:
> +               return (void **)((char *)mn + OFFSET(maple_node_mr64)
> +                                + OFFSET(maple_range_64_slot));
> +       case maple_dense:
> +               return (void **)((char *)mn + OFFSET(maple_node_slot));
> +       }
> +}
> +
> +static inline void *mt_slot(const struct maple_tree *mt,
> +                           void **slots, unsigned char offset)
> +{
> +       return slots[offset];
> +}
> +
> +static inline bool ma_is_leaf(const enum maple_type type)
> +{
> +       return type < maple_range_64;
> +}
> +
> +static inline void *mtree_range_walk(struct ma_state *mas)
> +{
> +       unsigned long *pivots;
> +       unsigned char offset;
> +       struct maple_node *node;
> +       struct maple_enode *next, *last;
> +       enum maple_type type;
> +       void **slots;
> +       unsigned char end;
> +       unsigned long max, min;
> +       unsigned long prev_max, prev_min;
> +
> +       char tmp_node[MAPLE_BUFSIZE];
> +       assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);

Ditto.

> +
> +       last = next = mas->node;
> +       prev_min = min = mas->min;
> +       max = mas->max;
> +       do {
> +               offset = 0;
> +               last = next;
> +               node = mte_to_node(next);
> +               type = mte_node_type(next);
> +               readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> +                       "mtree_range_walk read maple_node", FAULT_ON_ERROR);
> +               pivots = ma_pivots((struct maple_node *)tmp_node, type);
> +               end = ma_data_end((struct maple_node *)tmp_node, type, pivots, max);
> +               if (ma_dead_node((struct maple_node *)tmp_node, node))
> +                       goto dead_node;
> +
> +               if (pivots[offset] >= mas->index) {
> +                       prev_max = max;
> +                       prev_min = min;
> +                       max = pivots[offset];
> +                       goto next;
> +               }
> +
> +               do {
> +                       offset++;
> +               } while ((offset < end) && (pivots[offset] < mas->index));
> +
> +               prev_min = min;
> +               min = pivots[offset - 1] + 1;
> +               prev_max = max;
> +               if (offset < end && pivots[offset])
> +                       max = pivots[offset];
> +
> +next:
> +               slots = ma_slots((struct maple_node *)tmp_node, type);
> +               next = mt_slot(mas->tree, slots, offset);
> +               if (ma_dead_node((struct maple_node *)tmp_node, node))
> +                       goto dead_node;
> +       } while (!ma_is_leaf(type));
> +
> +       mas->offset = offset;
> +       mas->index = min;
> +       mas->last = max;
> +       mas->min = prev_min;
> +       mas->max = prev_max;
> +       mas->node = last;
> +       return (void *) next;
> +
> +dead_node:
> +       mas_reset(mas);
> +       return NULL;
> +}
> +
> +static inline void *mas_state_walk(struct ma_state *mas)
> +{
> +       void *entry;
> +
> +       entry = mas_start(mas);
> +       if (mas_is_none(mas))
> +               return NULL;
> +
> +       if (mas_is_ptr(mas))
> +               return entry;
> +
> +       return mtree_range_walk(mas);
> +}
> +
> +static inline bool mas_searchable(struct ma_state *mas)
> +{
> +       if (mas_is_none(mas))
> +               return false;
> +
> +       if (mas_is_ptr(mas))
> +               return false;
> +
> +       return true;
> +}
> +
> +static inline struct maple_node *mas_mn(const struct ma_state *mas)
> +{
> +       return mte_to_node(mas->node);
> +}
> +
> +static inline unsigned long
> +mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
> +{
> +       if (offset)
> +               return pivots[offset - 1] + 1;
> +
> +       return mas->min;
> +}
> +
> +static inline void *mas_slot(struct ma_state *mas, void **slots,
> +                            unsigned char offset)
> +{
> +       return mt_slot(mas->tree, slots, offset);
> +}
> +
> +static inline unsigned long
> +mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
> +              unsigned char piv, enum maple_type type)
> +{
> +       if (piv >= mt_pivots[type])
> +               return mas->max;
> +
> +       return pivots[piv];
> +}
> +
> +static inline void *mas_next_nentry(struct ma_state *mas,
> +           struct maple_node *node, unsigned long max,
> +           enum maple_type type, struct maple_node *orig_node)
> +{
> +       unsigned char count;
> +       unsigned long pivot;
> +       unsigned long *pivots;
> +       void **slots;
> +       void *entry;
> +
> +       if (mas->last == mas->max) {
> +               mas->index = mas->max;
> +               return NULL;
> +       }
> +
> +       pivots = ma_pivots(node, type);
> +       slots = ma_slots(node, type);
> +       mas->index = mas_safe_min(mas, pivots, mas->offset);
> +       if (ma_dead_node(node, orig_node))
> +               return NULL;
> +
> +       if (mas->index > max)
> +               return NULL;
> +
> +       count = ma_data_end(node, type, pivots, mas->max);
> +       if (mas->offset > count)
> +               return NULL;
> +
> +       while (mas->offset < count) {
> +               pivot = pivots[mas->offset];
> +               entry = mas_slot(mas, slots, mas->offset);
> +               if (ma_dead_node(node, orig_node))
> +                       return NULL;
> +
> +               if (entry)
> +                       goto found;
> +
> +               if (pivot >= max)
> +                       return NULL;
> +
> +               mas->index = pivot + 1;
> +               mas->offset++;
> +       }
> +
> +       if (mas->index > mas->max) {
> +               mas->index = mas->last;
> +               return NULL;
> +       }
> +
> +       pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
> +       entry = mas_slot(mas, slots, mas->offset);
> +       if (ma_dead_node(node, orig_node))
> +               return NULL;
> +
> +       if (!pivot)
> +               return NULL;
> +
> +       if (!entry)
> +               return NULL;
> +
> +found:
> +       mas->last = pivot;
> +       return entry;
> +}
> +
> +static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
> +{
> +
> +retry:
> +       mas_set(mas, index);
> +       mas_state_walk(mas);
> +       if (mas_is_start(mas))
> +               goto retry;
> +
> +       return;

The "return" is redundant?

> +}
> +
> +static inline bool ma_is_root(struct maple_node *node)
> +{
> +       return (*(unsigned long *)((char *)node
> +                                  + OFFSET(maple_node_parent))
> +               & MA_ROOT_PARENT);
> +}
> +
> +static inline struct maple_node *mte_parent(const struct maple_node *node)
> +{
> +       return (void *)(*(unsigned long *)((char *)node
> +                                          + OFFSET(maple_node_parent))
> +                       & ~MAPLE_NODE_MASK);
> +}
> +
> +static inline unsigned long mte_parent_slot_mask(unsigned long parent)
> +{
> +       /* Note bit 1 == 0 means 16B */
> +       if (parent & MAPLE_PARENT_NOT_RANGE16)
> +               return MAPLE_PARENT_SLOT_MASK;
> +
> +       return MAPLE_PARENT_16B_SLOT_MASK;
> +}
> +
> +static inline bool mt_is_alloc(struct maple_tree *mt)
> +{
> +       return (*(unsigned int *)((char *)mt
> +                                 + OFFSET(maple_tree_ma_flags))
> +               & MT_FLAGS_ALLOC_RANGE);
> +}
> +
> +static inline
> +enum maple_type mte_parent_enum(struct maple_enode *p_enode,
> +                               struct maple_tree *mt)
> +{
> +       unsigned long p_type;
> +       char tmp_tree[MAPLE_BUFSIZE];
> +       assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE);

Ditto.

> +
> +       p_type = (unsigned long)p_enode;
> +       if (p_type & MAPLE_PARENT_ROOT)
> +               return 0; /* Validated in the caller. */

Does this indicate that it is the enum maple_type: maple_dense?

> +
> +       p_type &= MAPLE_NODE_MASK;
> +       p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
> +
> +       switch (p_type) {
> +       case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
> +               readmem((ulonglong)mt, KVADDR, tmp_tree, SIZE(maple_tree_struct),
> +                       "mte_parent_enum read maple_tree", FAULT_ON_ERROR);
> +               if (mt_is_alloc((struct maple_tree *)tmp_tree))
> +                       return maple_arange_64;
> +               return maple_range_64;
> +       }
> +
> +       return 0;
> +}
> +
> +static inline
> +enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_node *node)
> +{
> +       return mte_parent_enum(ma_enode_ptr(*(struct maple_pnode **)
> +                                           ((char *)node + OFFSET(maple_node_parent))),
> +                              mas->tree);
> +}
> +
> +static inline unsigned long mte_parent_shift(unsigned long parent)
> +{
> +       /* Note bit 1 == 0 means 16B */
> +       if (parent & MAPLE_PARENT_NOT_RANGE16)
> +               return MAPLE_PARENT_SLOT_SHIFT;
> +
> +       return MAPLE_PARENT_16B_SLOT_SHIFT;
> +}
> +
> +static inline unsigned int mte_parent_slot(const struct maple_node *node)
> +{
> +       unsigned long val = *(unsigned long *)((char *)node
> +                                              + OFFSET(maple_node_parent));
> +
> +       /* Root. */
> +       if (val & 1)
> +               return 0;
> +
> +       /*
> +        * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
> +        * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
> +        */
> +       return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
> +}
> +
> +static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
> +                                            enum maple_type type)
> +{
> +       return (void *)((unsigned long)node |
> +                       (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
> +}
> +
> +static inline bool mte_is_root(struct maple_node *node)
> +{
> +       return ma_is_root(node);
> +}
> +
> +static int mas_ascend(struct ma_state *mas)
> +{
> +       struct maple_enode *p_enode; /* parent enode. */
> +       struct maple_enode *a_enode; /* ancestor enode. */
> +       struct maple_node *a_node; /* ancestor node. */
> +       struct maple_node *p_node; /* parent node. */
> +       unsigned char a_slot;
> +       enum maple_type a_type;
> +       unsigned long min, max;
> +       unsigned long *pivots;
> +       unsigned char offset;
> +       bool set_max = false, set_min = false;
> +
> +       char tmp_node[MAPLE_BUFSIZE];
> +       assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);

Ditto.

> +
> +       a_node = mas_mn(mas);
> +       readmem((ulonglong)a_node, KVADDR, tmp_node, SIZE(maple_node_struct),
> +               "mas_ascend read maple_node", FAULT_ON_ERROR);
> +       if (ma_is_root((struct maple_node *)tmp_node)) {
> +               mas->offset = 0;
> +               return 0;
> +       }
> +
> +       readmem((ulonglong)(mte_to_node(mas->node)), KVADDR, tmp_node,
> +               SIZE(maple_node_struct), "mas_ascend read maple_node", FAULT_ON_ERROR);
> +       p_node = mte_parent((struct maple_node *)tmp_node);
> +       if (a_node == p_node)
> +               return 1;
> +       a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node);
> +       offset = mte_parent_slot((struct maple_node *)tmp_node);
> +       a_enode = mt_mk_node(p_node, a_type);
> +
> +       /* Check to make sure all parent information is still accurate */
> +       if (p_node != mte_parent((struct maple_node *)tmp_node))
> +               return 1;
> +
> +       mas->node = a_enode;
> +       mas->offset = offset;
> +
> +       readmem((ulonglong)(mte_to_node(a_enode)), KVADDR, tmp_node,
> +               SIZE(maple_node_struct), "mas_ascend read maple_node", FAULT_ON_ERROR);
> +       if (mte_is_root((struct maple_node *)tmp_node)) {
> +               mas->max = ULONG_MAX;
> +               mas->min = 0;
> +               return 0;
> +       }
> +
> +       min = 0;
> +       max = ULONG_MAX;
> +       do {
> +               p_enode = a_enode;
> +               readmem((ulonglong)(mte_to_node(p_enode)), KVADDR, tmp_node,
> +                       SIZE(maple_node_struct),
> +                       "mas_ascend read maple_node", FAULT_ON_ERROR);
> +               a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node);
> +               a_node = mte_parent((struct maple_node *)tmp_node);
> +               a_slot = mte_parent_slot((struct maple_node *)tmp_node);
> +               readmem((ulonglong)a_node, KVADDR, tmp_node, SIZE(maple_node_struct),
> +                       "mas_ascend read maple_node", FAULT_ON_ERROR);
> +               pivots = ma_pivots((struct maple_node *)tmp_node, a_type);
> +               a_enode = mt_mk_node((struct maple_node *)tmp_node, a_type);
> +
> +               if (!set_min && a_slot) {
> +                       set_min = true;
> +                       min = pivots[a_slot - 1] + 1;
> +               }
> +
> +               if (!set_max && a_slot < mt_pivots[a_type]) {
> +                       set_max = true;
> +                       max = pivots[a_slot];
> +               }
> +
> +               if (ma_dead_node((struct maple_node *)tmp_node, a_node))
> +                       return 1;
> +
> +               if (ma_is_root((struct maple_node *)tmp_node))
> +                       break;
> +
> +       } while (!set_min || !set_max);
> +
> +       mas->max = max;
> +       mas->min = min;
> +       return 0;
> +}
> +
> +static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
> +                               unsigned long max)
> +{
> +       unsigned long min, pivot;
> +       unsigned long *pivots;
> +       struct maple_enode *enode;
> +       int level = 0;
> +       unsigned char offset;
> +       enum maple_type mt;
> +       void **slots;
> +
> +       char tmp_node[MAPLE_BUFSIZE];
> +       assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
Ditto.

> +
> +       if (mas->max >= max)
> +               goto no_entry;
> +
> +       level = 0;
> +       readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> +               "mas_next_node read maple_node", FAULT_ON_ERROR);
> +       do {
> +               if (ma_is_root((struct maple_node *)tmp_node))
> +                       goto no_entry;
> +
> +               min = mas->max + 1;
> +               if (min > max)
> +                       goto no_entry;
> +
> +               if (mas_ascend(mas))
> +                       return 1;
> +
> +               offset = mas->offset;
> +               level++;
> +               node = mas_mn(mas);
> +               mt = mte_node_type(mas->node);
> +               readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> +                       "mas_next_node read maple_node", FAULT_ON_ERROR);
> +               pivots = ma_pivots((struct maple_node *)tmp_node, mt);
> +       } while (offset == ma_data_end((struct maple_node *)tmp_node, mt, pivots, mas->max));
> +
> +       slots = ma_slots((struct maple_node *)tmp_node, mt);
> +       pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
> +       while (level > 1) {
> +               /* Descend, if necessary */
> +               enode = mas_slot(mas, slots, offset);
> +               if (ma_dead_node((struct maple_node *)tmp_node, node))
> +                       return 1;
> +
> +               mas->node = enode;
> +               level--;
> +               node = mas_mn(mas);
> +               mt = mte_node_type(mas->node);
> +               readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> +                       "mas_next_node read maple_node", FAULT_ON_ERROR);
> +               slots = ma_slots((struct maple_node *)tmp_node, mt);
> +               pivots = ma_pivots((struct maple_node *)tmp_node, mt);
> +               offset = 0;
> +               pivot = pivots[0];
> +       }
> +
> +       enode = mas_slot(mas, slots, offset);
> +       if (ma_dead_node((struct maple_node *)tmp_node, node))
> +               return 1;
> +
> +       mas->node = enode;
> +       mas->min = min;
> +       mas->max = pivot;
> +       return 0;
> +
> +no_entry:
> +       if (ma_dead_node((struct maple_node *)tmp_node, node))
> +               return 1;
> +
> +       mas->node = MAS_NONE;
> +       return 0;
> +}
> +
> +static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
> +{
> +       void *entry = NULL;
> +       struct maple_enode *prev_node;
> +       struct maple_node *node;
> +       unsigned char offset;
> +       unsigned long last;
> +       enum maple_type mt;
> +
> +       char tmp_node[MAPLE_BUFSIZE];
> +       assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);

Ditto.

> +
> +       last = mas->last;
> +retry:
> +       offset = mas->offset;
> +       prev_node = mas->node;
> +       node = mas_mn(mas);
> +       mt = mte_node_type(mas->node);
> +       mas->offset++;
> +       if (mas->offset >= mt_slots[mt]) {
> +               mas->offset = mt_slots[mt] - 1;
> +               goto next_node;
> +       }
> +
> +       while (!mas_is_none(mas)) {
> +               readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> +                       "mas_next_entry read maple_node", FAULT_ON_ERROR);
> +               entry = mas_next_nentry(mas, (struct maple_node *)tmp_node, limit, mt, node);
> +               if (ma_dead_node((struct maple_node *)tmp_node, node)) {
> +                       mas_rewalk(mas, last);
> +                       goto retry;
> +               }
> +
> +               if (entry)
> +                       return entry;
> +
> +               if ((mas->index > limit))
> +                       break;
> +
> +next_node:
> +               prev_node = mas->node;
> +               offset = mas->offset;
> +               if (mas_next_node(mas, node, limit)) {
> +                       mas_rewalk(mas, last);
> +                       goto retry;
> +               }
> +               mas->offset = 0;
> +               node = mas_mn(mas);
> +               mt = mte_node_type(mas->node);
> +       }
> +
> +       mas->index = mas->last = limit;
> +       mas->offset = offset;
> +       mas->node = prev_node;
> +       return NULL;
> +}
> +
> +static void *mas_walk(struct ma_state *mas)
> +{
> +       void *entry;
> +
> +retry:
> +       entry = mas_state_walk(mas);
> +       if (mas_is_start(mas))
> +               goto retry;
> +
> +       if (mas_is_ptr(mas)) {
> +               if (!mas->index) {
> +                       mas->last = 0;
> +               } else {
> +                       mas->index = 1;
> +                       mas->last = ULONG_MAX;
> +               }
> +               return entry;
> +       }
> +
> +       if (mas_is_none(mas)) {
> +               mas->index = 0;
> +               mas->last = ULONG_MAX;
> +       }
> +
> +       return entry;
> +}
> +
> +void *mas_find(struct ma_state *mas, unsigned long max)
> +{
> +       if (mas_is_paused(mas)) {
> +               if (mas->last == ULONG_MAX) {
> +                       mas->node = MAS_NONE;
> +                       return NULL;
> +               }
> +               mas->node = MAS_START;
> +               mas->index = ++mas->last;
> +       }
> +
> +       if (mas_is_start(mas)) {
> +               /* First run or continue */
> +               void *entry;
> +
> +               if (mas->index > max)
> +                       return NULL;
> +
> +               entry = mas_walk(mas);
> +               if (entry)
> +                       return entry;
> +       }
> +
> +       if (!mas_searchable(mas))
> +               return NULL;
> +
> +       /* Retries on dead nodes handled by mas_next_entry */
> +       return mas_next_entry(mas, max);
> +}
> +
> +/***********************************************/
> +void maple_init(void)
> +{
> +       int array_len;
> +
> +       STRUCT_SIZE_INIT(maple_tree_struct, "maple_tree");
> +       STRUCT_SIZE_INIT(maple_node_struct, "maple_node");
> +
> +       MEMBER_OFFSET_INIT(maple_tree_ma_root, "maple_tree", "ma_root");
> +       MEMBER_OFFSET_INIT(maple_tree_ma_flags, "maple_tree", "ma_flags");
> +
> +       MEMBER_OFFSET_INIT(maple_node_parent, "maple_node", "parent");
> +       MEMBER_OFFSET_INIT(maple_node_ma64, "maple_node", "ma64");
> +       MEMBER_OFFSET_INIT(maple_node_mr64, "maple_node", "mr64");
> +       MEMBER_OFFSET_INIT(maple_node_slot, "maple_node", "slot");
> +
> +       MEMBER_OFFSET_INIT(maple_arange_64_parent, "maple_arange_64", "parent");
> +       MEMBER_OFFSET_INIT(maple_arange_64_pivot, "maple_arange_64", "pivot");
> +       MEMBER_OFFSET_INIT(maple_arange_64_slot, "maple_arange_64", "slot");
> +       MEMBER_OFFSET_INIT(maple_arange_64_meta, "maple_arange_64", "meta");
> +
> +       MEMBER_OFFSET_INIT(maple_range_64_parent, "maple_range_64", "parent");
> +       MEMBER_OFFSET_INIT(maple_range_64_pivot, "maple_range_64", "pivot");
> +       MEMBER_OFFSET_INIT(maple_range_64_slot, "maple_range_64", "slot");
> +       MEMBER_OFFSET_INIT(maple_range_64_meta, "maple_range_64", "meta");
> +
> +       MEMBER_OFFSET_INIT(maple_metadata_end, "maple_metadata", "end");
> +
> +       array_len = get_array_length("mt_slots", NULL, sizeof(char));
> +       mt_slots = calloc(array_len, sizeof(char));
> +       readmem(symbol_value("mt_slots"), KVADDR, mt_slots,
> +               array_len * sizeof(char), "maple_init read mt_slots",
> +               FAULT_ON_ERROR);
> +
> +       array_len = get_array_length("mt_pivots", NULL, sizeof(char));
> +       mt_pivots = calloc(array_len, sizeof(char));
> +       readmem(symbol_value("mt_pivots"), KVADDR, mt_pivots,
> +               array_len * sizeof(char), "maple_init read mt_pivots",
> +               FAULT_ON_ERROR);
> +}
> \ No newline at end of file
> diff --git a/maple_tree.h b/maple_tree.h
> new file mode 100644
> index 0000000..8b36c29
> --- /dev/null
> +++ b/maple_tree.h
> @@ -0,0 +1,214 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +#ifndef _MAPLE_TREE_H
> +#define _MAPLE_TREE_H
> +/*
> + * Maple Tree - An RCU-safe adaptive tree for storing ranges
> + * Copyright (c) 2018-2022 Oracle
> + * Authors:     Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>
> + *              Matthew Wilcox <willy@xxxxxxxxxxxxx>
> + *
> + * eXtensible Arrays
> + * Copyright (c) 2017 Microsoft Corporation
> + * Author: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> + *
> + * See Documentation/core-api/xarray.rst for how to use the XArray.
> + */
> +#include <stdbool.h>
> +#include <limits.h>
> +
> +/*
> + * The following are copied and modified from include/linux/maple_tree.h
> + *
> + * Keep the empty struct names for debug purpose of compiler warning.
> + * And can also keep the function prototypes consistent with kernel's
> + * corresponding functions.
> + */
> +struct maple_tree { };
> +struct maple_metadata { };
> +struct maple_range_64 { };
> +struct maple_arange_64 { };
> +struct maple_alloc { };
> +struct maple_node { };
> +

Is it possible to define them with the "void *" ? For example:
struct ma_state {
    void *tree;
    unsigned long index;
    ...
    void *node;
    ...
};

Kernel may change them or move them out of the structure in the
future, in this situation crash won't have to change accordingly.

> +struct ma_state {
> +       struct maple_tree *tree;        /* The tree we're operating in */
> +       unsigned long index;            /* The index we're operating on - range start */
> +       unsigned long last;             /* The last index we're operating on - range end */
> +       struct maple_enode *node;       /* The node containing this entry */
> +       unsigned long min;              /* The minimum index of this node - implied pivot min */
> +       unsigned long max;              /* The maximum index of this node - implied pivot max */
> +       struct maple_alloc *alloc;      /* Allocated nodes for this operation */
> +       unsigned char depth;            /* depth of tree descent during write */
> +       unsigned char offset;
> +       unsigned char mas_flags;
> +};
> +
> +enum maple_type {
> +       maple_dense,
> +       maple_leaf_64,
> +       maple_range_64,
> +       maple_arange_64,
> +};
> +
> +#define MAS_START      ((struct maple_enode *)1UL)
> +#define MAS_ROOT       ((struct maple_enode *)5UL)
> +#define MAS_NONE       ((struct maple_enode *)9UL)
> +#define MAS_PAUSE      ((struct maple_enode *)17UL)

> +#define MA_ERROR(err) \
> +               ((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
> +
> +#define MA_STATE(name, mt, first, end)                                 \
> +       struct ma_state name = {                                        \
> +               .tree = mt,                                             \
> +               .index = first,                                         \
> +               .last = end,                                            \
> +               .node = MAS_START,                                      \
> +               .min = 0,                                               \
> +               .max = ULONG_MAX,                                       \
> +       }

The above two macros are unused.

> +
> +#define MAPLE_NODE_MASK                255UL
> +
> +#define MT_FLAGS_ALLOC_RANGE   0x01

> +#define MT_FLAGS_USE_RCU       0x02
Unused.

> +#define MT_FLAGS_HEIGHT_OFFSET 0x02
> +#define MT_FLAGS_HEIGHT_MASK   0x7C

> +#define MT_FLAGS_LOCK_MASK     0x300
> +#define MT_FLAGS_LOCK_IRQ      0x100
> +#define MT_FLAGS_LOCK_BH       0x200
> +#define MT_FLAGS_LOCK_EXTERN   0x300
The above four macros are unused.

> +
> +#define MAPLE_HEIGHT_MAX       31
Unused.

> +
> +#define MAPLE_NODE_TYPE_MASK   0x0F
> +#define MAPLE_NODE_TYPE_SHIFT  0x03
> +
> +#define MAPLE_RESERVED_RANGE   4096
> +
> +static inline void mas_reset(struct ma_state *mas)
> +{
> +       mas->node = MAS_START;
> +}
> +
> +static inline
> +void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last)
> +{
> +              mas->index = start;
> +              mas->last = last;
> +              mas->node = MAS_START;
> +}
> +
> +static inline void mas_set(struct ma_state *mas, unsigned long index)
> +{
> +       mas_set_range(mas, index, index);
> +}
> +
> +static inline bool mas_is_none(struct ma_state *mas)
> +{
()> +       return mas->node == MAS_NONE;
> +}
> +
> +static inline bool mas_is_paused(struct ma_state *mas)
> +{
> +       return mas->node == MAS_PAUSE;
> +}
> +
> +/*
> + * The following are copied and modified from include/linux/xarray.h
> + */
> +
> +#define MAX_ERRNO      4095
The macro MAX_ERRNO is unused because the xa_is_err() is not called.

> +#define XA_ZERO_ENTRY          xa_mk_internal(257)
> +#define XA_RETRY_ENTRY         xa_mk_internal(256)

The macro XA_RETRY_ENTRY is unused because the xa_is_advanced() is not
called by other functions.

> +
> +static inline void *xa_mk_internal(unsigned long v)
> +{
> +       return (void *)((v << 2) | 2);
> +}
> +
> +static inline bool xa_is_internal(const void *entry)
> +{
> +       return ((unsigned long)entry & 3) == 2;
> +}
> +
> +static inline bool xa_is_err(const void *entry)
> +{
> +       return xa_is_internal(entry) &&
> +                       entry >= xa_mk_internal(-MAX_ERRNO);
> +}

The xa_is_err() is unused.

> +
> +static inline int xa_err(void *entry)
> +{
> +       /* xa_to_internal() would not do sign extension. */
> +       if (xa_is_err(entry))
> +               return (long)entry >> 2;
> +       return 0;
> +}

Ditto.

> +
> +static inline bool xa_is_node(const void *entry)
> +{
> +       return xa_is_internal(entry) && (unsigned long)entry > 4096;
> +}
> +
> +static inline bool xa_is_value(const void *entry)
> +{
> +       return (unsigned long)entry & 1;
> +}
> +
> +static inline bool xa_is_zero(const void *entry)
> +{
> +       return entry == XA_ZERO_ENTRY;
> +}
> +
> +static inline unsigned long xa_to_internal(const void *entry)
> +{
> +       return (unsigned long)entry >> 2;
> +}
> +
> +static inline unsigned long xa_to_value(const void *entry)
> +{
> +       return (unsigned long)entry >> 1;
> +}
> +
> +static inline bool xa_is_advanced(const void *entry)
> +{
> +       return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY);
> +}

Ditto.

Thanks.
Lianbo

> +
> +/*
> + * The following are copied and modified from include/linux/mm.h
> + */
> +
> +struct vma_iterator {
> +       struct ma_state mas;
> +};
> +
> +#define VMA_ITERATOR(name, mm_mt, addr)                                \
> +       struct vma_iterator name = {                                    \
> +               .mas = {                                                \
> +                       .tree = mm_mt,                                  \
> +                       .index = addr,                                  \
> +                       .node = MAS_START,                              \
> +               },                                                      \
> +       }
> +
> +void *mas_find(struct ma_state *, unsigned long);
> +
> +static void *vma_find(struct vma_iterator *vmi, unsigned long max)
> +{
> +       return mas_find(&vmi->mas, max);
> +}
> +
> + __attribute__((unused)) static void *vma_next(struct vma_iterator *vmi)
> +{
> +       /*
> +        * Uses vma_find() to get the first VMA when the iterator starts.
> +        * Calling mas_next() could skip the first entry.
> +        */
> +       return vma_find(vmi, ULONG_MAX);
> +}
> +
> +#define for_each_vma(vmi, vma) \
> +       while ((vma = (ulong)vma_next(&(vmi))) != 0)
> +
> +#endif /* _MAPLE_TREE_H */
> \ No newline at end of file
> --
> 2.33.1

--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux