Re: [PATCH] perf cs-etm: Improve completeness for kernel address space

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Leo,

On Mon, 17 Jun 2019 at 09:00, Leo Yan <leo.yan@xxxxxxxxxx> wrote:
>
> Arm and arm64 architecture reserve some memory regions prior to the
> symbol '_stext' and these memory regions later will be used by device
> module and BPF jit.  The current code misses to consider these memory
> regions thus any address in the regions will be taken as user space
> mode, but perf cannot find the corresponding dso with the wrong CPU
> mode so we misses to generate samples for device module and BPF
> related trace data.
>
> This patch parse the link scripts to get the memory size prior to start
> address and reduce this size from 'etmq->etm->kernel_start', then can
> get a fixed up kernel start address which contain memory regions for
> device module and BPF.  Finally, cs_etm__cpu_mode() can return right
> mode for these memory regions and perf can successfully generate
> samples.
>
> The reason for parsing the link scripts is Arm architecture changes text
> offset dependent on different platforms, which define multiple text
> offsets in $kernel/arch/arm/Makefile.  This offset is decided when build
> kernel and the final value is extended in the link script, so we can
> extract the used value from the link script.  We use the same way to
> parse arm64 link script as well.  If fail to find the link script, the
> pre start memory size is assumed as zero, in this case it has no any
> change caused with this patch.
>
> Below is detailed info for testing this patch:
>
> - Build LLVM/Clang 8.0 or later version;
>
> - Configure perf with ~/.perfconfig:
>
>   root@debian:~# cat ~/.perfconfig
>   # this file is auto-generated.
>   [llvm]
>           clang-path = /mnt/build/llvm-build/build/install/bin/clang
>           kbuild-dir = /mnt/linux-kernel/linux-cs-dev/
>           clang-opt = "-DLINUX_VERSION_CODE=0x50200 -g"
>           dump-obj = true
>
>   [trace]
>           show_zeros = yes
>           show_duration = no
>           no_inherit = yes
>           show_timestamp = no
>           show_arg_names = no
>           args_alignment = 40
>           show_prefix = yes
>
> - Run 'perf trace' command with eBPF event:
>
>   root@debian:~# perf trace -e string \
>       -e $kernel/tools/perf/examples/bpf/augmented_raw_syscalls.c
>
> - Read eBPF program memory mapping in kernel:
>
>   root@debian:~# echo 1 > /proc/sys/net/core/bpf_jit_kallsyms
>   root@debian:~# cat /proc/kallsyms | grep -E "bpf_prog_.+_sys_[enter|exit]"
>   ffff000000086a84 t bpf_prog_f173133dc38ccf87_sys_enter  [bpf]
>   ffff000000088618 t bpf_prog_c1bd85c092d6e4aa_sys_exit   [bpf]
>
> - Launch any program which accesses file system frequently so can hit
>   the system calls trace flow with eBPF event;
>
> - Capture CoreSight trace data with filtering eBPF program:
>
>   root@debian:~# perf record -e cs_etm/@20070000.etr/ \
>           --filter 'filter 0xffff000000086a84/0x800' -a sleep 5s
>
> - Annotate for symbol 'bpf_prog_f173133dc38ccf87_sys_enter':
>
>   root@debian:~# perf report
>   Then select 'branches' samples and press 'a' to annotate symbol
>   'bpf_prog_f173133dc38ccf87_sys_enter', press 'P' to print to the
>   bpf_prog_f173133dc38ccf87_sys_enter.annotation file:
>
>   root@debian:~# cat bpf_prog_f173133dc38ccf87_sys_enter.annotation
>
>   bpf_prog_f173133dc38ccf87_sys_enter() bpf_prog_f173133dc38ccf87_sys_enter
>   Event: branches
>
>   Percent      int sys_enter(struct syscall_enter_args *args)
>                  stp  x29, x30, [sp, #-16]!
>
>                 int key = 0;
>                  mov  x29, sp
>
>                        augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);
>                  stp  x19, x20, [sp, #-16]!
>
>                        augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);
>                  stp  x21, x22, [sp, #-16]!
>
>                  stp  x25, x26, [sp, #-16]!
>
>                 return bpf_get_current_pid_tgid();
>                  mov  x25, sp
>
>                 return bpf_get_current_pid_tgid();
>                  mov  x26, #0x0                         // #0
>
>                  sub  sp, sp, #0x10
>
>                 return bpf_map_lookup_elem(pids, &pid) != NULL;
>                  add  x19, x0, #0x0
>
>                  mov  x0, #0x0                          // #0
>
>                  mov  x10, #0xfffffffffffffff8          // #-8
>
>                 if (pid_filter__has(&pids_filtered, getpid()))
>                  str  w0, [x25, x10]
>
>                 probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
>                  add  x1, x25, #0x0
>
>                 probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
>                  mov  x10, #0xfffffffffffffff8          // #-8
>
>                 syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);
>                  add  x1, x1, x10
>
>                 syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);
>                  mov  x0, #0xffff8009ffffffff           // #-140694538682369
>
>                  movk x0, #0x6698, lsl #16
>
>                  movk x0, #0x3e00
>
>                  mov  x10, #0xffffffffffff1040          // #-61376
>
>                 if (syscall == NULL || !syscall->enabled)
>                  movk x10, #0x1023, lsl #16
>
>                 if (syscall == NULL || !syscall->enabled)
>                  movk x10, #0x0, lsl #32
>
>                 loop_iter_first()
>     3.69       → blr  bpf_prog_f173133dc38ccf87_sys_enter
>                 loop_iter_first()
>                  add  x7, x0, #0x0
>
>                 loop_iter_first()
>                  add  x20, x7, #0x0
>
>                 int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg);
>                  mov  x0, #0x1                          // #1
>
>   [...]
>
> Cc: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx>
> Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Suzuki Poulouse <suzuki.poulose@xxxxxxx>
> Cc: coresight@xxxxxxxxxxxxxxxx
> Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> Signed-off-by: Leo Yan <leo.yan@xxxxxxxxxx>
> ---
>  tools/perf/Makefile.config | 24 ++++++++++++++++++++++++
>  tools/perf/util/cs-etm.c   | 26 +++++++++++++++++++++++++-
>  2 files changed, 49 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
> index 51dd00f65709..4776c2c1fb6d 100644
> --- a/tools/perf/Makefile.config
> +++ b/tools/perf/Makefile.config
> @@ -418,6 +418,30 @@ ifdef CORESIGHT
>      endif
>      LDFLAGS += $(LIBOPENCSD_LDFLAGS)
>      EXTLIBS += $(OPENCSDLIBS)
> +    ifneq ($(wildcard $(srctree)/arch/arm64/kernel/vmlinux.lds),)
> +      # Extract info from lds:
> +      #  . = ((((((((0xffffffffffffffff)) - (((1)) << (48)) + 1) + (0)) + (0x08000000))) + (0x08000000))) + 0x00080000;
> +      # ARM64_PRE_START_SIZE := (0x08000000 + 0x08000000 + 0x00080000)
> +      ARM64_PRE_START_SIZE := $(shell egrep ' \. \= \({8}0x[0-9a-fA-F]+\){2}' \
> +        $(srctree)/arch/arm64/kernel/vmlinux.lds | \
> +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \
> +        awk -F' ' '{print "("$$6 "+"  $$7 "+" $$8")"}' 2>/dev/null)
> +    else
> +      ARM64_PRE_START_SIZE := 0
> +    endif
> +    CFLAGS += -DARM64_PRE_START_SIZE="$(ARM64_PRE_START_SIZE)"
> +    ifneq ($(wildcard $(srctree)/arch/arm/kernel/vmlinux.lds),)
> +      # Extract info from lds:
> +      #   . = ((0xC0000000)) + 0x00208000;
> +      # ARM_PRE_START_SIZE := 0x00208000
> +      ARM_PRE_START_SIZE := $(shell egrep ' \. \= \({2}0x[0-9a-fA-F]+\){2}' \
> +        $(srctree)/arch/arm/kernel/vmlinux.lds | \
> +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \
> +        awk -F' ' '{print "("$$2")"}' 2>/dev/null)
> +    else
> +      ARM_PRE_START_SIZE := 0
> +    endif
> +    CFLAGS += -DARM_PRE_START_SIZE="$(ARM_PRE_START_SIZE)"
>      $(call detected,CONFIG_LIBOPENCSD)
>      ifdef CSTRACE_RAW
>        CFLAGS += -DCS_DEBUG_RAW
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 0c7776b51045..ae831f836c70 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -613,10 +613,34 @@ static void cs_etm__free(struct perf_session *session)
>  static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
>  {
>         struct machine *machine;
> +       u64 fixup_kernel_start = 0;
> +       const char *arch;
>
>         machine = etmq->etm->machine;
> +       arch = perf_env__arch(machine->env);
>
> -       if (address >= etmq->etm->kernel_start) {
> +       /*
> +        * Since arm and arm64 specify some memory regions prior to
> +        * 'kernel_start', kernel addresses can be less than 'kernel_start'.
> +        *
> +        * For arm architecture, the 16MB virtual memory space prior to
> +        * 'kernel_start' is allocated to device modules, a PMD table if
> +        * CONFIG_HIGHMEM is enabled and a PGD table.
> +        *
> +        * For arm64 architecture, the root PGD table, device module memory
> +        * region and BPF jit region are prior to 'kernel_start'.
> +        *
> +        * To reflect the complete kernel address space, compensate these
> +        * pre-defined regions for kernel start address.
> +        */
> +       if (!strcmp(arch, "arm64"))
> +               fixup_kernel_start = etmq->etm->kernel_start -
> +                                    ARM64_PRE_START_SIZE;
> +       else if (!strcmp(arch, "arm"))
> +               fixup_kernel_start = etmq->etm->kernel_start -
> +                                    ARM_PRE_START_SIZE;

I will test your work but from a quick look wouldn't it be better to
have a single define name here?  From looking at the modifications you
did to Makefile.config there doesn't seem to be a reason to have two.

Thanks,
Mathieu

> +
> +       if (address >= fixup_kernel_start) {
>                 if (machine__is_host(machine))
>                         return PERF_RECORD_MISC_KERNEL;
>                 else
> --
> 2.17.1
>




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux