>> You are right, at -O2 level, loop unrolling is enabled by default. >> So I think '#pragma unroll' can be removed since gcc also has >> loop unrolling enabled by default at -O2. >> >> Your patch has a conflict with latest bpf-next. Please rebase it >> on top of bpf-next, remove '#pragma unroll' support and resubmit. >> Thanks! > > Note profiler.inc.h contains code like: > > #ifdef UNROLL > __pragma_loop_unroll > #endif > for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { > > And then it is inluded by several test programs, which define (or not) > UNROLL: > > profiler1.c: > > #define UNROLL > #include "profiler.inc.h" > > profiler2.c: > > /* undef #define UNROLL */ > #include "profiler.inc.h" > > In contrast, in pyperf.h or strobemeta.h we find code like: > > #ifdef NO_UNROLL > __pragma_loop_no_unroll > #endif /* NO_UNROLL */ > for (int i = 0; i < STROBE_MAX_STRS; ++i) { > > And then programs including it define NO_UNROLL to disable unrolling. > > If -funroll-oops is enabled with -O2 and BPF programs are always built > with -O2, then not defining UNROLL for profiler.inc.h, seems like > basically a no-op to me, because unrolling will still happen. This is > assuming that #pragma unroll in clang doesn't activates more aggressive > inlining. With the patch below, that basically inverts the logic of these conditionals in profiler.inc.h, the selftests still pass running ./vmtest.sh -- ./test_progs. However, it would be good if some clang wizard could confirm what impact, if any, #pragma unroll (aka #pragma clang loop unroll(enabled)) has over -O2, before ditching these pragmas from the selftests. diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index de3b6e4e4d0a..0a30162e53d2 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -168,8 +168,8 @@ probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, int spid) { -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) if (arr_struct->array[i].meta.pid == spid) @@ -184,8 +184,8 @@ static INLINE void populate_ancestors(struct task_struct* task, u32 num_ancestors, ppid; ancestors_data->num_ancestors = 0; -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { parent = BPF_CORE_READ(parent, real_parent); @@ -211,8 +211,8 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, void* payload_start = payload; size_t filepart_length; -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { filepart_length = @@ -260,8 +260,8 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, pids_cgrp_id___local); -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys_state* subsys = @@ -401,8 +401,8 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) get_var_kill_data(ctx, spid, tpid, sig); if (kill_data == NULL) return 0; -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) if (arr_struct->array[i].meta.pid == 0) { @@ -481,8 +481,8 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) size_t filepart_length; struct dentry* parent_dentry; -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (int i = 0; i < MAX_PATH_DEPTH; i++) { filepart_length = @@ -507,8 +507,8 @@ static INLINE bool is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) { struct dentry* parent_dentry; -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (int i = 0; i < MAX_PATH_DEPTH; i++) { u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); @@ -628,8 +628,8 @@ int raw_tracepoint__sched_process_exit(void* ctx) struct task_struct* task = (struct task_struct*)bpf_get_current_task(); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); -#ifdef UNROLL -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c index fb6b13522949..c32783826f36 100644 --- a/tools/testing/selftests/bpf/progs/profiler1.c +++ b/tools/testing/selftests/bpf/progs/profiler1.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define UNROLL #define INLINE __always_inline #include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c index 0f32a3cbf556..17da6089212b 100644 --- a/tools/testing/selftests/bpf/progs/profiler2.c +++ b/tools/testing/selftests/bpf/progs/profiler2.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #define barrier_var(var) /**/ -/* undef #define UNROLL */ +#define NO_UNROLL #define INLINE /**/ #include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c index 6249fc31ccb0..cc7f9aee6d9e 100644 --- a/tools/testing/selftests/bpf/progs/profiler3.c +++ b/tools/testing/selftests/bpf/progs/profiler3.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #define barrier_var(var) /**/ -#define UNROLL #define INLINE __noinline #include "profiler.inc.h"