size_t :0 doesn't always work with llvm-16

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Recently, while running the test_maps, I encountered an error
message. Upon further investigation, I discovered that llvm-16 behaves
inconsistently when it comes to clearing partially initialized local
variables.

We appends a 'size_t :0' at the end of many types to prevent dirty
bytes at the end of struct types.  libbpf also check dirty padding
bytes for CORE. It works most of time with gcc and llvm.  However, I
have discovered that it is not always work with llvm. The C
specification does not guarantee this either. Nonetheless, we
primarily rely on gcc and llvm. In most cases, on x86_64 platforms,
llvm utilizes memset() to clear a partially initialized variable of a
struct type.

> memset(&data, 0, sizeof(data));

However, there are exceptional situations that use a distinct approach to clearing a buffer. It does something like the following code.

> 0000000000010220 <create_hash_of_maps>:
>    10220:       55                      push   %rbp
>    10221:       48 89 e5                mov    %rsp,%rbp
>    10224:       48 83 ec 40             sub    $0x40,%rsp
>    10228:       48 c7 45 c8 38 00 00    movq   $0x38,-0x38(%rbp)
>    1022f:       00
>    10230:       c7 45 d0 00 00 00 00    movl   $0x0,-0x30(%rbp)
>    10237:       c7 45 d4 00 00 00 00    movl   $0x0,-0x2c(%rbp)
>    1023e:       c7 45 d8 00 00 00 00    movl   $0x0,-0x28(%rbp)
>    10245:       c7 45 dc 00 00 00 00    movl   $0x0,-0x24(%rbp)
>    1024c:       e8 1f f4 ff ff          call   f670 <create_small_hash>
>    10251:       89 45 e0                mov    %eax,-0x20(%rbp)
>    10254:       c7 45 e4 01 00 00 00    movl   $0x1,-0x1c(%rbp)
>    1025b:       48 c7 45 e8 00 00 00    movq   $0x0,-0x18(%rbp)
>    10262:       00
>    10263:       c7 45 f0 00 00 00 00    movl   $0x0,-0x10(%rbp)
>    1026a:       c7 45 f4 00 00 00 00    movl   $0x0,-0xc(%rbp)
>    10271:       c7 45 f8 00 00 00 00    movl   $0x0,-0x8(%rbp)
>    10278:       bf 0d 00 00 00          mov    $0xd,%edi
> 1027d: 48 8d 35 fc 34 06 00 lea 0x634fc(%rip),%rsi # 73780 <map_percpu_stats__elf_bytes.data+0x1fe8>
>    10284:       48 8d 55 c8             lea    -0x38(%rbp),%rdx
>    10288:       41 b8 04 00 00 00       mov    $0x4,%r8d
>    1028e:       44 89 c1                mov    %r8d,%ecx
>    10291:       e8 da fd ff ff          call   10070 <map_create_opts>
>    10296:       89 45 c4                mov    %eax,-0x3c(%rbp)
>    10299:       8b 7d e0                mov    -0x20(%rbp),%edi
>    1029c:       e8 cf b0 ff ff          call   b370 <close@plt>
>    102a1:       8b 45 c4                mov    -0x3c(%rbp),%eax
>    102a4:       48 83 c4 40             add    $0x40,%rsp
>    102a8:       5d                      pop    %rbp
>    102a9:       c3                      ret

Instead of using the 'memset()' function, this object code effectively
clears a buffer by utilizing 'movl' instructions.

The above object code is generated from create_hash_of_maps() in
selftests/bpf/map_tests/map_percpu_stats.c.  The following is the
source code of create_hash_of_maps().

> static int create_hash_of_maps(void)
> {
>       struct bpf_map_create_opts map_opts = {
>               .sz = sizeof(map_opts),
>               .map_flags = BPF_F_NO_PREALLOC,
>               .inner_map_fd = create_small_hash(),
>       };
>       int ret;
>
>       ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
>                             &map_opts, sizeof(int), sizeof(int));
>       close(map_opts.inner_map_fd);
>       return ret;
> }

The following is the definition of struct bpf_map_create_opts.
(I added a new filed at the end.)

> struct bpf_map_create_opts {
> size_t sz; /* size of this struct for forward/backward compatibility */
>
>       __u32 btf_fd;
>       __u32 btf_key_type_id;
>       __u32 btf_value_type_id;
>       __u32 btf_vmlinux_value_type_id;
>
>       __u32 inner_map_fd;
>       __u32 map_flags;
>       __u64 map_extra;
>
>       __u32 numa_node;
>       __u32 map_ifindex;
>
>       __u32 value_type_btf_obj_fd;
>       size_t:0;
> };
> #define bpf_map_create_opts__last_field value_type_btf_obj_fd

When checking the value of 'sizeof(map_opts)', it is observed that the
struct bpf_map_create_opts occupies 0x38 bytes. Interestingly, the
offset of the byte after the last member, specifically after
value_type_btf_obj_fd, is actually 0x34. This means that there are 4
padding bytes present at the end of this type. Upon thorough
examination of the aforementioned object code, it becomes apparent
that the initialization is limited to the first 0x34 bytes.

By modifying the code as shown above, the resulting object code
behaves differently.

> static int create_hash_of_maps(void)
> {
>       struct bpf_map_create_opts map_opts = {
>               .sz = sizeof(map_opts),
>               .map_flags = BPF_F_NO_PREALLOC,
>               .inner_map_fd = 0,
>       };
>       int ret;
>
>       map_opts.inner_map_fd = create_small_hash(),
>       ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
>                             &map_opts, sizeof(int), sizeof(int));
>       close(map_opts.inner_map_fd);
>       return ret;
> }

The following is the object code generated by LLVM.

> 00000000000101e0 <create_hash_of_maps>:
>    101e0:       55                      push   %rbp
>    101e1:       48 89 e5                mov    %rsp,%rbp
>    101e4:       48 83 ec 40             sub    $0x40,%rsp
>    101e8:       48 8d 7d c8             lea    -0x38(%rbp),%rdi
>    101ec:       31 f6                   xor    %esi,%esi
>    101ee:       ba 38 00 00 00          mov    $0x38,%edx
>    101f3:       e8 28 b1 ff ff          call   b320 <memset@plt>
>    101f8:       48 c7 45 c8 38 00 00    movq   $0x38,-0x38(%rbp)
>    101ff:       00
>    10200:       c7 45 e4 01 00 00 00    movl   $0x1,-0x1c(%rbp)
>    10207:       e8 24 f4 ff ff          call   f630 <create_small_hash>
>    1020c:       89 45 e0                mov    %eax,-0x20(%rbp)
>    1020f:       bf 0d 00 00 00          mov    $0xd,%edi
> 10214: 48 8d 35 65 35 06 00 lea 0x63565(%rip),%rsi # 73780 <map_percpu_stats__elf_bytes.data+0x1fe8>
>    1021b:       48 8d 55 c8             lea    -0x38(%rbp),%rdx
>    1021f:       41 b8 04 00 00 00       mov    $0x4,%r8d
>    10225:       44 89 c1                mov    %r8d,%ecx
>    10228:       e8 03 fe ff ff          call   10030 <map_create_opts>
>    1022d:       89 45 c4                mov    %eax,-0x3c(%rbp)
>    10230:       8b 7d e0                mov    -0x20(%rbp),%edi
>    10233:       e8 38 b1 ff ff          call   b370 <close@plt>
>    10238:       8b 45 c4                mov    -0x3c(%rbp),%eax
>    1023b:       48 83 c4 40             add    $0x40,%rsp
>    1023f:       5d                      pop    %rbp
>    10240:       c3                      ret

The object code uses memset() to clear the buffer for 0x38
bytes. However, after the change, the behavior becomes inconsistent
with the previous object code. When a function is called during
partial initialization, llvm doesn't clear its buffer using 'memset()'
and leaves the padding bytes uncleared.






[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux