> Am 04.09.2019 um 00:13 schrieb Daniel Borkmann <daniel@xxxxxxxxxxxxx>: > > On 8/16/19 12:53 PM, Ilya Leoshkevich wrote: >> "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value != >> nux". This is because verifier rewrites a complete 32-bit >> bpf_sysctl.file_pos update to a partial update of the first 32 bits of >> 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian >> systems. >> Fix by using an offset on big-endian systems. >> Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect >> a problem there, since it expects to see 0, which it gets with high >> probability in error cases, so change it to seek to offset 3 and expect >> 3 in bpf_sysctl.file_pos. >> Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") >> Signed-off-by: Ilya Leoshkevich <iii@xxxxxxxxxxxxx> >> --- >> v1->v2: Merge bpf_ctx_narrow_load_shift and >> bpf_ctx_narrow_access_offset. >> include/linux/filter.h | 8 ++++---- >> kernel/bpf/cgroup.c | 10 ++++++++-- >> kernel/bpf/verifier.c | 4 ++-- >> tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++- >> 4 files changed, 22 insertions(+), 9 deletions(-) >> diff --git a/include/linux/filter.h b/include/linux/filter.h >> index 92c6e31fb008..2ce57645f3cd 100644 >> --- a/include/linux/filter.h >> +++ b/include/linux/filter.h >> @@ -749,14 +749,14 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) >> } >> static inline u8 >> -bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) >> +bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) >> { >> - u8 load_off = off & (size_default - 1); >> + u8 access_off = off & (size_default - 1); >> #ifdef __LITTLE_ENDIAN >> - return load_off * 8; >> + return access_off; >> #else >> - return (size_default - (load_off + size)) * 8; >> + return size_default - (access_off + size); >> #endif >> } >> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c >> index 0a00eaca6fae..00c4647ce92a 100644 >> --- a/kernel/bpf/cgroup.c >> +++ b/kernel/bpf/cgroup.c >> @@ -1325,6 +1325,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, >> struct bpf_prog *prog, u32 *target_size) >> { >> struct bpf_insn *insn = insn_buf; >> + u32 read_size; >> switch (si->off) { >> case offsetof(struct bpf_sysctl, write): >> @@ -1356,7 +1357,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, >> treg, si->dst_reg, >> offsetof(struct bpf_sysctl_kern, ppos)); >> *insn++ = BPF_STX_MEM( >> - BPF_SIZEOF(u32), treg, si->src_reg, 0); >> + BPF_SIZEOF(u32), treg, si->src_reg, >> + bpf_ctx_narrow_access_offset( >> + 0, sizeof(u32), sizeof(loff_t))); >> *insn++ = BPF_LDX_MEM( >> BPF_DW, treg, si->dst_reg, >> offsetof(struct bpf_sysctl_kern, tmp_reg)); >> @@ -1365,8 +1368,11 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, >> BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), >> si->dst_reg, si->src_reg, >> offsetof(struct bpf_sysctl_kern, ppos)); >> + read_size = bpf_size_to_bytes(BPF_SIZE(si->code)); >> *insn++ = BPF_LDX_MEM( >> - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0); >> + BPF_SIZE(si->code), si->dst_reg, si->dst_reg, >> + bpf_ctx_narrow_access_offset( >> + 0, read_size, sizeof(loff_t))); > > I see what you're doing, but generally I'm a bit puzzled on why we need these > partial store/loads and cannot access the full loff_t value internally with the > rewrite. Why was BPF_SIZEOF(u32) chosen in the first place? Looks like git history > doesn't have any useful insight here ... Andrey mind to put some clarifications > on this? Thx Do you mean rewriting e.g. the following: BPF_LDX_MEM(access_size, si->dst_reg, si->src_reg, offsetof(struct bpf_sysctl, file_pos))); as the following? BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), si->dst_reg, si->src_reg, offsetof(struct bpf_sysctl_kern, ppos)); BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, 0); # full loff_t while also doing *target_size = sizeof(loff_t); in order to let is_narrower_load logic in convert_ctx_accesses () do the masking? That would work, but wouldn't the generated code be less efficient due to explicit masking?