On Mon, 2022-03-14 at 18:35 +0100, Jakub Sitnicki wrote: > On Thu, Mar 10, 2022 at 11:57 PM +01, Jakub Sitnicki wrote: > > On Wed, Mar 09, 2022 at 01:34 PM +01, Ilya Leoshkevich wrote: > > > On Wed, 2022-03-09 at 09:36 +0100, Jakub Sitnicki wrote: > > > > [...] > > > > > > > > > > Consider this - today the below is true on both LE and BE, > > > > right? > > > > > > > > *(u32 *)&ctx->remote_port == *(u16 *)&ctx->remote_port > > > > > > > > because the loads get converted to: > > > > > > > > *(u16 *)&ctx_kern->sport == *(u16 *)&ctx_kern->sport > > > > > > > > IOW, today, because of the bug that you are fixing here, the > > > > data > > > > layout > > > > changes from the PoV of the BPF program depending on the load > > > > size. > > > > > > > > With 2-byte loads, without this patch, the data layout appears > > > > as: > > > > > > > > struct bpf_sk_lookup { > > > > ... > > > > __be16 remote_port; > > > > __be16 remote_port; > > > > ... > > > > } > > > > > > I see, one can indeed argue that this is also a part of the ABI > > > now. > > > So we're stuck between a rock and a hard place. > > > > > > > While for 4-byte loads, it appears as in your 2nd patch: > > > > > > > > struct bpf_sk_lookup { > > > > ... > > > > #if little-endian > > > > __be16 remote_port; > > > > __u16 :16; /* zero padding */ > > > > #elif big-endian > > > > __u16 :16; /* zero padding */ > > > > __be16 remote_port; > > > > #endif > > > > ... > > > > } > > > > > > > > Because of that I don't see how we could keep complete ABI > > > > compatiblity, > > > > and have just one definition of struct bpf_sk_lookup that > > > > reflects > > > > it. These are conflicting requirements. > > > > > > > > I'd bite the bullet for 4-byte loads, for the sake of having an > > > > endian-agnostic struct bpf_sk_lookup and struct bpf_sock > > > > definition > > > > in > > > > the uAPI header. > > > > > > > > The sacrifice here is that the access converter will have to > > > > keep > > > > rewriting 4-byte access to bpf_sk_lookup.remote_port and > > > > bpf_sock.dst_port in this unexpected, quirky manner. > > > > > > > > The expectation is that with time users will recompile their > > > > BPF > > > > progs > > > > against the updated bpf.h, and switch to 2-byte loads. That > > > > will make > > > > the quirk in the access converter dead code in time. > > > > > > > > I don't have any better ideas. Sorry. > > > > > > > > [...] > > > > > > I agree, let's go ahead with this solution. > > > > > > The only remaining problem that I see is: the bug is in the > > > common > > > code, and it will affect the fields that we add in the future. > > > > > > Can we either document this state of things in a comment, or fix > > > the > > > bug and emulate the old behavior for certain fields? > > > > I think we can fix the bug in the common code, and compensate for > > the > > quirky 4-byte access to bpf_sk_lookup.remote_port and > > bpf_sock.dst_port > > in the is_valid_access and convert_ctx_access. > > > > With the patch as below, access to remote_port gets rewritten to: > > > > * size=1, offset=0, r2 = *(u8 *)(r1 +36) > > 0: (69) r2 = *(u16 *)(r1 +4) > > 1: (54) w2 &= 255 > > 2: (b7) r0 = 0 > > 3: (95) exit > > > > * size=1, offset=1, r2 = *(u8 *)(r1 +37) > > 0: (69) r2 = *(u16 *)(r1 +4) > > 1: (74) w2 >>= 8 > > 2: (54) w2 &= 255 > > 3: (b7) r0 = 0 > > 4: (95) exit > > > > * size=1, offset=2, r2 = *(u8 *)(r1 +38) > > 0: (b4) w2 = 0 > > 1: (54) w2 &= 255 > > 2: (b7) r0 = 0 > > 3: (95) exit > > > > * size=1, offset=3, r2 = *(u8 *)(r1 +39) > > 0: (b4) w2 = 0 > > 1: (74) w2 >>= 8 > > 2: (54) w2 &= 255 > > 3: (b7) r0 = 0 > > 4: (95) exit > > > > * size=2, offset=0, r2 = *(u16 *)(r1 +36) > > 0: (69) r2 = *(u16 *)(r1 +4) > > 1: (b7) r0 = 0 > > 2: (95) exit > > > > * size=2, offset=2, r2 = *(u16 *)(r1 +38) > > 0: (b4) w2 = 0 > > 1: (b7) r0 = 0 > > 2: (95) exit > > > > * size=4, offset=0, r2 = *(u32 *)(r1 +36) > > 0: (69) r2 = *(u16 *)(r1 +4) > > 1: (b7) r0 = 0 > > 2: (95) exit > > > > How does that look to you? > > > > Still need to give it a test on s390x. > > Context conversion with patch below applied looks correct to me on > s390x > as well: > > * size=1, offset=0, r2 = *(u8 *)(r1 +36) > 0: (69) r2 = *(u16 *)(r1 +4) > 1: (bc) w2 = w2 > 2: (74) w2 >>= 8 > 3: (bc) w2 = w2 > 4: (54) w2 &= 255 > 5: (bc) w2 = w2 > 6: (b7) r0 = 0 > 7: (95) exit > > * size=1, offset=1, r2 = *(u8 *)(r1 +37) > 0: (69) r2 = *(u16 *)(r1 +4) > 1: (bc) w2 = w2 > 2: (54) w2 &= 255 > 3: (bc) w2 = w2 > 4: (b7) r0 = 0 > 5: (95) exit > > * size=1, offset=2, r2 = *(u8 *)(r1 +38) > 0: (b4) w2 = 0 > 1: (bc) w2 = w2 > 2: (74) w2 >>= 8 > 3: (bc) w2 = w2 > 4: (54) w2 &= 255 > 5: (bc) w2 = w2 > 6: (b7) r0 = 0 > 7: (95) exit > > * size=1, offset=3, r2 = *(u8 *)(r1 +39) > 0: (b4) w2 = 0 > 1: (bc) w2 = w2 > 2: (54) w2 &= 255 > 3: (bc) w2 = w2 > 4: (b7) r0 = 0 > 5: (95) exit > > * size=2, offset=0, r2 = *(u16 *)(r1 +36) > 0: (69) r2 = *(u16 *)(r1 +4) > 1: (bc) w2 = w2 > 2: (b7) r0 = 0 > 3: (95) exit > > * size=2, offset=2, r2 = *(u16 *)(r1 +38) > 0: (b4) w2 = 0 > 1: (bc) w2 = w2 > 2: (b7) r0 = 0 > 3: (95) exit > > * size=4, offset=0, r2 = *(u32 *)(r1 +36) > 0: (69) r2 = *(u16 *)(r1 +4) > 1: (bc) w2 = w2 > 2: (b7) r0 = 0 > 3: (95) exit > > If we go this way, this should unbreak the bpf selftests on BE, > independently of the patch 1 from this series. > > Will send it as a patch, so that we continue the review discussion. I applied this patch to bpf-next, and while it looks reasonable, the test still fails, e.g. here: /* Load from remote_port field with zero padding (backward compatibility) */ val_u32 = *(__u32 *)&ctx->remote_port; if (val_u32 != bpf_htonl(bpf_ntohs(SRC_PORT) << 16)) return SK_DROP; > > > > > --8<-- > > > > diff --git a/net/core/filter.c b/net/core/filter.c > > index 65869fd510e8..2625a1d2dabc 100644 > > --- a/net/core/filter.c > > +++ b/net/core/filter.c > > @@ -10856,13 +10856,24 @@ static bool sk_lookup_is_valid_access(int > > off, int size, > > case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): > > case bpf_ctx_range_till(struct bpf_sk_lookup, > > remote_ip6[0], remote_ip6[3]): > > case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], > > local_ip6[3]): > > - case offsetof(struct bpf_sk_lookup, remote_port) ... > > - offsetof(struct bpf_sk_lookup, local_ip4) - 1: > > case bpf_ctx_range(struct bpf_sk_lookup, local_port): > > case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): > > bpf_ctx_record_field_size(info, sizeof(__u32)); > > return bpf_ctx_narrow_access_ok(off, size, > > sizeof(__u32)); > > > > + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): > > + /* Allow 4-byte access to 2-byte field for backward > > compatibility */ > > + if (size == sizeof(__u32)) > > + return off == offsetof(struct > > bpf_sk_lookup, remote_port); > > + bpf_ctx_record_field_size(info, sizeof(__be16)); > > + return bpf_ctx_narrow_access_ok(off, size, > > sizeof(__be16)); > > + > > + case offsetofend(struct bpf_sk_lookup, remote_port) ... > > + offsetof(struct bpf_sk_lookup, local_ip4) - 1: > > + /* Allow access to zero padding for backward > > compatiblity */ > > + bpf_ctx_record_field_size(info, sizeof(__u16)); > > + return bpf_ctx_narrow_access_ok(off, size, > > sizeof(__u16)); > > + > > default: > > return false; > > } > > @@ -10944,6 +10955,11 @@ static u32 > > sk_lookup_convert_ctx_access(enum bpf_access_type type, > > sport, 2, > > target_size)); > > break; > > > > + case offsetofend(struct bpf_sk_lookup, remote_port): > > + *target_size = 2; > > + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); > > + break; > > + > > case offsetof(struct bpf_sk_lookup, local_port): > > *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si- > > >src_reg, > > bpf_target_off(struct > > bpf_sk_lookup_kern, >