From: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx> Date: Thu, 21 Dec 2023 16:04:43 +0900 > From: Martin KaFai Lau <martin.lau@xxxxxxxxx> > Date: Wed, 20 Dec 2023 22:35:26 -0800 > > On 12/20/23 5:28 PM, Kuniyuki Iwashima wrote: > > > +static int tcp_validate_header(struct tcp_syncookie *ctx) > > > +{ > > > + s64 csum; > > > + > > > + if (tcp_reload_headers(ctx)) > > > + goto err; > > > + > > > + csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0); > > > + if (csum < 0) > > > + goto err; > > > + > > > + if (ctx->ipv4) { > > > + /* check tcp_v4_csum(csum) is 0 if not on lo. */ > > > + > > > + csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0); > > > + if (csum < 0) > > > + goto err; > > > + > > > + if (csum_fold(csum) != 0) > > > + goto err; > > > + } else if (ctx->ipv6) { > > > + /* check tcp_v6_csum(csum) is 0 if not on lo. */ > > > + } > > > + > > > + return 0; > > > +err: > > > + return -1; > > > +} > > > + > > > +static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) > > > +{ > > > + char opcode, opsize; > > > + > > > + if (ctx->ptr + 1 > ctx->data_end) > > > + goto stop; > > > + > > > + opcode = *ctx->ptr++; > > > + > > > + if (opcode == TCPOPT_EOL) > > > + goto stop; > > > + > > > + if (opcode == TCPOPT_NOP) > > > + goto next; > > > + > > > + if (ctx->ptr + 1 > ctx->data_end) > > > + goto stop; > > > + > > > + opsize = *ctx->ptr++; > > > + > > > + if (opsize < 2) > > > + goto stop; > > > + > > > + switch (opcode) { > > > + case TCPOPT_MSS: > > > + if (opsize == TCPOLEN_MSS && ctx->tcp->syn && > > > + ctx->ptr + (TCPOLEN_MSS - 2) < ctx->data_end) > > > + ctx->attrs.mss = get_unaligned_be16(ctx->ptr); > > > + break; > > > + case TCPOPT_WINDOW: > > > + if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn && > > > + ctx->ptr + (TCPOLEN_WINDOW - 2) < ctx->data_end) { > > > + ctx->attrs.wscale_ok = 1; > > > + ctx->attrs.snd_wscale = *ctx->ptr; > > > + } > > > + break; > > > + case TCPOPT_TIMESTAMP: > > > + if (opsize == 
TCPOLEN_TIMESTAMP && > > > + ctx->ptr + (TCPOLEN_TIMESTAMP - 2) < ctx->data_end) { > > > + ctx->attrs.rcv_tsval = get_unaligned_be32(ctx->ptr); > > > + ctx->attrs.rcv_tsecr = get_unaligned_be32(ctx->ptr + 4); > > > + > > > + if (ctx->tcp->syn && ctx->attrs.rcv_tsecr) > > > + ctx->attrs.tstamp_ok = 0; > > > + else > > > + ctx->attrs.tstamp_ok = 1; > > > + } > > > + break; > > > + case TCPOPT_SACK_PERM: > > > + if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn && > > > + ctx->ptr + (TCPOLEN_SACK_PERM - 2) < ctx->data_end) > > > + ctx->attrs.sack_ok = 1; > > > + break; > > > + } > > > + > > > + ctx->ptr += opsize - 2; > > > +next: > > > + return 0; > > > +stop: > > > + return 1; > > > +} > > > + > > > +static void tcp_parse_options(struct tcp_syncookie *ctx) > > > +{ > > > + ctx->ptr = (char *)(ctx->tcp + 1); > > > + > > > + bpf_loop(40, tcp_parse_option, ctx, 0); > > > +} > > > + > > > +static int tcp_validate_sysctl(struct tcp_syncookie *ctx) > > > +{ > > > + if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) || > > > + (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6)) > > > + goto err; > > > + > > > + if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7) > > > + goto err; > > > + > > > + if (!ctx->attrs.tstamp_ok) > > > > The bpf-ci reported error in cpuv4. The email from bot+bpf-ci@xxxxxxxxxx has the > > link. > > I like the mail from the bot, it's useful, but it seems that > it's sent to the patch author only when the CI passes ? > > But yeah, I found the failed test. > https://github.com/kernel-patches/bpf/actions/runs/7284164398/job/19849657597 > > > > > > I tried the following: > > > > if (!ctx->attrs.tstamp_ok) { > > bpf_printk("ctx->attrs.tstamp_ok %u", > > ctx->attrs.tstamp_ok); > > goto err; > > } > > > > > > The above prints tstamp_ok as 1 while there is a "if (!ctx->attrs.tstamp_ok)" > > test before it. > > > > Yonghong and I debugged it quite a bit. verifier concluded the > > ctx->attrs.tstamp_ok is 0. 
We found some red herrings, like cpuv4 having fewer > > register spills, but have not been able to root-cause it yet. > > > > In the meantime, there are existing selftests parsing the tcp header. For > > example, the test_parse_tcp_hdr_opt[_dynptr].c. Not as complete as your > > tcp_parse_option() but should be pretty close. It does not use bpf_loop. It uses > > a bounded loop + a subprog (the parse_hdr_opt in the selftests) instead. You can > > consider a similar construct to see if it works around the cpuv4 CI issue for > > the time being. > > Sure, I'll install the latest clang/llvm and check if the test > passes without bpf_loop(). I've tested the simple diff below and several other patterns, but the prog cannot be loaded. Without bpf_loop(), the prog can be loaded only if the parser loops at most 4 times (s/40/4/), but then it does not fully parse the necessary options, so the packet is dropped due to tcp_validate_sysctl(), and the test fails. So it seems that tcp_parse_option() cannot work around the issue even without bpf_loop(), and this series needs to wait for the cpuv4 fix. ---8<--- @@ -259,9 +260,13 @@ static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) static void tcp_parse_options(struct tcp_syncookie *ctx) { + int i; + ctx->ptr = (char *)(ctx->tcp + 1); - bpf_loop(40, tcp_parse_option, ctx, 0); + for (i = 0; i < 40; i++) + if (tcp_parse_option(i, ctx)) + break; } static int tcp_validate_sysctl(struct tcp_syncookie *ctx) ---8<--- ---8<--- BPF program is too large. Processed 1000001 insn processed 1000001 insns (limit 1000000) max_states_per_insn 30 total_states 41159 peak_states 344 mark_read 55 -- END PROG LOAD LOG -- libbpf: prog 'tcp_custom_syncookie': failed to load: -7 ---8<---