On 12/20/23 10:35 PM, Martin KaFai Lau wrote:
On 12/20/23 5:28 PM, Kuniyuki Iwashima wrote:
+static int tcp_validate_header(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_reload_headers(ctx))
+ goto err;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4,
0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ /* check tcp_v4_csum(csum) is 0 if not on lo. */
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl
* 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (csum_fold(csum) != 0)
+ goto err;
+ } else if (ctx->ipv6) {
+ /* check tcp_v6_csum(csum) is 0 if not on lo. */
+ }
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
+{
+ char opcode, opsize;
+
+ if (ctx->ptr + 1 > ctx->data_end)
+ goto stop;
+
+ opcode = *ctx->ptr++;
+
+ if (opcode == TCPOPT_EOL)
+ goto stop;
+
+ if (opcode == TCPOPT_NOP)
+ goto next;
+
+ if (ctx->ptr + 1 > ctx->data_end)
+ goto stop;
+
+ opsize = *ctx->ptr++;
+
+ if (opsize < 2)
+ goto stop;
+
+ switch (opcode) {
+ case TCPOPT_MSS:
+ if (opsize == TCPOLEN_MSS && ctx->tcp->syn &&
+ ctx->ptr + (TCPOLEN_MSS - 2) < ctx->data_end)
+ ctx->attrs.mss = get_unaligned_be16(ctx->ptr);
+ break;
+ case TCPOPT_WINDOW:
+ if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn &&
+ ctx->ptr + (TCPOLEN_WINDOW - 2) < ctx->data_end) {
+ ctx->attrs.wscale_ok = 1;
+ ctx->attrs.snd_wscale = *ctx->ptr;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ if (opsize == TCPOLEN_TIMESTAMP &&
+ ctx->ptr + (TCPOLEN_TIMESTAMP - 2) < ctx->data_end) {
+ ctx->attrs.rcv_tsval = get_unaligned_be32(ctx->ptr);
+ ctx->attrs.rcv_tsecr = get_unaligned_be32(ctx->ptr + 4);
+
+ if (ctx->tcp->syn && ctx->attrs.rcv_tsecr)
+ ctx->attrs.tstamp_ok = 0;
+ else
+ ctx->attrs.tstamp_ok = 1;
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn &&
+ ctx->ptr + (TCPOLEN_SACK_PERM - 2) < ctx->data_end)
+ ctx->attrs.sack_ok = 1;
+ break;
+ }
+
+ ctx->ptr += opsize - 2;
+next:
+ return 0;
+stop:
+ return 1;
+}
+
+static void tcp_parse_options(struct tcp_syncookie *ctx)
+{
+ ctx->ptr = (char *)(ctx->tcp + 1);
+
+ bpf_loop(40, tcp_parse_option, ctx, 0);
+}
+
+static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
+{
+ if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) ||
+ (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
+ goto err;
+
+ if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ goto err;
+
+ if (!ctx->attrs.tstamp_ok)
The bpf-ci reported error in cpuv4. The email from
bot+bpf-ci@xxxxxxxxxx has the link.
I tried the following:
if (!ctx->attrs.tstamp_ok) {
bpf_printk("ctx->attrs.tstamp_ok %u",
ctx->attrs.tstamp_ok);
goto err;
}
The above prints tstamp_ok as 1 while there is a "if
(!ctx->attrs.tstamp_ok)" test before it.
Yonghong and I debugged it quite a bit. verifier concluded the
ctx->attrs.tstamp_ok is 0. We knew some red herring like cpuv4 has
fewer register spilling but not able to root cause it yet.
In the mean time, there are existing selftests parsing the tcp header.
For example, the test_parse_tcp_hdr_opt[_dynptr].c. Not as complete as
your tcp_parse_option() but should be pretty close. It does not use
bpf_loop. It uses a bounded loop + a subprog (the parse_hdr_opt in the
selftests) instead. You can consider a similar construct to see if it
works around the cpuv4 CI issue for the time being.
I did some investigation and found some issues in verifier. I need to dig more into details.
On the other hand, with the following patch
https://lore.kernel.org/bpf/20240102190726.2017424-1-yonghong.song@xxxxxxxxx/
the selftest can run successfully with cpuv2/v3/v4.
pw-bot: cr
+ goto err;
+
+ if (!ctx->attrs.sack_ok)
+ goto err;
+
+ if (!ctx->tcp->ece || !ctx->tcp->cwr)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
[ ... ]
+static int tcp_handle_syn(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_sysctl(ctx))
+ goto err;
+