On 12/20/21 12:40 PM, Tyler Wear wrote:
New bpf helper function BPF_FUNC_skb_change_dsfield
"int bpf_skb_change_dsfield(struct sk_buff *skb, u8 mask, u8 value)".
BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
be attached to the ingress and egress path. The helper is needed
because this type of bpf_prog cannot modify the skb directly.
Used by a bpf_prog to specify DS field values on egress or
ingress.
Maybe you can expand a little bit here for your use case?
I know DS field might help but a description of your actual
use case will make adding this helper more compelling.
Signed-off-by: Tyler Wear <quic_twear@xxxxxxxxxxx>
---
include/uapi/linux/bpf.h | 9 ++++++++
net/core/filter.c | 46 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 55 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 556216dc9703..742cea7dcf8c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3742,6 +3742,14 @@ union bpf_attr {
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
+ *
+ * long bpf_skb_change_dsfield(struct sk_buff *skb, u8 mask, u8 value)
+ * Description
+ * Set DS field of IP header to the specified *value*. The *value*
+ * is masked with the provided *mask* when ds field is updated.
This description probably not precise. If I understand correctly, the
*mask* is used to mask the current skb iph->tos value which is then
'or'ed with *value*.
I am also debating the helper name, bpf_skb_change_dsfield vs.
bpf_skb_update_dsfield vs. bpf_skb_update_ds. Here, we are actually
doing an update instead of completely overwrite the original value.
Maybe "update" is better than "change". Maybe we can just do
"bpf_skb_update_ds"? We have an existing helper bpf_skb_ecn_set_ce
to update ecn. We don't have any helper with suffix "field".
+ * Works with IPv6 and IPv4.
+ * Return
+ * 1 if the DS field is set, 0 if it is not set.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3900,6 +3908,7 @@ union bpf_attr {
FN(per_cpu_ptr), \
FN(this_cpu_ptr), \
FN(redirect_peer), \
+ FN(skb_change_dsfield), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 035d66227ae2..71ea943c8059 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6402,6 +6402,50 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
return INET_ECN_set_ce(skb);
}
+BPF_CALL_3(bpf_skb_change_dsfield, struct sk_buff *, skb, u8, mask, u8, value)
+{
+ unsigned int iphdr_len;
+
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ iphdr_len = sizeof(struct iphdr);
+ break;
+ case cpu_to_be16(ETH_P_IPV6):
+ iphdr_len = sizeof(struct ipv6hdr);
+ break;
+ default:
+ return 0;
+ }
+
+ if (skb_headlen(skb) < iphdr_len)
+ return 0;
+
+ if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
+ return 0;
+
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ ipv4_change_dsfield(ipip_hdr(skb), mask, value);
+ break;
+ case cpu_to_be16(ETH_P_IPV6):
+ ipv6_change_dsfield(ipv6_hdr(skb), mask, value);
+ break;
+ default:
+ return 0;
+ }
There are some repetition here. For example, in the above, 'default' is
not possible at all. Is it possible to remove the second 'switch'
statement with simple
if (...)
ipv4_change_dsfield(ipip_hdr(skb), mask, value);
else
ipv6_change_dsfield(ipv6_hdr(skb), mask, value);
?
+
+ return 1;
+}
+
+static const struct bpf_func_proto bpf_skb_change_dsfield_proto = {
+ .func = bpf_skb_change_dsfield,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
@@ -7057,6 +7101,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_listener_sock_proto;
case BPF_FUNC_skb_ecn_set_ce:
return &bpf_skb_ecn_set_ce_proto;
+ case BPF_FUNC_skb_change_dsfield:
+ return &bpf_skb_change_dsfield_proto;
We do need a self test to exercise this helper.
#endif
default:
return sk_filter_func_proto(func_id, prog);