On 9/5/24 6:52 AM, Geliang Tang wrote:
From: Geliang Tang <tanggeliang@xxxxxxxxxx>
It's necessary to traverse all subflows on the conn_list of an MPTCP
socket and then call a kfunc to modify the fields of each subflow. In
kernel space, the mptcp_for_each_subflow() helper is used for this:
	mptcp_for_each_subflow(msk, subflow)
		kfunc(subflow);
But this has not yet been implemented for MPTCP BPF programs. As
Martin suggested recently, this conn_list walking + modify-by-kfunc
usage fits the bpf_iter use case.
This patch adds a new bpf_iter type named "mptcp_subflow" to do this.
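For illustration, a BPF program could then walk the subflows roughly
like this (a sketch only: bpf_for_each() from bpf_helpers.h expands to
the bpf_iter_mptcp_subflow_{new,next,destroy}() kfuncs below, while the
SEC() name and the modify_subflow() kfunc are made-up placeholders, not
part of this patch):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	/* Hypothetical kfunc standing in for "modify the fields of each
	 * subflow"; only the iterator kfuncs come from this patch.
	 */
	extern void modify_subflow(struct mptcp_subflow_context *subflow) __ksym;

	SEC("struct_ops")
	int BPF_PROG(walk_subflows, struct mptcp_sock *msk)
	{
		struct mptcp_subflow_context *subflow;

		/* bpf_for_each() open-codes new()/next()/destroy() */
		bpf_for_each(mptcp_subflow, subflow, msk)
			modify_subflow(subflow);

		return 0;
	}

	char LICENSE[] SEC("license") = "GPL";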
Suggested-by: Martin KaFai Lau <martin.lau@xxxxxxxxxx>
Signed-off-by: Geliang Tang <tanggeliang@xxxxxxxxxx>
---
kernel/bpf/helpers.c | 3 +++
net/mptcp/bpf.c | 57 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 60 insertions(+)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b5f0adae8293..2340ba967444 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3023,6 +3023,9 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 9672a70c24b0..cda09bbfd617 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -204,6 +204,63 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
.set = &bpf_mptcp_fmodret_ids,
};
+struct bpf_iter__mptcp_subflow {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct mptcp_sock *, msk);
+ __bpf_md_ptr(struct list_head *, pos);
+};
+
+DEFINE_BPF_ITER_FUNC(mptcp_subflow, struct bpf_iter_meta *meta,
+ struct mptcp_sock *msk, struct list_head *pos)
+
+struct bpf_iter_mptcp_subflow {
+ __u64 __opaque[3];
+} __attribute__((aligned(8)));
+
+struct bpf_iter_mptcp_subflow_kern {
+ struct mptcp_sock *msk;
+ struct list_head *pos;
+} __attribute__((aligned(8)));
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
+ struct mptcp_sock *msk)
+{
+ struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+
+ kit->msk = msk;
+ kit->pos = &msk->conn_list;
+ spin_lock_bh(&msk->pm.lock);
I don't think a spin_lock here without a matching unlock can work.
e.g. What if bpf_iter_mptcp_subflow_new() is called twice back-to-back?
The second spin_lock_bh() on the same msk->pm.lock would deadlock.
I haven't looked at the mptcp details, so a few questions:
Is the list protected by msk->pm.lock?
What happens to the sk_lock of the msk?
Can this be RCU-ified, or does it need some care when walking the
established TCP subflows?
[ Please cc the bpf list. Helping to review patches is a good way to contribute
back to the mailing list. ]
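For discussion, one possible direction (a sketch only, and assuming
conn_list is actually protected by the msk socket lock rather than by
msk->pm.lock): take no lock in the kfuncs at all and instead require
the caller to already hold the msk socket lock, asserting that with
msk_owned_by_me() from net/mptcp/protocol.h. new() and destroy() then
stay balanced no matter how many times they are called:

	/* Sketch: lock-free iterator relying on the caller holding the msk
	 * socket lock (assumes the verifier only allows these kfuncs in
	 * contexts where that lock is already held, e.g. struct_ops
	 * callbacks running under lock_sock()).
	 */
	__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
						   struct mptcp_sock *msk)
	{
		struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;

		BUILD_BUG_ON(sizeof(struct bpf_iter_mptcp_subflow_kern) >
			     sizeof(struct bpf_iter_mptcp_subflow));

		if (!msk)
			return -EINVAL;

		msk_owned_by_me(msk);	/* assert, don't take, the lock */

		kit->msk = msk;
		kit->pos = &msk->conn_list;
		return 0;
	}

	__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
	{
		/* nothing to release: new() took no lock */
	}

An RCU walk would be another option, but only if conn_list were
converted to an RCU-protected list (list_add_rcu() /
list_for_each_entry_rcu()), which it does not appear to be today.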
+
+ return 0;
+}
+
+__bpf_kfunc struct mptcp_subflow_context *
+bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
+{
+ struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = kit->msk;
+
+ subflow = list_entry((kit->pos)->next, struct mptcp_subflow_context, node);
+ if (list_entry_is_head(subflow, &msk->conn_list, node))
+ return NULL;
+
+ kit->pos = &subflow->node;
+ return subflow;
+}
+
+__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
+{
+ struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+ struct mptcp_sock *msk = kit->msk;
+
+ spin_unlock_bh(&msk->pm.lock);
+}
+
+__bpf_kfunc_end_defs();
+
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"kfuncs which will be used in BPF programs");