Re: [PATCH bpf-next v7 2/8] net: Update an existing TCP congestion control algorithm.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 3/16/23 3:36 AM, Kui-Feng Lee wrote:
This feature lets you immediately transition to another congestion
control algorithm or implementation with the same name.  Once a name
is updated, new connections will apply this new algorithm.

The purpose is to update a customized algorithm implemented in BPF
struct_ops with a new version on the flight.  The following is an
example of using the userspace API implemented in later BPF patches.

    link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
    .......
    err = bpf_link__update_map(link, skel->maps.ca_update_2);

We first load and register an algorithm implemented in BPF struct_ops,
then swap it out with a new one using the same name. After that, newly
created connections will apply the updated algorithm, while older ones
retain the previous version already applied.

This patch also takes this chance to refactor the ca validation into
the new tcp_validate_congestion_control() function.

Cc: netdev@xxxxxxxxxxxxxxx, Eric Dumazet <edumazet@xxxxxxxxxx>
Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxxxx>
---
  include/net/tcp.h   |  3 +++
  net/ipv4/tcp_cong.c | 60 +++++++++++++++++++++++++++++++++++++++------
  2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index db9f828e9d1e..2abb755e6a3a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
  void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
+int tcp_update_congestion_control(struct tcp_congestion_ops *type,
+				  struct tcp_congestion_ops *old_type);
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
void tcp_assign_congestion_control(struct sock *sk);
  void tcp_init_congestion_control(struct sock *sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index db8b4b488c31..c90791ae8389 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
  	return NULL;
  }
-/*
- * Attach new congestion control algorithm to the list
- * of available options.
- */
-int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
  {
-	int ret = 0;
-
  	/* all algorithms must implement these */
  	if (!ca->ssthresh || !ca->undo_cwnd ||
  	    !(ca->cong_avoid || ca->cong_control)) {
@@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
  		return -EINVAL;
  	}
+ return 0;
+}
+
+/* Attach new congestion control algorithm to the list
+ * of available options.
+ */
+int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+{
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
  	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
spin_lock(&tcp_cong_list_lock);
@@ -130,6 +138,44 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
  }
  EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
+/* Replace a registered old ca with a new one.
+ *
+ * The new ca must have the same name as the old one, that has been
+ * registered.
+ */
+int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
+{
+	struct tcp_congestion_ops *existing;
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
+	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
+
+	spin_lock(&tcp_cong_list_lock);
+	existing = tcp_ca_find_key(old_ca->key);
+	if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
+		pr_notice("%s not registered or non-unique key\n",
+			  ca->name);
+		ret = -EINVAL;
+	} else if (existing != old_ca) {
+		pr_notice("invalid old congestion control algorithm to replace\n");
+		ret = -EINVAL;
+	} else {
+		/* Add the new one before removing the old one to keep
+		 * one implementation available all the time.
+		 */
+		list_add_tail_rcu(&ca->list, &tcp_cong_list);
+		list_del_rcu(&existing->list);
+		pr_debug("%s updated\n", ca->name);
+	}
+	spin_unlock(&tcp_cong_list_lock);
+
+	return ret;
+}

Was wondering if we could have tcp_register_congestion_control and tcp_update_congestion_control
could be refactored for reuse. Maybe like below. From the function itself what is not clear whether
callers that replace an existing one should do the synchronize_rcu() themselves or if this should
be part of tcp_update_congestion_control?

int tcp_check_congestion_control(struct tcp_congestion_ops *ca)
{
	/* All algorithms must implement these. */
	if (!ca->ssthresh || !ca->undo_cwnd ||
	    !(ca->cong_avoid || ca->cong_control)) {
		pr_err("%s does not implement required ops\n", ca->name);
		return -EINVAL;
	}
	if (ca->key == TCP_CA_UNSPEC)
		ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
	if (ca->key == TCP_CA_UNSPEC) {
		pr_notice("%s results in zero key\n", ca->name);
		return -EEXIST;
	}
	return 0;
}

/* Attach new congestion control algorithm to the list of available
 * options or replace an existing one if old is non-NULL.
 */
int tcp_update_congestion_control(struct tcp_congestion_ops *new,
				  struct tcp_congestion_ops *old)
{
	struct tcp_congestion_ops *found;
	int ret;

	ret = tcp_check_congestion_control(new);
	if (ret)
		return ret;
	if (old &&
	    (old->key != new->key ||
	     strcmp(old->name, new->name))) {
		pr_notice("%s & %s have non-matching congestion control names\n",
			  old->name, new->name);
		return -EINVAL;
	}
	spin_lock(&tcp_cong_list_lock);
	found = tcp_ca_find_key(new->key);
	if (old) {
		if (found == old) {
			list_add_tail_rcu(&new->list, &tcp_cong_list);
			list_del_rcu(&old->list);
		} else {
			pr_notice("%s not registered\n", old->name);
			ret = -EINVAL;
		}
	} else {
		if (found) {
			pr_notice("%s already registered\n", new->name);
			ret = -EEXIST;
		} else {
			list_add_tail_rcu(&new->list, &tcp_cong_list);
		}
	}
	spin_unlock(&tcp_cong_list_lock);
	return ret;
}

int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
	return tcp_update_congestion_control(ca, NULL);
}
EXPORT_SYMBOL_GPL(tcp_register_congestion_control);



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux