Re: [PATCH bpf 1/2] bpf: Avoid iter->offset making backward progress in bpf_iter_udp

On 12/20/23 11:10 AM, Martin KaFai Lau wrote:
Good catch. It will unnecessarily skip in the following batch/bucket if there are changes in the current batch/bucket.

From looking at the loop again, I think it is better not to change iter->offset during the for loop, and to only update iter->offset after the for loop has concluded.

The non-zero iter->offset is only useful for the first bucket, so do a test on the first bucket (state->bucket == bucket) before skipping sockets. Something like this:

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 89e5a806b82e..a993f364d6ae 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -3139,6 +3139,7 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 	struct udp_table *udptable;
 	unsigned int batch_sks = 0;
+	int bucket, bucket_offset;
 	bool resized = false;
 	struct sock *sk;
 
@@ -3162,14 +3163,14 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 	iter->end_sk = 0;
 	iter->st_bucket_done = false;
 	batch_sks = 0;
+	bucket = state->bucket;
+	bucket_offset = 0;
 
 	for (; state->bucket <= udptable->mask; state->bucket++) {
 		struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
 
-		if (hlist_empty(&hslot2->head)) {
-			iter->offset = 0;
+		if (hlist_empty(&hslot2->head))
 			continue;
-		}
 
 		spin_lock_bh(&hslot2->lock);
 		udp_portaddr_for_each_entry(sk, &hslot2->head) {
@@ -3177,8 +3178,9 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 				/* Resume from the last iterated socket at the
 				 * offset in the bucket before iterator was stopped.
 				 */
-				if (iter->offset) {
-					--iter->offset;
+				if (state->bucket == bucket &&
+				    bucket_offset < iter->offset) {
+					++bucket_offset;
 					continue;
 				}
 				if (iter->end_sk < iter->max_sk) {
@@ -3192,10 +3194,10 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 
 		if (iter->end_sk)
 			break;
+	}
 
-		/* Reset the current bucket's offset before moving to the next bucket. */
+	if (state->bucket != bucket)
 		iter->offset = 0;
-	}
 
 	/* All done: no batch made. */
 	if (!iter->end_sk)

I think I found another bug in the current bpf_iter_udp_batch(). The "state->bucket--;" at the end of the batch() function is also wrong: it does not need to go back to the previous bucket. After realloc'ing a larger batch array, it should retry on the current "state->bucket" as is. I tried to force the bind() to use bucket 0 and bound a larger SO_REUSEPORT set (24 sockets), and the WARN_ON(state->bucket < 0) triggered.
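The reproducer is roughly like the following untested userspace sketch: bind a SO_REUSEPORT set of 24 UDP sockets to the same address/port so that one hash2 bucket holds more sockets than the iterator's batch array. The port below is arbitrary, and which bucket the sockets land in depends on the hash, which this sketch does not control (I forced that separately for the test).

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>

#define NR_SOCKS 24

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8000),	/* arbitrary port */
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	int fds[NR_SOCKS], one = 1, i;

	for (i = 0; i < NR_SOCKS; i++) {
		fds[i] = socket(AF_INET, SOCK_DGRAM, 0);
		if (fds[i] < 0 ||
		    setsockopt(fds[i], SOL_SOCKET, SO_REUSEPORT,
			       &one, sizeof(one)) ||
		    bind(fds[i], (struct sockaddr *)&addr, sizeof(addr))) {
			perror("socket/setsockopt/bind");
			return 1;
		}
	}
	/* Keep the sockets alive while the UDP bpf_iter is read. */
	pause();
	return 0;
}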

Going back to this bug (the backward progress on --iter->offset), I think it is a bit cleaner to always reset iter->offset to 0 and advance it to the resume_offset only when needed. Something like this:

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 89e5a806b82e..184aa966a006 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -3137,16 +3137,18 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 	struct bpf_udp_iter_state *iter = seq->private;
 	struct udp_iter_state *state = &iter->state;
 	struct net *net = seq_file_net(seq);
+	int resume_bucket, resume_offset;
 	struct udp_table *udptable;
 	unsigned int batch_sks = 0;
 	bool resized = false;
 	struct sock *sk;

+	resume_bucket = state->bucket;
+	resume_offset = iter->offset;
+
 	/* The current batch is done, so advance the bucket. */
-	if (iter->st_bucket_done) {
+	if (iter->st_bucket_done)
 		state->bucket++;
-		iter->offset = 0;
-	}

 	udptable = udp_get_table_seq(seq, net);

@@ -3166,10 +3168,9 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 	for (; state->bucket <= udptable->mask; state->bucket++) {
 		struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];

-		if (hlist_empty(&hslot2->head)) {
-			iter->offset = 0;
+		iter->offset = 0;
+		if (hlist_empty(&hslot2->head))
 			continue;
-		}

 		spin_lock_bh(&hslot2->lock);
 		udp_portaddr_for_each_entry(sk, &hslot2->head) {
@@ -3177,8 +3178,9 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 				/* Resume from the last iterated socket at the
 				 * offset in the bucket before iterator was stopped.
 				 */
-				if (iter->offset) {
-					--iter->offset;
+				if (state->bucket == resume_bucket &&
+				    iter->offset < resume_offset) {
+					++iter->offset;
 					continue;
 				}
 				if (iter->end_sk < iter->max_sk) {
@@ -3192,9 +3194,6 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)

 		if (iter->end_sk)
 			break;
-
-		/* Reset the current bucket's offset before moving to the next bucket. */
-		iter->offset = 0;
 	}

 	/* All done: no batch made. */
@@ -3210,10 +3209,6 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 	}
 	if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
 		resized = true;
-		/* After allocating a larger batch, retry one more time to grab
-		 * the whole bucket.
-		 */
-		state->bucket--;
 		goto again;
 	}
 done:
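
To see the resume semantics of this second approach in isolation, here is a toy userspace model (my own sketch, not the kernel code; the bucket contents are made up): the resume point is snapshotted once, iter.offset is reset at the top of every bucket and only counted upward while skipping inside the resume bucket, so it can never make backward progress even if sockets were added or removed between batches.

#include <stdio.h>

#define NR_BUCKETS 4
/* Big enough for every toy bucket, so the realloc-and-retry path
 * (batch_sks > iter->end_sk in the kernel) is not modeled here.
 */
#define MAX_BATCH 4

/* Toy "hash table": bucket[i] holds the socket ids in bucket i. */
static int bucket[NR_BUCKETS][3] = {
	{ 0 },			/* empty bucket */
	{ 11, 12, 13 },
	{ 21 },
	{ 31, 32 },
};
static int bucket_len[NR_BUCKETS] = { 0, 3, 1, 2 };

struct iter {
	int bucket;		/* state->bucket in the kernel code */
	int offset;		/* iter->offset */
	int bucket_done;	/* iter->st_bucket_done */
};

/* Mirrors the post-patch flow of bpf_iter_udp_batch(): snapshot the
 * resume point once, reset offset at the top of every bucket, and
 * only count it upward while skipping inside the resume bucket.
 */
static int batch(struct iter *it, int *out)
{
	int resume_bucket = it->bucket;
	int resume_offset = it->offset;
	int n = 0, i;

	if (it->bucket_done)
		it->bucket++;

	for (; it->bucket < NR_BUCKETS; it->bucket++) {
		it->offset = 0;
		for (i = 0; i < bucket_len[it->bucket]; i++) {
			if (it->bucket == resume_bucket &&
			    it->offset < resume_offset) {
				++it->offset;	/* never decremented */
				continue;
			}
			out[n++] = bucket[it->bucket][i];
			++it->offset;
		}
		if (n)
			break;
	}
	it->bucket_done = n > 0;
	return n;
}

int main(void)
{
	/* Resume as if the iterator was stopped after delivering two
	 * sockets from bucket 1: 11 and 12 are skipped, 13 onward is
	 * batched, and iter.offset never moves backward.
	 */
	struct iter it = { .bucket = 1, .offset = 2 };
	int out[MAX_BATCH], n, i;

	while ((n = batch(&it, out)) > 0)
		for (i = 0; i < n; i++)
			printf("sk %d (bucket %d)\n", out[i], it.bucket);
	return 0;
}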




