On 3/30/23 8:17 AM, Aditi Ghag wrote:
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+				      unsigned int new_batch_sz);
static inline bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
{
@@ -3151,6 +3163,149 @@ static inline bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
		 net_eq(sock_net(sk), seq_file_net(seq)));
}
+static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
+{
+	struct bpf_udp_iter_state *iter = seq->private;
+	struct udp_iter_state *state = &iter->state;
+	struct net *net = seq_file_net(seq);
+	struct sock *first_sk = NULL;
+	struct udp_seq_afinfo afinfo;
+	struct udp_table *udptable;
+	unsigned int batch_sks = 0;
+	bool resized = false;
+	struct sock *sk;
+	int offset = 0;
+	int new_offset;
+
+	/* The current batch is done, so advance the bucket. */
+	if (iter->st_bucket_done) {
+		state->bucket++;
+		iter->offset = 0;
+	}
+
+	afinfo.family = AF_UNSPEC;
+	afinfo.udp_table = NULL;
+	udptable = udp_get_table_afinfo(&afinfo, net);
+
+	if (state->bucket > udptable->mask) {
This test looks unnecessary. The for-loop below should take care of this case?
+		state->bucket = 0;
Resetting state->bucket here also looks suspicious (or at least unnecessary).
The iterator cannot restart from the beginning, or am I missing something?
This at least needs a comment if it is really needed. A rough, untested
sketch follows the quoted check below.
+		iter->offset = 0;
+		return NULL;
+	}
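Something like this (untested, just to illustrate) should be enough, letting
the for-loop condition below handle the end of the table:

	/* The current batch is done, so advance the bucket. */
	if (iter->st_bucket_done) {
		state->bucket++;
		iter->offset = 0;
	}

	afinfo.family = AF_UNSPEC;
	afinfo.udp_table = NULL;
	udptable = udp_get_table_afinfo(&afinfo, net);

again:
	...
	for (; state->bucket <= udptable->mask; state->bucket++) {
		...
	}

Once state->bucket is greater than udptable->mask, the loop body never runs,
first_sk stays NULL and the caller gets NULL back, so neither the explicit
range check nor the bucket reset is needed here.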
+
+again:
+	/* New batch for the next bucket.
+	 * Iterate over the hash table to find a bucket with sockets matching
+	 * the iterator attributes, and return the first matching socket from
+	 * the bucket. The remaining matched sockets from the bucket are batched
+	 * before releasing the bucket lock. This allows BPF programs that are
+	 * called in seq_show to acquire the bucket lock if needed.
+	 */
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+	iter->st_bucket_done = false;
+	first_sk = NULL;
+	batch_sks = 0;
+	offset = iter->offset;
+
+	for (; state->bucket <= udptable->mask; state->bucket++) {
+		struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
+
+		if (hlist_empty(&hslot2->head)) {
+			offset = 0;
+			continue;
+		}
+		new_offset = offset;
+
+		spin_lock_bh(&hslot2->lock);
+		udp_portaddr_for_each_entry(sk, &hslot2->head) {
+			if (seq_sk_match(seq, sk)) {
+				/* Resume from the last iterated socket at the
+				 * offset in the bucket before iterator was stopped.
+				 */
+				if (offset) {
+					--offset;
+					continue;
+				}
+				if (!first_sk)
+					first_sk = sk;
+				if (iter->end_sk < iter->max_sk) {
+					sock_hold(sk);
+					iter->batch[iter->end_sk++] = sk;
+				}
+				batch_sks++;
+				new_offset++;
+			}
+		}
+		spin_unlock_bh(&hslot2->lock);
+
+		if (first_sk)
+			break;
+
+		/* Reset the current bucket's offset before moving to the next bucket. */
+		offset = 0;
+	}
+
+	/* All done: no batch made. */
+	if (!first_sk)
Testing !iter->end_sk should be the same?
+		goto ret;
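i.e. (untested):

	/* All done: no batch made. */
	if (!iter->end_sk)
		goto ret;

That would also help with dropping 'first_sk' altogether (more on that below).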
+
+	if (iter->end_sk == batch_sks) {
+		/* Batching is done for the current bucket; return the first
+		 * socket to be iterated from the batch.
+		 */
+		iter->st_bucket_done = true;
+		goto ret;
+	}
+	if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
+		resized = true;
+		/* Go back to the previous bucket to resize its batch. */
+		state->bucket--;
+		goto again;
+	}
+ret:
+	iter->offset = new_offset;
hmm... updating iter->offset here does not look right, and does it really
need a new_offset?
afaict, either
a) it resumes at the old bucket, in which case iter->offset should not change,
or
b) it has moved on to the next bucket, in which case iter->offset should be 0.
+	return first_sk;
iter->batch[0] is the first_sk, so the 'first_sk' variable is not needed then.
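Roughly (untested), and assuming the for-loop updates iter->offset directly
(decrement it when skipping an already-iterated socket, reset it to 0 when
moving on to the next bucket) instead of going through the local
offset/new_offset copies, the tail of the function could then look like:

	/* All done: no batch made. */
	if (!iter->end_sk)
		return NULL;

	if (iter->end_sk == batch_sks) {
		/* Batching is done for the current bucket; return the first
		 * socket to be iterated from the batch.
		 */
		iter->st_bucket_done = true;
		goto ret;
	}
	if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
		resized = true;
		/* Go back to the previous bucket to resize its batch. */
		state->bucket--;
		goto again;
	}
ret:
	/* iter->offset is left alone here: it either still points into the
	 * bucket being resumed (case a), or it was already reset to 0 when
	 * the loop moved on to a later bucket (case b).
	 */
	return iter->batch[0];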