Re: [PATCH v2 bpf-next 1/3] bpf: Implement batching in UDP iterator

On 2/23/23 1:53 PM, Aditi Ghag wrote:
+struct bpf_udp_iter_state {
+	struct udp_iter_state state;
+	unsigned int cur_sk;
+	unsigned int end_sk;
+	unsigned int max_sk;
+	struct sock **batch;
+	bool st_bucket_done;
+};
+
+static unsigned short seq_file_family(const struct seq_file *seq);
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+				      unsigned int new_batch_sz);
+
+static inline bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+	unsigned short family = seq_file_family(seq);
+
+	/* AF_UNSPEC is used as a match all */
+	return ((family == AF_UNSPEC || family == sk->sk_family) &&
+		net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
+{
+	struct bpf_udp_iter_state *iter = seq->private;
+	struct udp_iter_state *state = &iter->state;
+	struct net *net = seq_file_net(seq);
+	struct udp_seq_afinfo *afinfo = state->bpf_seq_afinfo;
+	struct udp_table *udptable;
+	struct sock *first_sk = NULL;
+	struct sock *sk;
+	unsigned int bucket_sks = 0;
+	bool first;
+	bool resized = false;
+
+	/* The current batch is done, so advance the bucket. */
+	if (iter->st_bucket_done)
+		state->bucket++;
+
+	udptable = udp_get_table_afinfo(afinfo, net);
+
+again:
+	/* New batch for the next bucket.
+	 * Iterate over the hash table to find a bucket with sockets matching
+	 * the iterator attributes, and return the first matching socket from
+	 * the bucket. The remaining matched sockets from the bucket are batched
+	 * before releasing the bucket lock. This allows BPF programs that are
+	 * called in seq_show to acquire the bucket lock if needed.
+	 */
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+	iter->st_bucket_done = false;
+	first = true;
+
+	for (; state->bucket <= udptable->mask; state->bucket++) {
+		struct udp_hslot *hslot = &udptable->hash[state->bucket];

Since it is now mostly separated from the proc's udp seq-file, it may as well iterate udptable->hash2, which is hashed by both address and port, so that each batch should be smaller.
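For illustration, a sketch of what the bucket loop could look like over hash2. hslot2 and udp_portaddr_for_each_entry() are the existing helpers in net/ipv4/udp.c; the batching body is elided:

	for (; state->bucket <= udptable->mask; state->bucket++) {
		struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];

		if (hlist_empty(&hslot2->head))
			continue;

		spin_lock_bh(&hslot2->lock);
		/* hash2 chains sockets through skc_portaddr_node */
		udp_portaddr_for_each_entry(sk, &hslot2->head) {
			if (seq_sk_match(seq, sk)) {
				/* same batching logic as with hash */
			}
		}
		spin_unlock_bh(&hslot2->lock);
	}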

+
+		if (hlist_empty(&hslot->head))
+			continue;
+
+		spin_lock_bh(&hslot->lock);
+		sk_for_each(sk, &hslot->head) {
+			if (seq_sk_match(seq, sk)) {
+				if (first) {
+					first_sk = sk;
+					first = false;
+				}
+				if (iter->end_sk < iter->max_sk) {
+					sock_hold(sk);
+					iter->batch[iter->end_sk++] = sk;
+				}
+				bucket_sks++;
+			}
+		}
+		spin_unlock_bh(&hslot->lock);
+		if (first_sk)
+			break;
+	}
+
+	/* All done: no batch made. */
+	if (!first_sk)
+		return NULL;

I think first_sk and bucket_sks also need to be reset in the "again" case?
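Something along these lines under the "again" label would keep a retried bucket from seeing stale values (a sketch only):

again:
	/* Reset all per-attempt state, not just the batch indices;
	 * otherwise a "goto again" keeps first_sk/bucket_sks from the
	 * previous pass over the bucket.
	 */
	first_sk = NULL;
	bucket_sks = 0;
	iter->cur_sk = 0;
	iter->end_sk = 0;
	iter->st_bucket_done = false;

With first_sk reset here, the separate "first" flag also becomes redundant: testing !first_sk is enough.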

If bpf_iter_udp_seq_stop() is called before a batch has been fully processed by the bpf prog in ".show", how does the next bpf_iter_udp_seq_start() continue from where it left off? The bpf_tcp_iter remembers the bucket and the offset-in-this-bucket. I think bpf_udp_iter can do something similar.
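A sketch of that tcp-style approach, assuming a new "offset" field in bpf_udp_iter_state (the field name is illustrative):

	struct bpf_udp_iter_state {
		struct udp_iter_state state;
		unsigned int cur_sk;
		unsigned int end_sk;
		unsigned int max_sk;
		unsigned int offset;	/* matched sockets already shown
					 * from the current bucket
					 */
		struct sock **batch;
		bool st_bucket_done;
	};

seq_stop() would record how far the bpf prog got (e.g. iter->offset += iter->cur_sk), and when bpf_iter_udp_batch() revisits the same bucket it would skip the first iter->offset matching sockets before batching, so the next seq_start() resumes where it left off instead of replaying the whole bucket.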

+
+	if (iter->end_sk == bucket_sks) {
+		/* Batching is done for the current bucket; return the first
+		 * socket to be iterated from the batch.
+		 */
+		iter->st_bucket_done = true;
+		return first_sk;
+	}
+	if (!resized && !bpf_iter_udp_realloc_batch(iter, bucket_sks * 3 / 2)) {
+		resized = true;
+		/* Go back to the previous bucket to resize its batch. */
+		state->bucket--;
+		goto again;
+	}
+	return first_sk;
+}
+

[ ... ]

  static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
  {
-	struct udp_iter_state *st = priv_data;
+	struct bpf_udp_iter_state *iter = priv_data;
+	struct udp_iter_state *st = &iter->state;
  	struct udp_seq_afinfo *afinfo;
  	int ret;
@@ -3427,24 +3623,34 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
  	afinfo->udp_table = NULL;
  	st->bpf_seq_afinfo = afinfo;

Is bpf_seq_afinfo still needed in 'struct udp_iter_state'? Can it be removed?

  	ret = bpf_iter_init_seq_net(priv_data, aux);
-	if (ret)
+	if (ret) {
  		kfree(afinfo);
+		return ret;
+	}
+	ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ);
+	if (ret) {
+		bpf_iter_fini_seq_net(priv_data);
+		return ret;
+	}
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+
  	return ret;
  }
  static void bpf_iter_fini_udp(void *priv_data)
  {
-	struct udp_iter_state *st = priv_data;
+	struct bpf_udp_iter_state *iter = priv_data;

-	kfree(st->bpf_seq_afinfo);
  	bpf_iter_fini_seq_net(priv_data);
+	kfree(iter->batch);

kvfree() is needed here: the batch must be freed with kvfree() to match its allocation.
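To make the pairing explicit, a sketch of the fixed-up fini, assuming the batch is allocated with kvmalloc()/kvmalloc_array() as the tcp iterator's batch is:

static void bpf_iter_fini_udp(void *priv_data)
{
	struct bpf_udp_iter_state *iter = priv_data;

	bpf_iter_fini_seq_net(priv_data);
	kvfree(iter->batch);	/* kvmalloc'ed memory needs kvfree(), not kfree() */
}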