Re: [PATCH bpf-next v9 1/5] bpf: Add "live packet" mode for XDP in BPF_PROG_RUN

Martin KaFai Lau <kafai@xxxxxx> writes:

> On Sun, Mar 06, 2022 at 11:34:00PM +0100, Toke Høiland-Jørgensen wrote:
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 4eebea830613..a36065872882 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -1232,6 +1232,10 @@ enum {
>>  
>>  /* If set, run the test on the cpu specified by bpf_attr.test.cpu */
>>  #define BPF_F_TEST_RUN_ON_CPU	(1U << 0)
>> +/* Guaranteed to be rejected in XDP tests (for probing) */
>> +#define BPF_F_TEST_XDP_RESERVED	(1U << 1)
>> +/* If set, XDP frames will be transmitted after processing */
>> +#define BPF_F_TEST_XDP_LIVE_FRAMES	(1U << 2)
>>  
>>  /* type for BPF_ENABLE_STATS */
>>  enum bpf_stats_type {
>> @@ -1393,6 +1397,7 @@ union bpf_attr {
>>  		__aligned_u64	ctx_out;
>>  		__u32		flags;
>>  		__u32		cpu;
>> +		__u32		batch_size;
>>  	} test;
>>  
>>  	struct { /* anonymous struct used by BPF_*_GET_*_ID */
>
> [ ... ]
>
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index db402ebc5570..9beb585be5a6 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -3336,7 +3336,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
>>  	}
>>  }
>>  
>> -#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
>> +#define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size
> Instead of adding BPF_F_TEST_XDP_RESERVED,
> probing by non-zero batch_size (== 1) should be as good?

Hmm, yeah, good point; I added the RESERVED flag before introducing the
batch_size field, so I guess it's not needed anymore.
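Just to spell out the probing idea for the archives, here is a rough
userspace sketch (assuming the libbpf side of this series exposes the
new batch_size field in bpf_test_run_opts, and that the caller already
has a minimal XDP program loaded as prog_fd); on a kernel without this
series the non-zero batch_size bytes sit past the last attr field it
knows about, so the test run should simply fail:

#include <stdbool.h>
#include <bpf/bpf.h>
#include <linux/if_ether.h>

/* Hypothetical probe helper, not part of the patch: attempt a single
 * live test run with batch_size == 1 and treat any failure as "not
 * supported".
 */
static bool probe_live_frames(int prog_fd)
{
	char data[ETH_HLEN + 4] = {};	/* minimal Ethernet frame */
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = data,
		.data_size_in = sizeof(data),
		.repeat = 1,
		.batch_size = 1,
		.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
	);

	return bpf_prog_test_run_opts(prog_fd, &opts) == 0;
}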

> [ ... ]
>
>> +static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
>> +			      u32 repeat)
>> +{
>> +	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>> +	int err = 0, act, ret, i, nframes = 0, batch_sz;
>> +	struct xdp_frame **frames = xdp->frames;
>> +	struct xdp_page_head *head;
>> +	struct xdp_frame *frm;
>> +	bool redirect = false;
>> +	struct xdp_buff *ctx;
>> +	struct page *page;
>> +
>> +	batch_sz = min_t(u32, repeat, xdp->batch_size);
>> +
>> +	local_bh_disable();
>> +	xdp_set_return_frame_no_direct();
>> +
>> +	for (i = 0; i < batch_sz; i++) {
>> +		page = page_pool_dev_alloc_pages(xdp->pp);
>> +		if (!page) {
>> +			err = -ENOMEM;
>> +			goto out;
>> +		}
>> +
>> +		head = phys_to_virt(page_to_phys(page));
>> +		reset_ctx(head);
>> +		ctx = &head->ctx;
>> +		frm = &head->frm;
>> +		xdp->frame_cnt++;
>> +
>> +		act = bpf_prog_run_xdp(prog, ctx);
>> +
>> +		/* if program changed pkt bounds we need to update the xdp_frame */
>> +		if (unlikely(ctx_was_changed(head))) {
>> +			err = xdp_update_frame_from_buff(ctx, frm);
>> +			if (err) {
>> +				xdp_return_buff(ctx);
>> +				continue;
>> +			}
>> +		}
>> +
>> +		switch (act) {
>> +		case XDP_TX:
>> +			/* we can't do a real XDP_TX since we're not in the
>> +			 * driver, so turn it into a REDIRECT back to the same
>> +			 * index
>> +			 */
>> +			ri->tgt_index = xdp->dev->ifindex;
>> +			ri->map_id = INT_MAX;
>> +			ri->map_type = BPF_MAP_TYPE_UNSPEC;
>> +			fallthrough;
>> +		case XDP_REDIRECT:
>> +			redirect = true;
>> +			err = xdp_do_redirect_frame(xdp->dev, ctx, frm, prog);
> err of the previous iteration is over written.
>
>> +			if (err)
>> +				xdp_return_buff(ctx);
>> +			break;
>> +		case XDP_PASS:
>> +			frames[nframes++] = frm;
>> +			break;
>> +		default:
>> +			bpf_warn_invalid_xdp_action(NULL, prog, act);
>> +			fallthrough;
>> +		case XDP_DROP:
>> +			xdp_return_buff(ctx);
>> +			break;
>> +		}
>> +	}
>> +
>> +out:
>> +	if (redirect)
>> +		xdp_do_flush();
>> +	if (nframes) {
>> +		ret = xdp_recv_frames(frames, nframes, xdp->skbs, xdp->dev);
>> +		if (ret)
>> +			err = ret;
> but here is trying to avoid over writing the err if possible.
>
>> +	}
>> +
>> +	xdp_clear_return_frame_no_direct();
>> +	local_bh_enable();
>> +	return err;
> so only err happens in the last iteration will break the loop in
> bpf_test_run_xdp_live()?

Ah, excellent catch. This is also an artefact of earlier revisions where
any error would break the loop. Since that is no longer the case, we
should only propagate fatal errors (i.e., memory allocation errors
during the run); will fix!
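Just to sketch the direction (not claiming this is exactly what the
next revision will contain): the redirect case could stop writing into
err and use the scratch ret instead, so a per-frame redirect failure
only drops that frame, while err stays reserved for fatal conditions
like the allocation failure above:

		case XDP_REDIRECT:
			redirect = true;
			ret = xdp_do_redirect_frame(xdp->dev, ctx, frm, prog);
			if (ret)
				xdp_return_buff(ctx);
			break;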

-Toke