On 25/08/2023 19:26, David Ahern wrote: > On 8/25/23 6:19 PM, David Wei wrote: >> diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c >> index 6c57c9b06e05..8cc66731af5b 100644 >> --- a/io_uring/zc_rx.c >> +++ b/io_uring/zc_rx.c >> @@ -10,6 +11,35 @@ >> #include "kbuf.h" >> #include "zc_rx.h" >> >> +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); >> + >> +static int __io_queue_mgmt(struct net_device *dev, struct io_zc_rx_ifq *ifq, >> + u16 queue_id) >> +{ >> + struct netdev_bpf cmd; >> + bpf_op_t ndo_bpf; >> + >> + ndo_bpf = dev->netdev_ops->ndo_bpf; > > This is not bpf related, so it seems wrong to be overloading this ndo. Agreed. I believe the original author (Jonathan) was inspired by XDP_SETUP_XSK_POOL. I would also prefer a better way of setting up an RX queue for ZC. Mina's proposal I think uses netlink for this. Do you have any other suggestions? We want to keep resource registration inline, so we need to be able to call it from within io_uring. > > >> + if (!ndo_bpf) >> + return -EINVAL; >> + >> + cmd.command = XDP_SETUP_ZC_RX; >> + cmd.zc_rx.ifq = ifq; >> + cmd.zc_rx.queue_id = queue_id; >> + >> + return ndo_bpf(dev, &cmd); >> +} >> + >> +static int io_open_zc_rxq(struct io_zc_rx_ifq *ifq) >> +{ >> + return __io_queue_mgmt(ifq->dev, ifq, ifq->if_rxq_id); >> +} >> + >> +static int io_close_zc_rxq(struct io_zc_rx_ifq *ifq) >> +{ >> + return __io_queue_mgmt(ifq->dev, NULL, ifq->if_rxq_id); >> +} >> + >> static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx) >> { >> struct io_zc_rx_ifq *ifq; >> @@ -19,12 +49,17 @@ static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx) >> return NULL; >> >> ifq->ctx = ctx; >> + ifq->if_rxq_id = -1; >> >> return ifq; >> } >> >> static void io_zc_rx_ifq_free(struct io_zc_rx_ifq *ifq) >> { >> + if (ifq->if_rxq_id != -1) >> + io_close_zc_rxq(ifq); >> + if (ifq->dev) >> + dev_put(ifq->dev); >> io_free_rbuf_ring(ifq); >> kfree(ifq); >> } >> @@ -41,17 +76,22 @@ int io_register_zc_rx_ifq(struct io_ring_ctx *ctx, >> return -EFAULT; >> if (ctx->ifq) >> return -EBUSY; >> + if (reg.if_rxq_id == -1) >> + return -EINVAL; >> >> ifq = io_zc_rx_ifq_alloc(ctx); >> if (!ifq) >> return -ENOMEM; >> >> - /* TODO: initialise network interface */ >> - >> ret = io_allocate_rbuf_ring(ifq, ®); >> if (ret) >> goto err; >> >> + ret = -ENODEV; >> + ifq->dev = dev_get_by_index(&init_net, reg.if_idx); > > What's the plan for other namespaces? Ideally the device bind comes from > a socket and that gives you the namespace. Sorry, I did not consider namespaces yet. I'll look into how namespaces work and then get back to you. > >> + if (!ifq->dev) >> + goto err; >> + >> /* TODO: map zc region and initialise zc pool */ >> >> ifq->rq_entries = reg.rq_entries; > >