On Fri, 28 Jun 2024 00:32:40 +0000 Mina Almasry wrote: > +/* Protected by rtnl_lock() */ > +static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); > + > +void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) > +{ > + struct netdev_rx_queue *rxq; > + unsigned long xa_idx; > + unsigned int rxq_idx; > + > + if (!binding) > + return; nit: I don't see how it can happen, no defensive programming, please > + if (binding->list.next) > + list_del(&binding->list); > + > + xa_for_each(&binding->bound_rxq_list, xa_idx, rxq) { nit: s/bound_rxq_list/bound_rxqs/ ? it's not a list > + if (rxq->mp_params.mp_priv == binding) { > + /* We hold the rtnl_lock while binding/unbinding > + * dma-buf, so we can't race with another thread that > + * is also modifying this value. However, the page_pool > + * may read this config while it's creating its > + * rx-queues. WRITE_ONCE() here to match the > + * READ_ONCE() in the page_pool. > + */ > + WRITE_ONCE(rxq->mp_params.mp_priv, NULL); Is this really sufficient in terms of locking? @binding is not RCU-protected and neither is the reader guaranteed to be in an RCU critical section. Actually the "reader" tries to take a ref and use this struct so it's not even a pure reader. 
Let's add a lock or use one of the existing locks. Or perhaps it's time to add a mutex to struct net_device. > + rxq_idx = get_netdev_rx_queue_index(rxq); > + > + netdev_rx_queue_restart(binding->dev, rxq_idx); > + } > + } > + > + xa_erase(&net_devmem_dmabuf_bindings, binding->id); > + > + net_devmem_dmabuf_binding_put(binding); > +} > + > +int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, > + struct net_devmem_dmabuf_binding *binding) > +{ > + struct netdev_rx_queue *rxq; > + u32 xa_idx; > + int err; > + > + if (rxq_idx >= dev->num_rx_queues) > + return -ERANGE; > + > + rxq = __netif_get_rx_queue(dev, rxq_idx); > + if (rxq->mp_params.mp_priv) > + return -EEXIST; Makes me wonder - do we need an API to unbind, or do we assume the application will only have one binding per socket and close it every time? I guess that's fine for future extension. > + err = xa_alloc(&binding->bound_rxq_list, &xa_idx, rxq, xa_limit_32b, > + GFP_KERNEL); > + if (err) > + return err; > + > + /* We hold the rtnl_lock while binding/unbinding dma-buf, so we can't > + * race with another thread that is also modifying this value. However, > + * the driver may read this config while it's creating its * rx-queues. > + * WRITE_ONCE() here to match the READ_ONCE() in the driver. 
> + */ > + WRITE_ONCE(rxq->mp_params.mp_priv, binding); > + > + err = netdev_rx_queue_restart(dev, rxq_idx); > + if (err) > + goto err_xa_erase; > + > + return 0; > + > +err_xa_erase: > + WRITE_ONCE(rxq->mp_params.mp_priv, NULL); > + xa_erase(&binding->bound_rxq_list, xa_idx); > + > + return err; > +} > + > +int net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, > + struct net_devmem_dmabuf_binding **out) > +{ > + struct net_devmem_dmabuf_binding *binding; > + static u32 id_alloc_next; > + struct scatterlist *sg; > + struct dma_buf *dmabuf; > + unsigned int sg_idx, i; > + unsigned long virtual; > + int err; > + > + dmabuf = dma_buf_get(dmabuf_fd); > + if (IS_ERR(dmabuf)) > + return -EBADFD; nit: I think error pointers are nicer than **out parameters :( you can ERR_CAST() all the DMABUF errors > + binding = kzalloc_node(sizeof(*binding), GFP_KERNEL, > + dev_to_node(&dev->dev)); > + if (!binding) { > + err = -ENOMEM; > + goto err_put_dmabuf; > + } > + > + binding->dev = dev; > + > + err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id, > + binding, xa_limit_32b, &id_alloc_next, > + GFP_KERNEL); > + if (err < 0) > + goto err_free_binding; > + > + xa_init_flags(&binding->bound_rxq_list, XA_FLAGS_ALLOC); > + > + refcount_set(&binding->ref, 1); > + > + binding->dmabuf = dmabuf; > + > + binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); > + if (IS_ERR(binding->attachment)) { > + err = PTR_ERR(binding->attachment); > + goto err_free_id; > + } > -/* Stub */ > int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) > { > - return 0; > + struct nlattr *tb[ARRAY_SIZE(netdev_queue_dmabuf_nl_policy)]; > + struct net_devmem_dmabuf_binding *out_binding; > + struct list_head *sock_binding_list; > + u32 ifindex, dmabuf_fd, rxq_idx; > + struct net_device *netdev; > + struct sk_buff *rsp; > + struct nlattr *attr; > + int rem, err = 0; > + void *hdr; > + > + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || > + 
GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_DMABUF_FD) || > + GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_QUEUES)) > + return -EINVAL; > + > + ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); > + dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_BIND_DMABUF_DMABUF_FD]); > + > + rtnl_lock(); > + > + netdev = __dev_get_by_index(genl_info_net(info), ifindex); > + if (!netdev) { Suggestion: extend this condition with "|| !netif_device_present(netdev)". > + err = -ENODEV; > + goto err_unlock; > + } > + > + err = net_devmem_bind_dmabuf(netdev, dmabuf_fd, &out_binding); > + if (err) > + goto err_unlock; > + > + nla_for_each_attr(attr, genlmsg_data(info->genlhdr), > + genlmsg_len(info->genlhdr), rem) { > + > + if (nla_type(attr) != NETDEV_A_BIND_DMABUF_QUEUES) > + continue; nit: use nla_for_each_attr_type() here instead > + err = nla_parse_nested( > + tb, ARRAY_SIZE(netdev_queue_dmabuf_nl_policy) - 1, attr, > + netdev_queue_dmabuf_nl_policy, info->extack); > + if (err < 0) > + goto err_unbind; > + > + rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_DMABUF_IDX]); > + > + err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, > + out_binding); > + if (err) > + goto err_unbind; > + } > + > + sock_binding_list = genl_sk_priv_get(&netdev_nl_family, > + NETLINK_CB(skb).sk); > + if (IS_ERR(sock_binding_list)) { > + err = PTR_ERR(sock_binding_list); > + goto err_unbind; > + } > + > + list_add(&out_binding->list, sock_binding_list); > + > + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); > + if (!rsp) { > + err = -ENOMEM; > + goto err_unbind; > + } > + > + hdr = genlmsg_iput(rsp, info); > + if (!hdr) { > + err = -EMSGSIZE; > + goto err_genlmsg_free; > + } I'd move genl_sk_priv_get(), genlmsg_new() and genlmsg_iput() before we take rtnl_lock(), but I admit it's a bit late for this sort of feedback.. 
:) > + nla_put_u32(rsp, NETDEV_A_BIND_DMABUF_DMABUF_ID, out_binding->id); > + genlmsg_end(rsp, hdr); > + > + rtnl_unlock(); > + > + return genlmsg_reply(rsp, info); > + > +err_genlmsg_free: > + nlmsg_free(rsp); > +err_unbind: > + net_devmem_unbind_dmabuf(out_binding); > +err_unlock: > + rtnl_unlock(); > + return err; > }