On 12/12/22 6:35 PM, Stanislav Fomichev wrote:
New flag BPF_F_XDP_DEV_BOUND_ONLY plus all the infra to have a way
to associate a netdev with a BPF program at load time.
Some existing 'offloaded' routines are renamed to 'dev_bound' for
consistency with the rest.
Also moved a bunch of code around to avoid forward declarations.
There are too many things in one patch. It becomes quite hard to follow, eg. I
have to go back-and-forth a few times within this patch to confirm what change
is just a move. Please put the "moved a bunch of code around to avoid forward
declarations" in one individual patch and also the
"late_initcall(bpf_offload_init)" change in another individual patch.
[ ... ]
-int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
+static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+ struct net_device *netdev)
+{
+ struct bpf_offload_netdev *ondev;
+ int err;
+
+ ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
+ if (!ondev)
+ return -ENOMEM;
+
+ ondev->netdev = netdev;
+ ondev->offdev = offdev;
+ INIT_LIST_HEAD(&ondev->progs);
+ INIT_LIST_HEAD(&ondev->maps);
+
+ err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
+ if (err) {
+ netdev_warn(netdev, "failed to register for BPF offload\n");
+ goto err_unlock_free;
+ }
+
+ if (offdev)
+ list_add(&ondev->offdev_netdevs, &offdev->netdevs);
+ return 0;
+
+err_unlock_free:
+ up_write(&bpf_devs_lock);
No need to handle bpf_devs_lock in the "__" version of the register() helper?
The goto label probably also needs another name, eg. "err_free".
+ kfree(ondev);
+ return err;
+}
+
[ ... ]
+int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
{
struct bpf_offload_netdev *ondev;
struct bpf_prog_offload *offload;
@@ -87,7 +198,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
attr->prog_type != BPF_PROG_TYPE_XDP)
return -EINVAL;
- if (attr->prog_flags)
+ if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY)
return -EINVAL;
offload = kzalloc(sizeof(*offload), GFP_USER);
@@ -102,11 +213,25 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
if (err)
goto err_maybe_put;
+ prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY);
+
down_write(&bpf_devs_lock);
ondev = bpf_offload_find_netdev(offload->netdev);
if (!ondev) {
- err = -EINVAL;
- goto err_unlock;
+ if (!bpf_prog_is_offloaded(prog->aux)) {
+ /* When only binding to the device, explicitly
+ * create an entry in the hashtable. See related
+ * bpf_dev_bound_try_remove_netdev.
+ */
+ err = __bpf_offload_dev_netdev_register(NULL, offload->netdev);
+ if (err)
+ goto err_unlock;
+ ondev = bpf_offload_find_netdev(offload->netdev);
+ }
+ if (!ondev) {
nit. A bit confusing because the "ondev = bpf_offload_find_netdev(...)" above
should not fail but "!ondev" is tested again here. I think the intention is to
fail on the 'bpf_prog_is_offloaded() == true' case. May be:
if (bpf_prog_is_offloaded(prog->aux)) {
err = -EINVAL;
goto err_unlock;
}
/* When only binding to the device, explicitly
* ...
*/
err = __bpf_offload_dev_netdev_register(NULL, offload->netdev);
if (err)
goto err_unlock;
ondev = bpf_offload_find_netdev(offload->netdev);
+ err = -EINVAL;
+ goto err_unlock;
+ }
}
offload->offdev = ondev->offdev;
prog->aux->offload = offload;
@@ -209,27 +334,28 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
up_read(&bpf_devs_lock);
}
-static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
+static void bpf_dev_bound_try_remove_netdev(struct net_device *dev)
{
- struct bpf_prog_offload *offload = prog->aux->offload;
-
- if (offload->dev_state)
- offload->offdev->ops->destroy(prog);
+ struct bpf_offload_netdev *ondev;
- /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
- bpf_prog_free_id(prog, true);
+ if (!dev)
+ return;
- list_del_init(&offload->offloads);
- kfree(offload);
- prog->aux->offload = NULL;
+ ondev = bpf_offload_find_netdev(dev);
+ if (ondev && !ondev->offdev && list_empty(&ondev->progs))
hmm....list_empty(&ondev->progs) is tested here but will it be empty? ...
+ __bpf_offload_dev_netdev_unregister(NULL, dev);
}
-void bpf_prog_offload_destroy(struct bpf_prog *prog)
+void bpf_prog_dev_bound_destroy(struct bpf_prog *prog)
{
+ rtnl_lock();
down_write(&bpf_devs_lock);
- if (prog->aux->offload)
- __bpf_prog_offload_destroy(prog);
+ if (prog->aux->offload) {
+ bpf_dev_bound_try_remove_netdev(prog->aux->offload->netdev);
... the "prog" here is still linked to ondev->progs, right?
because __bpf_prog_dev_bound_destroy() is called later below.
nit. May be the bpf_dev_bound_try_remove_netdev() should be folded/merged back
into bpf_prog_dev_bound_destroy() to make things more clear.
+ __bpf_prog_dev_bound_destroy(prog); > + }
up_write(&bpf_devs_lock);
+ rtnl_unlock();
}
[ ... ]
+static int __init bpf_offload_init(void)
+{
+ int err;
+
+ down_write(&bpf_devs_lock);
lock is probably not needed.
+ err = rhashtable_init(&offdevs, &offdevs_params);
+ up_write(&bpf_devs_lock);
+
+ return err;
+}
+
+late_initcall(bpf_offload_init);
[ ... ]
diff --git a/net/core/dev.c b/net/core/dev.c
index 5d51999cba30..194f8116aad4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9228,6 +9228,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported");
return -EINVAL;
}
+ if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) {
+ NL_SET_ERR_MSG(extack, "Program bound to different device");
+ return -EINVAL;
+ }
if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
return -EINVAL;
@@ -10813,6 +10817,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
/* Shutdown queueing discipline. */
dev_shutdown(dev);
+ bpf_dev_bound_netdev_unregister(dev);
Does it matter if bpf_dev_bound_netdev_unregister(dev) is called before
dev_xdp_uninstall(dev)? Asking because it seems more logic to unregister dev
after detaching xdp progs.
dev_xdp_uninstall(dev);
netdev_offload_xstats_disable_all(dev);