Re: [PATCH v3 bpf-next 1/2] libbpf: separate XDP program load with xsk socket creation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 25, 2020 at 3:30 PM Magnus Karlsson
<magnus.karlsson@xxxxxxxxx> wrote:
>
> On Wed, Nov 18, 2020 at 9:34 AM <mariusz.dudek@xxxxxxxxx> wrote:
> >
> > From: Mariusz Dudek <mariuszx.dudek@xxxxxxxxx>
> >
> > Add support for separation of eBPF program load and xsk socket
> > creation.
> >
> > This is needed for the use case where you want to provide as few
> > privileges as possible to the data plane application that will
> > handle xsk socket creation and incoming traffic.
> >
> > With this patch the data entity container can be run with only
> > the CAP_NET_RAW capability to fulfill its purpose of creating an
> > xsk socket and handling packets. In case your umem is larger than
> > or equal to the process limit for MEMLOCK, you need either to
> > increase the limit or to have the CAP_IPC_LOCK capability.
> >
> > To resolve privileges issue two APIs are introduced:
> >
> > - xsk_setup_xdp_prog - loads the built-in XDP program. It can
> > also return xsks_map_fd, which is needed by an unprivileged process
> > to update xsks_map with the AF_XDP socket "fd"
> >
> > - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap
> > for a particular xsk_socket
> >
> > Signed-off-by: Mariusz Dudek <mariuszx.dudek@xxxxxxxxx>
> > ---
> >  tools/lib/bpf/libbpf.map |  2 +
> >  tools/lib/bpf/xsk.c      | 97 ++++++++++++++++++++++++++++++++++++----
> >  tools/lib/bpf/xsk.h      |  5 +++
> >  3 files changed, 95 insertions(+), 9 deletions(-)
> >
> > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> > index 29ff4807b909..d939d5ac092e 100644
> > --- a/tools/lib/bpf/libbpf.map
> > +++ b/tools/lib/bpf/libbpf.map
> > @@ -345,4 +345,6 @@ LIBBPF_0.3.0 {
> >                 btf__parse_split;
> >                 btf__new_empty_split;
> >                 btf__new_split;
> > +               xsk_setup_xdp_prog;
> > +               xsk_socket__update_xskmap;
> >  } LIBBPF_0.2.0;
> > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > index 9bc537d0b92d..e16f920d2ef9 100644
> > --- a/tools/lib/bpf/xsk.c
> > +++ b/tools/lib/bpf/xsk.c
> > @@ -566,8 +566,42 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> >                                    &xsk->fd, 0);
> >  }
> >
> > -static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
> > +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
> >  {
> > +       char ifname[IFNAMSIZ];
> > +       struct xsk_ctx *ctx;
> > +       char *interface;
> > +       int res = -1;
>
> No need to set it to -1 anymore, due to the below.
Will fix this
>
> > +
> > +       ctx = calloc(1, sizeof(*ctx));
> > +       if (!ctx)
> > +               goto error_ctx;
>
> return an -ENOMEM here directly.
OK, -ENOMEM will be returned directly here.
>
> > +
> > +       interface = if_indextoname(ifindex, &ifname[0]);
> > +       if (!interface) {
> > +               res = -errno;
> > +               goto error_ifindex;
> > +       }
> > +
> > +       ctx->ifindex = ifindex;
> > +       strncpy(ctx->ifname, ifname, IFNAMSIZ - 1);
> > +       ctx->ifname[IFNAMSIZ - 1] = 0;
> > +
> > +       xsk->ctx = ctx;
> > +
> > +       return 0;
> > +
> > +error_ifindex:
> > +       free(ctx);
> > +error_ctx:
>
> And you can get rid of this label.
I will get rid of both labels, since I can return -ENOMEM or -errno
directly from the two failure points.
>
> > +       return res;
> > +}
> > +
> > +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
> > +                               bool force_set_map,
>
> force_set_map always seems to be false now. Correct? If it is, then it
> is not needed anymore. What was the original use case of this boolean?
>
force_set_map was previously used to force setting the xsk bpf maps,
but after the code change it is no longer needed. I will remove it.
> > +                               int *xsks_map_fd)
> > +{
> > +       struct xsk_socket *xsk = _xdp;
> >         struct xsk_ctx *ctx = xsk->ctx;
> >         __u32 prog_id = 0;
> >         int err;
> > @@ -584,8 +618,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
> >
> >                 err = xsk_load_xdp_prog(xsk);
> >                 if (err) {
> > -                       xsk_delete_bpf_maps(xsk);
> > -                       return err;
> > +                       goto err_load_xdp_prog;
> >                 }
> >         } else {
> >                 ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
> > @@ -598,15 +631,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
> >                 }
> >         }
> >
> > -       if (xsk->rx)
> > +       if (xsk->rx || force_set_map) {
> >                 err = xsk_set_bpf_maps(xsk);
> > -       if (err) {
> > -               xsk_delete_bpf_maps(xsk);
> > -               close(ctx->prog_fd);
> > -               return err;
> > +               if (err) {
> > +                       if (!prog_id) {
> > +                               goto err_set_bpf_maps;
> > +                       } else {
> > +                               close(ctx->prog_fd);
> > +                               return err;
> > +                       }
> > +               }
> >         }
> > +       if (xsks_map_fd)
> > +               *xsks_map_fd = ctx->xsks_map_fd;
> >
> >         return 0;
> > +
> > +err_set_bpf_maps:
> > +       close(ctx->prog_fd);
> > +       bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
> > +err_load_xdp_prog:
> > +       xsk_delete_bpf_maps(xsk);
> > +
> > +       return err;
> >  }
> >
> >  static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
> > @@ -689,6 +736,38 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
> >         return ctx;
> >  }
> >
> > +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
> > +{
> > +       free(xsk->ctx);
> > +       free(xsk);
> > +}
> > +
> > +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
> > +{
> > +       xsk->ctx->xsks_map_fd = fd;
> > +       return xsk_set_bpf_maps(xsk);
> > +}
> > +
> > +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
> > +{
> > +       struct xsk_socket *xsk;
> > +       int res = -1;
> > +
> > +       xsk = calloc(1, sizeof(*xsk));
> > +       if (!xsk)
> > +               return res;
> > +
> > +       res = xsk_create_xsk_struct(ifindex, xsk);
> > +       if (res)
> > +               return -EINVAL;
>
> Here you can now return the error from the function, i.e. return res,
> as we returned -ENOMEM in that function. You are however leaking the
> xsk struct you just allocated in case of error. Needs to be
> deallocated.
>
The xsk struct is now deallocated, and -ENOMEM is returned in case calloc fails.
> > +
> > +       res = __xsk_setup_xdp_prog(xsk, false, xsks_map_fd);
> > +
> > +       xsk_destroy_xsk_struct(xsk);
> > +
> > +       return res;
> > +}
> > +
> >  int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
> >                               const char *ifname,
> >                               __u32 queue_id, struct xsk_umem *umem,
> > @@ -838,7 +917,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
> >         ctx->prog_fd = -1;
> >
> >         if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
> > -               err = xsk_setup_xdp_prog(xsk);
> > +               err = __xsk_setup_xdp_prog(xsk, false, NULL);
> >                 if (err)
> >                         goto out_mmap_tx;
> >         }
> > diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
> > index 1069c46364ff..5b74c17ed3d4 100644
> > --- a/tools/lib/bpf/xsk.h
> > +++ b/tools/lib/bpf/xsk.h
> > @@ -201,6 +201,11 @@ struct xsk_umem_config {
> >         __u32 flags;
> >  };
> >
> > +LIBBPF_API int xsk_setup_xdp_prog(int ifindex,
> > +                                 int *xsks_map_fd);
> > +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk,
> > +                                int xsks_map_fd);
> > +
> >  /* Flags for the libbpf_flags field. */
> >  #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
> >
> > --
> > 2.20.1
> >



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux