Re: [RFC PATCH 4/4] qemu_command: Added "ebpf_rss_fds" support for virtio-net.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,

On Mon, Oct 9, 2023 at 1:54 PM Peter Krempa <pkrempa@xxxxxxxxxx> wrote:
>
> On Mon, Oct 09, 2023 at 09:16:14 +0300, Andrew Melnychenko wrote:
> > Added logic for loading the "RSS" eBPF program.
> > eBPF file descriptors passed to the QEMU.
> >
> > Signed-off-by: Andrew Melnychenko <andrew@xxxxxxxxxx>
> > ---
> >  src/qemu/qemu_command.c | 53 +++++++++++++++++++++++++++++++++++++++++
> >  src/qemu/qemu_domain.c  |  4 ++++
> >  src/qemu/qemu_domain.h  |  3 +++
> >  3 files changed, 60 insertions(+)
>
> This will require addition to qemuxml2argvtest once the qemu bit is
> ready.
>
>
> > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> > index 8a7b80719f..ca6cb1bc7a 100644
> > --- a/src/qemu/qemu_command.c
> > +++ b/src/qemu/qemu_command.c
> > @@ -3786,6 +3786,23 @@ qemuBuildLegacyNicStr(virDomainNetDef *net)
> >                             NULLSTR_EMPTY(net->info.alias));
> >  }
> >
> > +static char *qemuBuildEbpfRssArg(virDomainNetDef *net) {
> > +    qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net);
> > +    g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
> > +    size_t nfds;
> > +    GSList *n;
> > +
> > +    if (netpriv->ebpfrssfds) {
>
> This check is not needed.
>
> > +        nfds = 0;
> > +        for (n = netpriv->ebpfrssfds; n; n = n->next) {
> > +            virBufferAsprintf(&buf, "%s:", qemuFDPassDirectGetPath(n->data));
> > +            nfds++;
>
> 'nfds' is not used.
>
>
> > +        }
> > +    }
> > +    virBufferTrim(&buf, ":");
> > +
> > +    return virBufferContentAndReset(&buf);
> > +}
> >
> >  virJSONValue *
> >  qemuBuildNicDevProps(virDomainDef *def,
> > @@ -3871,6 +3888,11 @@ qemuBuildNicDevProps(virDomainDef *def,
> >                                    "T:failover", failover,
> >                                    NULL) < 0)
> >              return NULL;
> > +        if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON && virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS)){
>
> This check should not be needed here ...
>
> > +            g_autofree char *ebpf = qemuBuildEbpfRssArg(net);
>
> Because this will return a NULL string if it was not set up.
>
> > +            if (virJSONValueObjectAdd(&props, "S:ebpf_rss_fds", ebpf, NULL) < 0)
>
> This is definitely not needed in a separate step and can be done in the
> block above as "S:" skipps the attribute if the argument is NULL.
>
> > +                return NULL;
> > +        }
> >      } else {
> >          if (virJSONValueObjectAdd(&props,
> >                                    "s:driver", virDomainNetGetModelString(net),
> > @@ -4152,6 +4174,33 @@ qemuBuildWatchdogDevProps(const virDomainDef *def,
> >  }
> >
> >
> > +static void qemuOpenEbpfRssFds(virDomainNetDef *net,
> > +                              virQEMUCaps *qemuCaps) {
> > +    const void *ebpfRSSObject = NULL;
> > +    size_t ebpfRSSObjectSize = 0;
> > +    int fds[16];
>
> 16 feels arbitrary

Yes, it should be big enough. QEMU in the future may have less/more
program&maps file descriptors.
I'm thinking about a routine that could allocate the exact fd array
during ebpf load.

>
> > +    int nfds = 0;
> > +    qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net);
> > +
> > +    netpriv->libbpfRSSObject = NULL;
> > +    netpriv->ebpfrssfds = NULL;
> > +
> > +    /* Add ebpf values */
> > +    if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON && virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS)) {
>
> This line is too long and there's a good spot to break it up;
>
> > +        ebpfRSSObject = virQEMUCapsGetEbpf(qemuCaps, "rss", &ebpfRSSObjectSize);
> > +        nfds = qemuInterfaceLoadEbpf(ebpfRSSObject, ebpfRSSObjectSize, &netpriv->libbpfRSSObject, fds, 16);
>
> Note that the '16' argument is unused by the callee.
>
> Also note that when running an unprivileged instance of libvirt, this
> function might not have permissions to do anything. It seems that the
> rest is okay, but we need to avoid any form of spurious errors and such
> if that is really okay and there are fallback paths.
>
> > +
> > +        if (nfds > 0) {
> > +            for (int i = 0; i < nfds; ++i) {
> > +                g_autofree char *name = g_strdup_printf("ebpfrssfd-%s-%u", net->info.alias, i);
> > +                netpriv->ebpfrssfds = g_slist_prepend(netpriv->ebpfrssfds, qemuFDPassDirectNew(name, fds + i));
> > +            }
> > +            netpriv->ebpfrssfds = g_slist_reverse(netpriv->ebpfrssfds);
> > +        }
> > +    }
> > +}
> > +
> > +
> >  static int
> >  qemuBuildWatchdogCommandLine(virCommand *cmd,
> >                               const virDomainDef *def,
> > @@ -8735,6 +8784,10 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver,
> >      qemuFDPassDirectTransferCommand(netpriv->slirpfd, cmd);
> >      qemuFDPassTransferCommand(netpriv->vdpafd, cmd);
> >
> > +    qemuOpenEbpfRssFds(net, qemuCaps);
>
> Note that this will cause problems when you'll be about to test this
> functionality in qemuxml2argvtest. The test code must not modify the
> host state, so this will either need to be mocked in the testsuite or
> you'll need to move it to the host-prepare function
> 'qemuProcessPrepareHost' which is skipped from the test suite. The test
> suite will then need to fake the data.

I think it will be mocked.

>
> > +    for (n = netpriv->ebpfrssfds; n; n = n->next)
> > +        qemuFDPassDirectTransferCommand(n->data, cmd);
> > +
> >      if (!(hostnetprops = qemuBuildHostNetProps(vm, net)))
> >          goto cleanup;
> >
> > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> > index eec7bed28b..c696482f29 100644
> > --- a/src/qemu/qemu_domain.c
> > +++ b/src/qemu/qemu_domain.c
> > @@ -38,6 +38,7 @@
> >  #include "qemu_checkpoint.h"
> >  #include "qemu_validate.h"
> >  #include "qemu_namespace.h"
> > +#include "qemu_interface.h"
> >  #include "viralloc.h"
> >  #include "virlog.h"
> >  #include "virerror.h"
> > @@ -1078,6 +1079,7 @@ qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv)
> >      g_clear_pointer(&priv->vdpafd, qemuFDPassFree);
> >      g_slist_free_full(g_steal_pointer(&priv->vhostfds), (GDestroyNotify) qemuFDPassDirectFree);
> >      g_slist_free_full(g_steal_pointer(&priv->tapfds), (GDestroyNotify) qemuFDPassDirectFree);
> > +    g_slist_free_full(g_steal_pointer(&priv->ebpfrssfds), (GDestroyNotify) qemuFDPassDirectFree);
> >  }
> >
> >
> > @@ -1089,6 +1091,8 @@ qemuDomainNetworkPrivateDispose(void *obj G_GNUC_UNUSED)
> >      qemuSlirpFree(priv->slirp);
> >
> >      qemuDomainNetworkPrivateClearFDs(priv);
> > +
> > +    qemuInterfaceCloseEbpf(priv->libbpfRSSObject);
>
> Does libvirt need to keep this open during the whole lifetime of the VM?
> Also note that this code will be called when restarting the
> libvirtd/virtqemud daemon, thus it can be called while the VM is alive.
>

I believe we can close the libbpf context right after QEMU
launch(after fds are passed to the QEMU process).
The idea was if the private context holds libbpf resources - it will
destroy them with the private network context itself.





[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux