Hi all, On Mon, Oct 9, 2023 at 1:54 PM Peter Krempa <pkrempa@xxxxxxxxxx> wrote: > > On Mon, Oct 09, 2023 at 09:16:14 +0300, Andrew Melnychenko wrote: > > Added logic for loading the "RSS" eBPF program. > > eBPF file descriptors passed to the QEMU. > > > > Signed-off-by: Andrew Melnychenko <andrew@xxxxxxxxxx> > > --- > > src/qemu/qemu_command.c | 53 +++++++++++++++++++++++++++++++++++++++++ > > src/qemu/qemu_domain.c | 4 ++++ > > src/qemu/qemu_domain.h | 3 +++ > > 3 files changed, 60 insertions(+) > > This will require addition to qemuxml2argvtest once the qemu bit is > ready. > > > > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c > > index 8a7b80719f..ca6cb1bc7a 100644 > > --- a/src/qemu/qemu_command.c > > +++ b/src/qemu/qemu_command.c > > @@ -3786,6 +3786,23 @@ qemuBuildLegacyNicStr(virDomainNetDef *net) > > NULLSTR_EMPTY(net->info.alias)); > > } > > > > +static char *qemuBuildEbpfRssArg(virDomainNetDef *net) { > > + qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net); > > + g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; > > + size_t nfds; > > + GSList *n; > > + > > + if (netpriv->ebpfrssfds) { > > This check is not needed. > > > + nfds = 0; > > + for (n = netpriv->ebpfrssfds; n; n = n->next) { > > + virBufferAsprintf(&buf, "%s:", qemuFDPassDirectGetPath(n->data)); > > + nfds++; > > 'nfds' is not used. > > > > + } > > + } > > + virBufferTrim(&buf, ":"); > > + > > + return virBufferContentAndReset(&buf); > > +} > > > > virJSONValue * > > qemuBuildNicDevProps(virDomainDef *def, > > @@ -3871,6 +3888,11 @@ qemuBuildNicDevProps(virDomainDef *def, > > "T:failover", failover, > > NULL) < 0) > > return NULL; > > + if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON && virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS)){ > > This check should not be needed here ... > > > + g_autofree char *ebpf = qemuBuildEbpfRssArg(net); > > Because this will return a NULL string if it was not set up. > > > + if (virJSONValueObjectAdd(&props, "S:ebpf_rss_fds", ebpf, NULL) < 0) > > This is definitely not needed in a separate step and can be done in the > block above as "S:" skipps the attribute if the argument is NULL. > > > + return NULL; > > + } > > } else { > > if (virJSONValueObjectAdd(&props, > > "s:driver", virDomainNetGetModelString(net), > > @@ -4152,6 +4174,33 @@ qemuBuildWatchdogDevProps(const virDomainDef *def, > > } > > > > > > +static void qemuOpenEbpfRssFds(virDomainNetDef *net, > > + virQEMUCaps *qemuCaps) { > > + const void *ebpfRSSObject = NULL; > > + size_t ebpfRSSObjectSize = 0; > > + int fds[16]; > > 16 feels arbitrary Yes, it should be big enough. QEMU in the future may have less/more program&maps file descriptors. I'm thinking about a routine that could allocate the exact fd array during ebpf load. > > > + int nfds = 0; > > + qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net); > > + > > + netpriv->libbpfRSSObject = NULL; > > + netpriv->ebpfrssfds = NULL; > > + > > + /* Add ebpf values */ > > + if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON && virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS)) { > > This line is too long and there's a good spot to break it up; > > > + ebpfRSSObject = virQEMUCapsGetEbpf(qemuCaps, "rss", &ebpfRSSObjectSize); > > + nfds = qemuInterfaceLoadEbpf(ebpfRSSObject, ebpfRSSObjectSize, &netpriv->libbpfRSSObject, fds, 16); > > Note that the '16' argument is unused by the callee. > > Also note that when running an unprivileged instance of libvirt, this > function might not have permissions to do anything. It seems that the > rest is okay, but we need to avoid any form of spurious errors and such > if that is really okay and there are fallback paths. > > > + > > + if (nfds > 0) { > > + for (int i = 0; i < nfds; ++i) { > > + g_autofree char *name = g_strdup_printf("ebpfrssfd-%s-%u", net->info.alias, i); > > + netpriv->ebpfrssfds = g_slist_prepend(netpriv->ebpfrssfds, qemuFDPassDirectNew(name, fds + i)); > > + } > > + netpriv->ebpfrssfds = g_slist_reverse(netpriv->ebpfrssfds); > > + } > > + } > > +} > > + > > + > > static int > > qemuBuildWatchdogCommandLine(virCommand *cmd, > > const virDomainDef *def, > > @@ -8735,6 +8784,10 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver, > > qemuFDPassDirectTransferCommand(netpriv->slirpfd, cmd); > > qemuFDPassTransferCommand(netpriv->vdpafd, cmd); > > > > + qemuOpenEbpfRssFds(net, qemuCaps); > > Note that this will cause problems when you'll be about to test this > functionality in qemuxml2argvtest. The test code must not modify the > host state, so this will either need to be mocked in the testsuite or > you'll need to move it to the host-prepare function > 'qemuProcessPrepareHost' which is skipped from the test suite. The test > suite will then need to fake the data. I think it will be mocked. > > > + for (n = netpriv->ebpfrssfds; n; n = n->next) > > + qemuFDPassDirectTransferCommand(n->data, cmd); > > + > > if (!(hostnetprops = qemuBuildHostNetProps(vm, net))) > > goto cleanup; > > > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > > index eec7bed28b..c696482f29 100644 > > --- a/src/qemu/qemu_domain.c > > +++ b/src/qemu/qemu_domain.c > > @@ -38,6 +38,7 @@ > > #include "qemu_checkpoint.h" > > #include "qemu_validate.h" > > #include "qemu_namespace.h" > > +#include "qemu_interface.h" > > #include "viralloc.h" > > #include "virlog.h" > > #include "virerror.h" > > @@ -1078,6 +1079,7 @@ qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv) > > g_clear_pointer(&priv->vdpafd, qemuFDPassFree); > > g_slist_free_full(g_steal_pointer(&priv->vhostfds), (GDestroyNotify) qemuFDPassDirectFree); > > g_slist_free_full(g_steal_pointer(&priv->tapfds), (GDestroyNotify) qemuFDPassDirectFree); > > + g_slist_free_full(g_steal_pointer(&priv->ebpfrssfds), (GDestroyNotify) qemuFDPassDirectFree); > > } > > > > > > @@ -1089,6 +1091,8 @@ qemuDomainNetworkPrivateDispose(void *obj G_GNUC_UNUSED) > > qemuSlirpFree(priv->slirp); > > > > qemuDomainNetworkPrivateClearFDs(priv); > > + > > + qemuInterfaceCloseEbpf(priv->libbpfRSSObject); > > Does libvirt need to keep this open during the whole lifetime of the VM? > Also note that this code will be called when restarting the > libvirtd/virtqemud daemon, thus it can be called while the VM is alive. > I believe we can close the libbpf context right after QEMU launch(after fds are passed to the QEMU process). The idea was if the private context holds libbpf resources - it will destroy them with the private network context itself.