On 06.05.2013 22:16, Laine Stump wrote: > VFIO device assignment requires a cgroup ACL to be setup for access to > the /dev/vfio/nn "group" device for any devices that will be assigned > to a guest. In the case of a host device that is allocated from a > pool, it was being allocated during qemuBuildCommandLine(), which is > called by qemuProcessStart() *after* the all-encompassing > qemuSetupCgroup() is called, meaning that the standard Cgroup ACL > setup wasn't catching these devices allocated from pools. > > One possible solution was to manually add a single ACL down inside > qemuBuildCommandLine() when networkAllocateActualDevice() is called, > but that has two problems: 1) the function that adds the cgroup ACL > requires a virDomainObjPtr, which isn't available in > qemuBuildCommandLine(), and 2) we really shouldn't be doing network > device setup inside qemuBuildCommandLine() anyway. > > Instead, I've created a new function called > qemuNetworkPrepareDevices() which is called just before > qemuPrepareHostDevices() during qemuProcessStart() (explanation of > ordering in the comments), i.e. well before the call to > qemuSetupCgroup(). To minimize code churn in a patch that will be > backported to 1.0.5-maint, qemuNetworkPrepareDevices only does > networkAllocateActualDevice() and the bare amount of setup required > for type='hostdev network devices. > > Note that some of the code that was previously needed in > qemuBuildCommandLine() is no longer required when > networkAllocateActualDevice() is called earlier: > > * qemuAssignDeviceHostdevAlias() is already done further down in > qemuProcessStart(). > > * qemuPrepareHostdevPCIDevices() is called by > qemuPrepareHostDevices() which is called after > qemuNetworkPrepareDevices() in qemuProcessStart(). > > This new function should be moved into a separate qemu_network.c (or > similarly named) file along with qemuPhysIfaceConnect(), > qemuNetworkIfaceConnect(), and qemuOpenVhostNet(), and expanded to call > those functions as well, then the nnets loop in qemuBuildCommandLine() should > be reduced to only build the commandline string. However, this will > require storing away an array of tapfd and vhostfd that are needed > for the commandline, so I would rather do that in a separate patch and > leave this patch at the minimum to fix the bug. > --- > src/qemu/qemu_command.c | 104 ++++++++++++++++++++++++++---------------------- > src/qemu/qemu_command.h | 4 +- > src/qemu/qemu_process.c | 8 ++++ > 3 files changed, 67 insertions(+), 49 deletions(-) > > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c > index 144620c..5ce97e8 100644 > --- a/src/qemu/qemu_command.c > +++ b/src/qemu/qemu_command.c > @@ -457,6 +457,58 @@ qemuOpenVhostNet(virDomainDefPtr def, > return 0; > } > > +int > +qemuNetworkPrepareDevices(virDomainDefPtr def) > +{ > + int ret = -1; > + int ii; > + > + for (ii = 0; ii < def->nnets; ii++) { > + virDomainNetDefPtr net = def->nets[ii]; > + int actualType; > + > + /* If appropriate, grab a physical device from the configured > + * network's pool of devices, or resolve bridge device name > + * to the one defined in the network definition. > + */ > + if (networkAllocateActualDevice(net) < 0) > + goto cleanup; > + > + actualType = virDomainNetGetActualType(net); > + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV && > + net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { > + /* Each type='hostdev' network device must also have a > + * corresponding entry in the hostdevs array. For netdevs > + * that are hardcoded as type='hostdev', this is already > + * done by the parser, but for those allocated from a > + * network / determined at runtime, we need to do it > + * separately. > + */ > + virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net); > + > + if (virDomainHostdevFind(def, hostdev, NULL) >= 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("PCI device %04x:%02x:%02x.%x " > + "allocated from network %s is already " > + "in use by domain %s"), > + hostdev->source.subsys.u.pci.addr.domain, > + hostdev->source.subsys.u.pci.addr.bus, > + hostdev->source.subsys.u.pci.addr.slot, > + hostdev->source.subsys.u.pci.addr.function, > + net->data.network.name, > + def->name); > + goto cleanup; > + } > + if (virDomainHostdevInsert(def, hostdev) < 0) { > + virReportOOMError(); > + goto cleanup; > + } > + } > + } > + ret = 0; > +cleanup: > + return ret; > +} > > static int qemuDomainDeviceAliasIndex(virDomainDeviceInfoPtr info, > const char *prefix) > @@ -7106,12 +7158,15 @@ qemuBuildCommandLine(virConnectPtr conn, > char vhostfd_name[50] = ""; > int vlan; > int bootindex = bootNet; > - int actualType; > + int actualType = virDomainNetGetActualType(net); > > bootNet = 0; > if (!bootindex) > bootindex = net->info.bootIndex; > I'd expect a one line comment here at least to give a reason why we are skipping VIR_DOMAIN_NET_TYPE_HOSTDEV. Something like: /* hostdev interfaces already handled by qemuNetworkPrepareDevices */ It's obvious now, but if we get later to this code it won't be IMO. > + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) > + continue; > + > /* VLANs are not used with -netdev, so don't record them */ > if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_NETDEV) && > virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE)) I actually posted a patch which moves the whole block into a separate function, which is what we should do: https://www.redhat.com/archives/libvir-list/2013-April/msg01550.html > @@ -7119,53 +7174,6 @@ qemuBuildCommandLine(virConnectPtr conn, > else > vlan = i; > > - /* If appropriate, grab a physical device from the configured > - * network's pool of devices, or resolve bridge device name > - * to the one defined in the network definition. > - */ > - if (networkAllocateActualDevice(net) < 0) > - goto error; > - > - actualType = virDomainNetGetActualType(net); > - if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) { > - if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { > - virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net); > - virDomainHostdevDefPtr found; > - /* For a network with <forward mode='hostdev'>, there is a need to > - * add the newly minted hostdev to the hostdevs array. > - */ > - if (qemuAssignDeviceHostdevAlias(def, hostdev, > - (def->nhostdevs-1)) < 0) { > - goto error; > - } > - > - if (virDomainHostdevFind(def, hostdev, &found) < 0) { > - if (virDomainHostdevInsert(def, hostdev) < 0) { > - virReportOOMError(); > - goto error; > - } > - if (qemuPrepareHostdevPCIDevices(driver, def->name, def->uuid, > - &hostdev, 1) < 0) { > - goto error; > - } > - } > - else { > - virReportError(VIR_ERR_INTERNAL_ERROR, > - _("PCI device %04x:%02x:%02x.%x " > - "allocated from network %s is already " > - "in use by domain %s"), > - hostdev->source.subsys.u.pci.addr.domain, > - hostdev->source.subsys.u.pci.addr.bus, > - hostdev->source.subsys.u.pci.addr.slot, > - hostdev->source.subsys.u.pci.addr.function, > - net->data.network.name, > - def->name); > - goto error; > - } > - } > - continue; > - } > - > if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK || > actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) { > int tapfd = qemuNetworkIfaceConnect(def, conn, driver, net, > diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h > index a706942..e690dee 100644 > --- a/src/qemu/qemu_command.h > +++ b/src/qemu/qemu_command.h > @@ -1,7 +1,7 @@ > /* > * qemu_command.h: QEMU command generation > * > - * Copyright (C) 2006-2012 Red Hat, Inc. > + * Copyright (C) 2006-2013 Red Hat, Inc. > * Copyright (C) 2006 Daniel P. Berrange > * > * This library is free software; you can redistribute it and/or > @@ -161,6 +161,8 @@ int qemuOpenVhostNet(virDomainDefPtr def, > virQEMUCapsPtr qemuCaps, > int *vhostfd); > > +int qemuNetworkPrepareDevices(virDomainDefPtr def); > + > /* > * NB: def->name can be NULL upon return and the caller > * *must* decide how to fill in a name in this case > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index 3dd178c..d7b0aee 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -3422,6 +3422,14 @@ int qemuProcessStart(virConnectPtr conn, > goto cleanup; > } > > + /* network devices must be "prepared" before hostdevs, because > + * setting up a network device might create a new hostdev that > + * will need to be setup. > + */ > + VIR_DEBUG("Preparing network devices"); Is it worth mentioning s/network/network host/? > + if (qemuNetworkPrepareDevices(vm->def) < 0) > + goto cleanup; > + > /* Must be run before security labelling */ > VIR_DEBUG("Preparing host devices"); > if (qemuPrepareHostDevices(driver, vm->def, !migrateFrom) < 0) > ACK if you add the comment. The debug message fix is optional. Michal -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list