On 05/27/2013 12:52 AM, Marek Marczykowski wrote: > On 16.05.2013 08:07, Chunyan Liu wrote: >> Write separate module for hostdev passthrough so that it could be used by all >> hypervisor drivers and maintain a global hostdev state. >> >> Signed-off-by: Chunyan Liu <cyliu@xxxxxxxx> > Some comments inline. Besides that seems to be working (with your next patch > for libxl) :) > >> --- >> po/POTFILES.in | 1 + >> src/Makefile.am | 1 + >> src/libvirt.c | 5 + >> src/libvirt_private.syms | 15 + >> src/util/virhostdevmanager.c | 1218 ++++++++++++++++++++++++++++++++++++++++++ >> src/util/virhostdevmanager.h | 91 ++++ >> src/util/virpci.c | 17 +- >> src/util/virpci.h | 7 +- >> src/util/virusb.c | 19 +- >> src/util/virusb.h | 4 +- >> 10 files changed, 1362 insertions(+), 16 deletions(-) >> create mode 100644 src/util/virhostdevmanager.c >> create mode 100644 src/util/virhostdevmanager.h >> >> diff --git a/po/POTFILES.in b/po/POTFILES.in >> index f3ea4da..a7c21bf 100644 >> --- a/po/POTFILES.in >> +++ b/po/POTFILES.in >> @@ -188,6 +188,7 @@ src/util/viruri.c >> src/util/virusb.c >> src/util/virutil.c >> src/util/virxml.c >> +src/util/virhostdevmanager.c >> src/vbox/vbox_MSCOMGlue.c >> src/vbox/vbox_XPCOMCGlue.c >> src/vbox/vbox_driver.c >> diff --git a/src/Makefile.am b/src/Makefile.am >> index 4312c3c..a197c2b 100644 >> --- a/src/Makefile.am >> +++ b/src/Makefile.am >> @@ -130,6 +130,7 @@ UTIL_SOURCES = \ >> util/virutil.c util/virutil.h \ >> util/viruuid.c util/viruuid.h \ >> util/virxml.c util/virxml.h \ >> + util/virhostdevmanager.c util/virhostdevmanager.h \ >> $(NULL) >> >> >> diff --git a/src/libvirt.c b/src/libvirt.c >> index 2b3515e..d9af5a6 100644 >> --- a/src/libvirt.c >> +++ b/src/libvirt.c >> @@ -65,6 +65,7 @@ >> #include "virthread.h" >> #include "virstring.h" >> #include "virutil.h" >> +#include "virhostdevmanager.h" >> >> #ifdef WITH_TEST >> # include "test/test_driver.h" >> @@ -827,6 +828,7 @@ int virStateInitialize(bool privileged, >> if (virInitialize() < 0) >> return -1; >> >> + virHostdevManagerInit(); >> for (i = 0 ; i < virStateDriverTabCount ; i++) { >> if (virStateDriverTab[i]->stateInitialize) { >> VIR_DEBUG("Running global init for %s state driver", >> @@ -858,6 +860,9 @@ int virStateCleanup(void) { >> virStateDriverTab[i]->stateCleanup() < 0) >> ret = -1; >> } >> + >> + virHostdevManagerCleanup(); >> + >> return ret; >> } >> >> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms >> index cdd0b41..824de4e 100644 >> --- a/src/libvirt_private.syms >> +++ b/src/libvirt_private.syms >> @@ -1991,6 +1991,21 @@ virXPathULong; >> virXPathULongHex; >> virXPathULongLong; >> >> +#util/virhostdevmanager.h >> +virHostdevManagerGetDefault; >> +virHostdevManagerInit; >> +virHostdevManagerCleanup; >> +virHostdevManagerPrepareHostdevs; >> +virHostdevManagerReAttachHostdevs; >> +virHostdevManagerPreparePciHostdevs; >> +virHostdevManagerPrepareUsbHostdevs; >> +virHostdevManagerReAttachPciHostdevs; >> +virHostdevManagerReAttachUsbHostdevs; >> +virGetActivePciHostdevs; >> +virGetActiveUsbHostdevs; >> +virGetDomainActivePciHostdevs; >> +virGetDomainActiveUsbHostdevs; >> +virFreeHostdevNameList; > "make check" reports wrong symbol order here. > >> >> # Let emacs know we want case-insensitive sorting >> # Local Variables: >> diff --git a/src/util/virhostdevmanager.c b/src/util/virhostdevmanager.c >> new file mode 100644 >> index 0000000..9034212 >> --- /dev/null >> +++ b/src/util/virhostdevmanager.c >> @@ -0,0 +1,1218 @@ >> +/* >> + * Copyright (C) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany. >> + * >> + * This library is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU Lesser General Public >> + * License as published by the Free Software Foundation; either >> + * version 2.1 of the License, or (at your option) any later version. >> + * >> + * This library is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + * Lesser General Public License for more details. >> + * >> + * You should have received a copy of the GNU Lesser General Public >> + * License along with this library. If not, see >> + * <http://www.gnu.org/licenses/>. >> + * >> + * Author: Chunyan Liu <cyliu@xxxxxxxx> >> + */ >> +#include <config.h> >> + >> +#include "virhostdevmanager.h" >> + >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <unistd.h> >> +#include <stdlib.h> >> +#include <stdio.h> >> + >> +#include "viralloc.h" >> +#include "virstring.h" >> +#include "virfile.h" >> +#include "virerror.h" >> +#include "virlog.h" >> +#include "virpci.h" >> +#include "virusb.h" >> +#include "virnetdev.h" >> + >> +/* For virReportOOMError() and virReportSystemError() */ >> +#define VIR_FROM_THIS VIR_FROM_NONE >> + >> +struct _virHostdevManager{ >> + char *stateDir; >> + >> + virPCIDeviceListPtr activePciHostdevs; >> + virPCIDeviceListPtr inactivePciHostdevs; >> + virUSBDeviceListPtr activeUsbHostdevs; >> +}; >> + >> +static virHostdevManagerPtr hostdevMgr; >> + >> +int virHostdevManagerInit(void) >> +{ >> + char ebuf[1024]; >> + if (VIR_ALLOC(hostdevMgr) < 0) >> + return -1; >> + >> + if ((hostdevMgr->activePciHostdevs = virPCIDeviceListNew()) == NULL) >> + return -1; > goto out_of_memory ? > >> + >> + if ((hostdevMgr->activeUsbHostdevs = virUSBDeviceListNew()) == NULL) >> + return -1; > goto out_of_memory ? > >> + >> + if ((hostdevMgr->inactivePciHostdevs = virPCIDeviceListNew()) == NULL) >> + return -1; > goto out_of_memory ? > >> + >> + if (virAsprintf(&hostdevMgr->stateDir, >> + "%s",HOSTDEV_STATE_DIR) == -1) >> + goto out_of_memory; >> + >> + if (virFileMakePath(hostdevMgr->stateDir) < 0) { >> + VIR_ERROR(_("Failed to create state dir '%s': %s"), >> + hostdevMgr->stateDir, virStrerror(errno, ebuf, sizeof(ebuf))); >> + goto error; >> + } >> + >> + return 0; >> + >> +out_of_memory: >> + virReportOOMError(); >> +error: >> + return -1; >> +} >> + >> +void virHostdevManagerCleanup(void) >> +{ >> + if(!hostdevMgr) >> + return; >> + >> + virObjectUnref(hostdevMgr->activePciHostdevs); >> + virObjectUnref(hostdevMgr->inactivePciHostdevs); >> + virObjectUnref(hostdevMgr->activeUsbHostdevs); >> + >> + VIR_FREE(hostdevMgr->stateDir); >> +} >> + >> +virHostdevManagerPtr virHostdevManagerGetDefault(void) >> +{ >> + return hostdevMgr; >> +} >> + >> +static int >> +virDomainHostdevPciSysfsPath(virDomainHostdevDefPtr hostdev, char **sysfs_path) >> +{ >> + virPCIDeviceAddress config_address; >> + >> + config_address.domain = hostdev->source.subsys.u.pci.addr.domain; >> + config_address.bus = hostdev->source.subsys.u.pci.addr.bus; >> + config_address.slot = hostdev->source.subsys.u.pci.addr.slot; >> + config_address.function = hostdev->source.subsys.u.pci.addr.function; >> + >> + return virPCIDeviceAddressGetSysfsFile(&config_address, sysfs_path); >> +} >> + >> +static int >> +virDomainHostdevIsVirtualFunction(virDomainHostdevDefPtr hostdev) >> +{ >> + char *sysfs_path = NULL; >> + int ret = -1; >> + >> + if (virDomainHostdevPciSysfsPath(hostdev, &sysfs_path) < 0) >> + return ret; >> + >> + ret = virPCIIsVirtualFunction(sysfs_path); >> + >> + VIR_FREE(sysfs_path); >> + >> + return ret; >> +} >> + >> +static int >> +virDomainHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev, >> + int *vf) >> +{ >> + int ret = -1; >> + char *sysfs_path = NULL; >> + >> + if (virDomainHostdevPciSysfsPath(hostdev, &sysfs_path) < 0) >> + return ret; >> + >> + if (virPCIIsVirtualFunction(sysfs_path) == 1) { >> + if (virPCIGetVirtualFunctionInfo(sysfs_path, linkdev, >> + vf) < 0) >> + goto cleanup; >> + } else { >> + if (virPCIGetNetName(sysfs_path, linkdev) < 0) >> + goto cleanup; >> + *vf = -1; >> + } >> + >> + ret = 0; >> + >> +cleanup: >> + VIR_FREE(sysfs_path); >> + >> + return ret; >> +} >> + >> +static int >> +virDomainHostdevNetConfigVirtPortProfile(const char *linkdev, int vf, >> + virNetDevVPortProfilePtr virtPort, >> + const virMacAddrPtr macaddr, >> + const unsigned char *uuid, >> + int associate) >> +{ >> + int ret = -1; >> + >> + if (!virtPort) >> + return ret; >> + >> + switch (virtPort->virtPortType) { >> + case VIR_NETDEV_VPORT_PROFILE_NONE: >> + case VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH: >> + case VIR_NETDEV_VPORT_PROFILE_8021QBG: >> + case VIR_NETDEV_VPORT_PROFILE_LAST: >> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, >> + _("virtualport type %s is " >> + "currently not supported on interfaces of type " >> + "hostdev"), >> + virNetDevVPortTypeToString(virtPort->virtPortType)); >> + break; >> + >> + case VIR_NETDEV_VPORT_PROFILE_8021QBH: >> + if (associate) >> + ret = virNetDevVPortProfileAssociate(NULL, virtPort, macaddr, >> + linkdev, vf, uuid, >> + VIR_NETDEV_VPORT_PROFILE_OP_CREATE, false); >> + else >> + ret = virNetDevVPortProfileDisassociate(NULL, virtPort, >> + macaddr, linkdev, vf, >> + VIR_NETDEV_VPORT_PROFILE_OP_DESTROY); >> + break; >> + } >> + >> + return ret; >> +} >> + >> +static int >> +virDomainHostdevNetConfigReplace(virDomainHostdevDefPtr hostdev, >> + const unsigned char *uuid, >> + char *stateDir) >> +{ >> + char *linkdev = NULL; >> + virNetDevVlanPtr vlan; >> + virNetDevVPortProfilePtr virtPort; >> + int ret = -1; >> + int vf = -1; >> + int vlanid = -1; >> + int port_profile_associate = 1; >> + int isvf; >> + >> + isvf = virDomainHostdevIsVirtualFunction(hostdev); >> + if (isvf <= 0) { >> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", >> + _("Interface type hostdev is currently supported on" >> + " SR-IOV Virtual Functions only")); >> + return ret; >> + } >> + >> + if (virDomainHostdevNetDevice(hostdev, &linkdev, &vf) < 0) >> + return ret; >> + >> + vlan = virDomainNetGetActualVlan(hostdev->parent.data.net); >> + virtPort = virDomainNetGetActualVirtPortProfile( >> + hostdev->parent.data.net); >> + if (virtPort) { >> + if (vlan) { >> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, >> + _("direct setting of the vlan tag is not allowed " >> + "for hostdev devices using %s mode"), >> + virNetDevVPortTypeToString(virtPort->virtPortType)); >> + goto cleanup; >> + } >> + ret = virDomainHostdevNetConfigVirtPortProfile(linkdev, vf, >> + virtPort, &hostdev->parent.data.net->mac, uuid, >> + port_profile_associate); >> + } else { >> + /* Set only mac and vlan */ >> + if (vlan) { >> + if (vlan->nTags != 1 || vlan->trunk) { >> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", >> + _("vlan trunking is not supported " >> + "by SR-IOV network devices")); >> + goto cleanup; >> + } >> + if (vf == -1) { >> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, >> + _("vlan can only be set for SR-IOV VFs, but " >> + "%s is not a VF"), linkdev); >> + goto cleanup; >> + } >> + vlanid = vlan->tag[0]; >> + } else if (vf >= 0) { >> + vlanid = 0; /* assure any current vlan tag is reset */ >> + } >> + >> + ret = virNetDevReplaceNetConfig(linkdev, vf, >> + &hostdev->parent.data.net->mac, >> + vlanid, stateDir); >> + } >> +cleanup: >> + VIR_FREE(linkdev); >> + return ret; >> +} >> + >> +static int >> +virDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev, >> + char *stateDir) >> +{ >> + char *linkdev = NULL; >> + virNetDevVPortProfilePtr virtPort; >> + int ret = -1; >> + int vf = -1; >> + int port_profile_associate = 0; >> + int isvf; >> + >> + isvf = virDomainHostdevIsVirtualFunction(hostdev); >> + if (isvf <= 0) { >> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", >> + _("Interface type hostdev is currently supported on" >> + " SR-IOV Virtual Functions only")); >> + return ret; >> + } >> + >> + if (virDomainHostdevNetDevice(hostdev, &linkdev, &vf) < 0) >> + return ret; >> + >> + virtPort = virDomainNetGetActualVirtPortProfile( >> + hostdev->parent.data.net); >> + if (virtPort) >> + ret = virDomainHostdevNetConfigVirtPortProfile(linkdev, vf, virtPort, >> + &hostdev->parent.data.net->mac, NULL, >> + port_profile_associate); >> + else >> + ret = virNetDevRestoreNetConfig(linkdev, vf, stateDir); >> + >> + VIR_FREE(linkdev); >> + >> + return ret; >> +} >> + >> +static virPCIDeviceListPtr >> +virHostdevManagerGetPciHostDeviceList( >> + virDomainHostdevDefPtr *hostdevs, >> + int nhostdevs) >> +{ >> + virPCIDeviceListPtr list; >> + int i; >> + >> + if (!(list = virPCIDeviceListNew())) >> + return NULL; >> + >> + for (i = 0 ; i < nhostdevs ; i++) { >> + virDomainHostdevDefPtr hostdev = hostdevs[i]; >> + virPCIDevicePtr dev; >> + >> + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) >> + continue; >> + if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) >> + continue; >> + >> + dev = virPCIDeviceNew(hostdev->source.subsys.u.pci.addr.domain, >> + hostdev->source.subsys.u.pci.addr.bus, >> + hostdev->source.subsys.u.pci.addr.slot, >> + hostdev->source.subsys.u.pci.addr.function); >> + if (!dev) { >> + virObjectUnref(list); >> + return NULL; >> + } >> + >> + if (virPCIDeviceListAdd(list, dev) < 0) { >> + virPCIDeviceFree(dev); >> + virObjectUnref(list); >> + return NULL; >> + } >> + >> + virPCIDeviceSetManaged(dev, hostdev->managed); >> + } >> + >> + return list; >> +} >> + >> +int >> +virHostdevManagerPreparePciHostdevs(virHostdevManagerPtr mgr, >> + const char *drv_name, >> + const char *dom_name, >> + const unsigned char *uuid, >> + virDomainHostdevDefPtr *hostdevs, >> + int nhostdevs, >> + unsigned int flags) >> +{ >> + virPCIDeviceListPtr pcidevs; >> + int last_processed_hostdev_vf = -1; >> + int i; >> + int ret = -1; >> + >> + virObjectLock(mgr->activePciHostdevs); >> + virObjectLock(mgr->inactivePciHostdevs); >> + >> + if (!(pcidevs = virHostdevManagerGetPciHostDeviceList(hostdevs, nhostdevs))) >> + goto cleanup; >> + >> + /* We have to use 9 loops here. *All* devices must >> + * be detached before we reset any of them, because >> + * in some cases you have to reset the whole PCI, >> + * which impacts all devices on it. Also, all devices >> + * must be reset before being marked as active. >> + */ >> + >> + /* Loop 1: validate that non-managed device isn't in use, eg >> + * by checking that device is either un-bound, or bound >> + * to pci-stub.ko >> + */ >> + >> + for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) { >> + virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i); >> + virPCIDevicePtr other; >> + bool strict_acs_check = (flags & VIR_STRICT_ACS_CHECK)?true:false; >> + >> + if (!virPCIDeviceIsAssignable(dev, strict_acs_check)) { >> + virReportError(VIR_ERR_OPERATION_INVALID, >> + _("PCI device %s is not assignable"), >> + virPCIDeviceGetName(dev)); >> + goto cleanup; >> + } >> + /* The device is in use by other active domain if >> + * the dev is in list activePciHostdevs. >> + */ >> + if ((other = virPCIDeviceListFind(mgr->activePciHostdevs, dev))) { >> + char *other_drvname = NULL; >> + char *other_domname = NULL; >> + virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname); >> + >> + if (other_drvname && other_domname) >> + virReportError(VIR_ERR_OPERATION_INVALID, >> + _("PCI device %s is in use by driver %s,domain %s"), >> + virPCIDeviceGetName(dev), other_drvname, >> + other_domname); >> + else >> + virReportError(VIR_ERR_OPERATION_INVALID, >> + _("PCI device %s is already in use"), >> + virPCIDeviceGetName(dev)); >> + VIR_FREE(other_drvname); >> + VIR_FREE(other_domname); >> + goto cleanup; >> + } >> + } >> + >> + /* Loop 2: detach managed devices */ >> + for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) { >> + virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i); >> + const char *stub_driver; >> + if (STREQ_NULLABLE(drv_name, "xenlight")) >> + stub_driver = "pciback"; >> + else >> + stub_driver = "pci_stub"; > Really need hardcode driver name here? Actually, no, it's not needed, and the code this is replacing has changed, so this all needs a rebase. The way that it works now (with the code that's in qemu) is that qemuGetPciHostDeviceList() (which builds a list of virPCIDevice from a list of virDomainHostDevDef) checks for a <driver name='xxx'/> setting, and if driver name is set to "vfio", it calls virPCIDeviceSetStubDriver(dev, "vfio-pci"), otherwise it calls virPCIDeviceSetStubDriver(dev, "pci-stub"). I guess your new virHostdevManagerGetPciHostDeviceList() needs to be made aware of which hypervisor is being used, and set the stub driver accordingly. Everything else in the device detach/reattach should be made to either use the stub driver name stored in the virPCIDevice, or to auto-detect it (virPCIDeviceUnbindFromStub() shows how to do that). > BTW "xen" driver missing here. > Perhaps this can be passes as parameter (with default to "pci_stub" if NULL)? > Or set by driver before calling this function (virPCIDeviceSetSubDriver?). Right, that function is newer than the code this patch is based from. [...] >> + >> +/* >> + * Pre-condition: mgr->inactivePciHostdevs & mgr->activePciHostdevs >> + * are locked >> + */ >> +static void >> +virReattachPciDevice(virHostdevManagerPtr mgr, virPCIDevicePtr dev, const char *driver) >> +{ >> + /* If the device is not managed and was attached to guest >> + * successfully, it must have been inactive. >> + */ >> + if (!virPCIDeviceGetManaged(dev)) { >> + if (virPCIDeviceListAdd(mgr->inactivePciHostdevs, dev) < 0) >> + virPCIDeviceFree(dev); >> + return; >> + } >> + >> + if (STREQ_NULLABLE(driver, "QEMU")) { > Same as earlier, perhaps the better approach will be checking for current stub > driver here? Hmm, yes. I wonder what this does for vfio. Is this needed for something in the pci-stub driver? Or is it common to any device assignment in qemu? (Fortunately it's harmless (although a bit wasteful of CPU time) when the device hasn't been used by qemu/kvm. >> + int retries = 100; >> + >> + while (virPCIDeviceWaitForCleanup(dev, "kvm_assigned_device") >> + && retries) { >> + usleep(100*1000); >> + retries--; >> + } >> + } >> + >> + if (virPCIDeviceReattach(dev, mgr->activePciHostdevs, >> + mgr->inactivePciHostdevs) < 0) { >> + virErrorPtr err = virGetLastError(); >> + VIR_ERROR(_("Failed to re-attach PCI device: %s"), >> + err ? err->message : _("unknown error")); >> + virResetError(err); >> + } >> + virPCIDeviceFree(dev); >> +} >> + >> +/* >> Generally, please check for recent changes to the functions you're basing this on. We don't want any of that to mysteriously disappear when we switch from the qemu-specific functions to these general functions. -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list