On Fri, Jan 22, 2010 at 11:40:38AM -0500, Chris Lalancette wrote: > Certain hypervisors (like qemu/kvm) map the PCI bar(s) on > the host when doing device passthrough. This can lead to a race > condition where the hypervisor is still cleaning up the device while > libvirt is trying to re-attach it to the host device driver. To avoid > this situation, we look through /proc/iomem, and if the hypervisor is > still holding onto the bar (denoted by the string in the matcher variable), > then we can wait around a bit for that to clear up. > > v2: Thanks to review by DV, make sure we wait the full timeout per-device > > Signed-off-by: Chris Lalancette <clalance@xxxxxxxxxx> > --- > src/libvirt_private.syms | 1 + > src/qemu/qemu_driver.c | 19 ++++++--- > src/util/pci.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++ > src/util/pci.h | 1 + > 4 files changed, 112 insertions(+), 6 deletions(-) > > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index c912d81..e42c090 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -439,6 +439,7 @@ pciGetDevice; > pciFreeDevice; > pciDettachDevice; > pciReAttachDevice; > +pciWaitForDeviceCleanup; > pciResetDevice; > pciDeviceSetManaged; > pciDeviceGetManaged; > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index e14ed8f..55550ef 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -2277,12 +2277,19 @@ qemuDomainReAttachHostDevices(virConnectPtr conn, > > for (i = 0; i < pciDeviceListCount(pcidevs); i++) { > pciDevice *dev = pciDeviceListGet(pcidevs, i); > - if (pciDeviceGetManaged(dev) && > - pciReAttachDevice(NULL, dev) < 0) { > - virErrorPtr err = virGetLastError(); > - VIR_ERROR(_("Failed to re-attach PCI device: %s"), > - err ? err->message : ""); > - virResetError(err); > + int retries = 100; > + if (pciDeviceGetManaged(dev)) { > + while (pciWaitForDeviceCleanup(dev, "kvm_assigned_device") > + && retries) { > + usleep(100*1000); > + retries--; > + } > + if (pciReAttachDevice(NULL, dev) < 0) { > + virErrorPtr err = virGetLastError(); > + VIR_ERROR(_("Failed to re-attach PCI device: %s"), > + err ? err->message : ""); > + virResetError(err); > + } > } > } > > diff --git a/src/util/pci.c b/src/util/pci.c > index 0defbfe..09535bd 100644 > --- a/src/util/pci.c > +++ b/src/util/pci.c > @@ -883,6 +883,103 @@ pciReAttachDevice(virConnectPtr conn, pciDevice *dev) > return pciUnBindDeviceFromStub(conn, dev, driver); > } > > +/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on > + * the host when doing device passthrough. This can lead to a race > + * condition where the hypervisor is still cleaning up the device while > + * libvirt is trying to re-attach it to the host device driver. To avoid > + * this situation, we look through /proc/iomem, and if the hypervisor is > + * still holding onto the bar (denoted by the string in the matcher variable), > + * then we can wait around a bit for that to clear up. > + * > + * A typical /proc/iomem looks like this (snipped for brevity): > + * 00010000-0008efff : System RAM > + * 0008f000-0008ffff : reserved > + * ... > + * 00100000-cc9fcfff : System RAM > + * 00200000-00483d3b : Kernel code > + * 00483d3c-005c88df : Kernel data > + * cc9fd000-ccc71fff : ACPI Non-volatile Storage > + * ... > + * d0200000-d02fffff : PCI Bus #05 > + * d0200000-d021ffff : 0000:05:00.0 > + * d0200000-d021ffff : e1000e > + * d0220000-d023ffff : 0000:05:00.0 > + * d0220000-d023ffff : e1000e > + * ... > + * f0000000-f0003fff : 0000:00:1b.0 > + * f0000000-f0003fff : kvm_assigned_device > + * > + * Returns 0 if we are clear to continue, and 1 if the hypervisor is still > + * holding onto the resource. > + */ > +int > +pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher) > +{ > + FILE *fp; > + char line[160]; > + unsigned long long start, end; > + int consumed; > + char *rest; > + unsigned long long domain; > + int bus, slot, function; > + int in_matching_device; > + int ret; > + size_t match_depth; > + > + fp = fopen("/proc/iomem", "r"); > + if (!fp) { > + /* If we failed to open iomem, we just basically ignore the error. The > + * unbind might succeed anyway, and besides, it's very likely we have > + * no way to report the error > + */ > + VIR_DEBUG0("Failed to open /proc/iomem, trying to continue anyway"); > + return 0; > + } > + > + ret = 0; > + in_matching_device = 0; > + match_depth = 0; > + while (fgets(line, sizeof(line), fp) != 0) { > + /* the logic here is a bit confusing. For each line, we look to > + * see if it matches the domain:bus:slot.function we were given. > + * If this line matches the DBSF, then any subsequent lines indented > + * by 2 spaces are the PCI regions for this device. It's also > + * possible that none of the PCI regions are currently mapped, in > + * which case we have no indented regions. This code handles all > + * of these situations > + */ > + if (in_matching_device && (strspn(line, " ") == (match_depth + 2))) { > + if (sscanf(line, "%Lx-%Lx : %n", &start, &end, &consumed) != 2) > + continue; > + > + rest = line + consumed; > + if (STRPREFIX(rest, matcher)) { > + ret = 1; > + break; > + } > + } > + else { > + in_matching_device = 0; > + if (sscanf(line, "%Lx-%Lx : %n", &start, &end, &consumed) != 2) > + continue; > + > + rest = line + consumed; > + if (sscanf(rest, "%Lx:%x:%x.%x", &domain, &bus, &slot, &function) != 4) > + continue; > + > + if (domain != dev->domain || bus != dev->bus || slot != dev->slot || > + function != dev->function) > + continue; > + in_matching_device = 1; > + match_depth = strspn(line, " "); > + } > + } > + > + fclose(fp); > + > + return ret; > +} > + > static char * > pciReadDeviceID(pciDevice *dev, const char *id_name) > { > diff --git a/src/util/pci.h b/src/util/pci.h > index a1a19e7..e6ab137 100644 > --- a/src/util/pci.h > +++ b/src/util/pci.h > @@ -81,5 +81,6 @@ int pciDeviceFileIterate(virConnectPtr conn, > int pciDeviceIsAssignable(virConnectPtr conn, > pciDevice *dev, > int strict_acs_check); > +int pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher); > > #endif /* __VIR_PCI_H__ */ ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@xxxxxxxxxxxx | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/ -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list