From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> --- po/POTFILES.in | 2 +- src/Makefile.am | 2 +- src/conf/node_device_conf.c | 2 +- src/network/bridge_driver.c | 2 +- src/node_device/node_device_hal.c | 2 +- src/node_device/node_device_udev.c | 2 +- src/qemu/qemu_conf.h | 2 +- src/qemu/qemu_driver.c | 2 +- src/qemu/qemu_hostdev.c | 2 +- src/qemu/qemu_hotplug.c | 2 +- src/security/security_apparmor.c | 2 +- src/security/security_dac.c | 2 +- src/security/security_selinux.c | 2 +- src/security/virt-aa-helper.c | 2 +- src/util/pci.c | 2285 ------------------------------------ src/util/pci.h | 136 --- src/util/virnetdev.c | 2 +- src/util/virnetdev.h | 2 +- src/util/virpci.c | 2285 ++++++++++++++++++++++++++++++++++++ src/util/virpci.h | 136 +++ src/xen/xen_driver.c | 2 +- 21 files changed, 2438 insertions(+), 2438 deletions(-) delete mode 100644 src/util/pci.c delete mode 100644 src/util/pci.h create mode 100644 src/util/virpci.c create mode 100644 src/util/virpci.h diff --git a/po/POTFILES.in b/po/POTFILES.in index a9dfcf4..18db09b 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -138,7 +138,6 @@ src/test/test_driver.c src/uml/uml_conf.c src/uml/uml_driver.c src/util/iohelper.c -src/util/pci.c src/util/processinfo.c src/util/sexpr.c src/util/stats_linux.c @@ -171,6 +170,7 @@ src/util/virnetdevtap.c src/util/virnetdevvportprofile.c src/util/virnetlink.c src/util/virnodesuspend.c +src/util/virpci.c src/util/virpidfile.c src/util/virprocess.c src/util/virrandom.c diff --git a/src/Makefile.am b/src/Makefile.am index 64f117d..d9f621f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,7 +53,6 @@ augeastest_DATA = # These files are not related to driver APIs. 
Simply generic # helper APIs for various purposes UTIL_SOURCES = \ - util/pci.c util/pci.h \ util/processinfo.c util/processinfo.h \ util/sexpr.c util/sexpr.h \ util/stats_linux.c util/stats_linux.h \ @@ -85,6 +84,7 @@ UTIL_SOURCES = \ util/virjson.c util/virjson.h \ util/virlog.c util/virlog.h \ util/virobject.c util/virobject.h \ + util/virpci.c util/virpci.h \ util/virpidfile.c util/virpidfile.h \ util/virprocess.c util/virprocess.h \ util/virtypedparam.c util/virtypedparam.h \ diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index 85de50f..045f05d 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -36,7 +36,7 @@ #include "util.h" #include "virbuffer.h" #include "uuid.h" -#include "pci.h" +#include "virpci.h" #include "virrandom.h" #define VIR_FROM_THIS VIR_FROM_NODEDEV diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index 52b36a1..2db9197 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -61,7 +61,7 @@ #include "virdnsmasq.h" #include "configmake.h" #include "virnetdev.h" -#include "pci.h" +#include "virpci.h" #include "virnetdevbridge.h" #include "virnetdevtap.h" #include "virnetdevvportprofile.h" diff --git a/src/node_device/node_device_hal.c b/src/node_device/node_device_hal.c index 6f89f16..257a363 100644 --- a/src/node_device/node_device_hal.c +++ b/src/node_device/node_device_hal.c @@ -35,7 +35,7 @@ #include "datatypes.h" #include "viralloc.h" #include "uuid.h" -#include "pci.h" +#include "virpci.h" #include "virlog.h" #include "node_device_driver.h" #include "virdbus.h" diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 81a11d7..7289a72 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -37,7 +37,7 @@ #include "uuid.h" #include "util.h" #include "virbuffer.h" -#include "pci.h" +#include "virpci.h" #define VIR_FROM_THIS VIR_FROM_NODEDEV diff --git a/src/qemu/qemu_conf.h 
b/src/qemu/qemu_conf.h index 283251a..f928c29 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -35,7 +35,7 @@ # include "threads.h" # include "security/security_manager.h" # include "vircgroup.h" -# include "pci.h" +# include "virpci.h" # include "virusb.h" # include "cpu_conf.h" # include "driver.h" diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 65254b6..14a5e44 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -72,7 +72,7 @@ #include "domain_conf.h" #include "domain_audit.h" #include "node_device_conf.h" -#include "pci.h" +#include "virpci.h" #include "virusb.h" #include "processinfo.h" #include "libvirt_internal.h" diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c index 754f8d0..5c23ccb 100644 --- a/src/qemu/qemu_hostdev.c +++ b/src/qemu/qemu_hostdev.c @@ -27,7 +27,7 @@ #include "virlog.h" #include "virterror_internal.h" #include "viralloc.h" -#include "pci.h" +#include "virpci.h" #include "virusb.h" #include "virnetdev.h" diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index c432a32..e120988 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -36,7 +36,7 @@ #include "datatypes.h" #include "virterror_internal.h" #include "viralloc.h" -#include "pci.h" +#include "virpci.h" #include "virfile.h" #include "qemu_cgroup.h" #include "locking/domain_lock.h" diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index b65384e..d28189f 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -43,7 +43,7 @@ #include "virterror_internal.h" #include "datatypes.h" #include "uuid.h" -#include "pci.h" +#include "virpci.h" #include "virusb.h" #include "virfile.h" #include "configmake.h" diff --git a/src/security/security_dac.c b/src/security/security_dac.c index bdb29c7..e4f016a 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -28,7 +28,7 @@ #include "util.h" #include "viralloc.h" #include 
"virlog.h" -#include "pci.h" +#include "virpci.h" #include "virusb.h" #include "storage_file.h" diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 3a1548d..2adf5c9 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -37,7 +37,7 @@ #include "util.h" #include "viralloc.h" #include "virlog.h" -#include "pci.h" +#include "virpci.h" #include "virusb.h" #include "storage_file.h" #include "virfile.h" diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c index c9c222a..4945f7c 100644 --- a/src/security/virt-aa-helper.c +++ b/src/security/virt-aa-helper.c @@ -51,7 +51,7 @@ #include "xml.h" #include "uuid.h" #include "virusb.h" -#include "pci.h" +#include "virpci.h" #include "virfile.h" #include "configmake.h" #include "virrandom.h" diff --git a/src/util/pci.c b/src/util/pci.c deleted file mode 100644 index e4009fa..0000000 --- a/src/util/pci.c +++ /dev/null @@ -1,2285 +0,0 @@ -/* - * Copyright (C) 2009-2012 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see - * <http://www.gnu.org/licenses/>. 
- * - * Authors: - * Mark McLoughlin <markmc@xxxxxxxxxx> - */ - -#include <config.h> - -#include "pci.h" - -#include <dirent.h> -#include <fcntl.h> -#include <inttypes.h> -#include <limits.h> -#include <stdio.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <stdlib.h> - -#include "virlog.h" -#include "viralloc.h" -#include "vircommand.h" -#include "virterror_internal.h" -#include "virfile.h" - -#define PCI_SYSFS "/sys/bus/pci/" -#define PCI_ID_LEN 10 /* "XXXX XXXX" */ -#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */ - -#define SRIOV_FOUND 0 -#define SRIOV_NOT_FOUND 1 -#define SRIOV_ERROR -1 - -struct _pciDevice { - unsigned domain; - unsigned bus; - unsigned slot; - unsigned function; - - char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ - char id[PCI_ID_LEN]; /* product vendor */ - char *path; - const char *used_by; /* The domain which uses the device */ - - unsigned pcie_cap_pos; - unsigned pci_pm_cap_pos; - unsigned has_flr : 1; - unsigned has_pm_reset : 1; - unsigned managed : 1; - - /* used by reattach function */ - unsigned unbind_from_stub : 1; - unsigned remove_slot : 1; - unsigned reprobe : 1; -}; - -struct _pciDeviceList { - unsigned count; - pciDevice **devs; -}; - - -/* For virReportOOMError() and virReportSystemError() */ -#define VIR_FROM_THIS VIR_FROM_NONE - -/* Specifications referenced in comments: - * PCI30 - PCI Local Bus Specification 3.0 - * PCIe20 - PCI Express Base Specification 2.0 - * BR12 - PCI-to-PCI Bridge Architecture Specification 1.2 - * PM12 - PCI Bus Power Management Interface Specification 1.2 - * ECN_AF - Advanced Capabilities for Conventional PCI ECN - */ - -/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization */ -#define PCI_CONF_LEN 0x100 -#define PCI_CONF_HEADER_LEN 0x40 - -/* PCI30 6.2.1 */ -#define PCI_HEADER_TYPE 0x0e /* Header type */ -#define PCI_HEADER_TYPE_BRIDGE 0x1 -#define PCI_HEADER_TYPE_MASK 0x7f -#define 
PCI_HEADER_TYPE_MULTI 0x80 - -/* PCI30 6.2.1 Device Identification */ -#define PCI_CLASS_DEVICE 0x0a /* Device class */ - -/* Class Code for bridge; PCI30 D.7 Base Class 06h */ -#define PCI_CLASS_BRIDGE_PCI 0x0604 - -/* PCI30 6.2.3 Device Status */ -#define PCI_STATUS 0x06 /* 16 bits */ -#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ - -/* PCI30 6.7 Capabilities List */ -#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ - -/* PM12 3.2.1 Capability Identifier */ -#define PCI_CAP_ID_PM 0x01 /* Power Management */ -/* PCI30 H Capability IDs */ -#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ -/* ECN_AF 6.x.1.1 Capability ID for AF */ -#define PCI_CAP_ID_AF 0x13 /* Advanced Features */ - -/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */ -#define PCI_EXP_DEVCAP 0x4 /* Device capabilities */ -#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ - -/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */ -#define PCI_PRIMARY_BUS 0x18 /* BR12 3.2.5.2 Primary bus number */ -#define PCI_SECONDARY_BUS 0x19 /* BR12 3.2.5.3 Secondary bus number */ -#define PCI_SUBORDINATE_BUS 0x1a /* BR12 3.2.5.4 Highest bus number behind the bridge */ -#define PCI_BRIDGE_CONTROL 0x3e -/* BR12 3.2.5.18 Bridge Control Register */ -#define PCI_BRIDGE_CTL_RESET 0x40 /* Secondary bus reset */ - -/* PM12 3.2.4 Power Management Control/Status (Offset = 4) */ -#define PCI_PM_CTRL 4 /* PM control and status register */ -#define PCI_PM_CTRL_STATE_MASK 0x3 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_STATE_D0 0x0 /* D0 state */ -#define PCI_PM_CTRL_STATE_D3hot 0x3 /* D3 state */ -#define PCI_PM_CTRL_NO_SOFT_RESET 0x8 /* No reset for D3hot->D0 */ - -/* ECN_AF 6.x.1 Advanced Features Capability Structure */ -#define PCI_AF_CAP 0x3 /* Advanced features capabilities */ -#define PCI_AF_CAP_FLR 0x2 /* Function Level Reset */ - -#define PCI_EXP_FLAGS 0x2 -#define PCI_EXP_FLAGS_TYPE 0x00f0 -#define 
PCI_EXP_TYPE_DOWNSTREAM 0x6 - -#define PCI_EXT_CAP_BASE 0x100 -#define PCI_EXT_CAP_LIMIT 0x1000 -#define PCI_EXT_CAP_ID_MASK 0x0000ffff -#define PCI_EXT_CAP_OFFSET_SHIFT 20 -#define PCI_EXT_CAP_OFFSET_MASK 0x00000ffc - -#define PCI_EXT_CAP_ID_ACS 0x000d -#define PCI_EXT_ACS_CTRL 0x06 - -#define PCI_EXT_CAP_ACS_SV 0x01 -#define PCI_EXT_CAP_ACS_RR 0x04 -#define PCI_EXT_CAP_ACS_CR 0x08 -#define PCI_EXT_CAP_ACS_UF 0x10 -#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \ - PCI_EXT_CAP_ACS_RR | \ - PCI_EXT_CAP_ACS_CR | \ - PCI_EXT_CAP_ACS_UF) - -static int -pciConfigOpen(pciDevice *dev, bool fatal) -{ - int fd; - - fd = open(dev->path, O_RDWR); - - if (fd < 0) { - if (fatal) { - virReportSystemError(errno, - _("Failed to open config space file '%s'"), - dev->path); - } else { - char ebuf[1024]; - VIR_WARN("Failed to open config space file '%s': %s", - dev->path, virStrerror(errno, ebuf, sizeof(ebuf))); - } - return -1; - } - - VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path); - return fd; -} - -static void -pciConfigClose(pciDevice *dev, int cfgfd) -{ - if (VIR_CLOSE(cfgfd) < 0) { - char ebuf[1024]; - VIR_WARN("Failed to close config space file '%s': %s", - dev->path, virStrerror(errno, ebuf, sizeof(ebuf))); - } -} - - -static int -pciRead(pciDevice *dev, - int cfgfd, - unsigned pos, - uint8_t *buf, - unsigned buflen) -{ - memset(buf, 0, buflen); - - if (lseek(cfgfd, pos, SEEK_SET) != pos || - saferead(cfgfd, buf, buflen) != buflen) { - char ebuf[1024]; - VIR_WARN("Failed to read from '%s' : %s", dev->path, - virStrerror(errno, ebuf, sizeof(ebuf))); - return -1; - } - return 0; -} - -static uint8_t -pciRead8(pciDevice *dev, int cfgfd, unsigned pos) -{ - uint8_t buf; - pciRead(dev, cfgfd, pos, &buf, sizeof(buf)); - return buf; -} - -static uint16_t -pciRead16(pciDevice *dev, int cfgfd, unsigned pos) -{ - uint8_t buf[2]; - pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf)); - return (buf[0] << 0) | (buf[1] << 8); -} - -static uint32_t -pciRead32(pciDevice 
*dev, int cfgfd, unsigned pos) -{ - uint8_t buf[4]; - pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf)); - return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); -} - -static int -pciWrite(pciDevice *dev, - int cfgfd, - unsigned pos, - uint8_t *buf, - unsigned buflen) -{ - if (lseek(cfgfd, pos, SEEK_SET) != pos || - safewrite(cfgfd, buf, buflen) != buflen) { - char ebuf[1024]; - VIR_WARN("Failed to write to '%s' : %s", dev->path, - virStrerror(errno, ebuf, sizeof(ebuf))); - return -1; - } - return 0; -} - -static void -pciWrite16(pciDevice *dev, int cfgfd, unsigned pos, uint16_t val) -{ - uint8_t buf[2] = { (val >> 0), (val >> 8) }; - pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf)); -} - -static void -pciWrite32(pciDevice *dev, int cfgfd, unsigned pos, uint32_t val) -{ - uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) }; - pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf)); -} - -typedef int (*pciIterPredicate)(pciDevice *, pciDevice *, void *); - -/* Iterate over available PCI devices calling @predicate - * to compare each one to @dev. - * Return -1 on error since we don't want to assume it is - * safe to reset if there is an error. - */ -static int -pciIterDevices(pciIterPredicate predicate, - pciDevice *dev, - pciDevice **matched, - void *data) -{ - DIR *dir; - struct dirent *entry; - int ret = 0; - int rc; - - *matched = NULL; - - VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name); - - dir = opendir(PCI_SYSFS "devices"); - if (!dir) { - VIR_WARN("Failed to open " PCI_SYSFS "devices"); - return -1; - } - - while ((entry = readdir(dir))) { - unsigned int domain, bus, slot, function; - pciDevice *check; - char *tmp; - - /* Ignore '.' and '..' 
*/ - if (entry->d_name[0] == '.') - continue; - - /* expected format: <domain>:<bus>:<slot>.<function> */ - if (/* domain */ - virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' || - /* bus */ - virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' || - /* slot */ - virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' || - /* function */ - virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) { - VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name); - continue; - } - - check = pciGetDevice(domain, bus, slot, function); - if (!check) { - ret = -1; - break; - } - - rc = predicate(dev, check, data); - if (rc < 0) { - /* the predicate returned an error, bail */ - pciFreeDevice(check); - ret = -1; - break; - } - else if (rc == 1) { - VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name); - *matched = check; - ret = 1; - break; - } - - pciFreeDevice(check); - } - closedir(dir); - return ret; -} - -static uint8_t -pciFindCapabilityOffset(pciDevice *dev, int cfgfd, unsigned capability) -{ - uint16_t status; - uint8_t pos; - - status = pciRead16(dev, cfgfd, PCI_STATUS); - if (!(status & PCI_STATUS_CAP_LIST)) - return 0; - - pos = pciRead8(dev, cfgfd, PCI_CAPABILITY_LIST); - - /* Zero indicates last capability, capabilities can't - * be in the config space header and 0xff is returned - * by the kernel if we don't have access to this region - * - * Note: we're not handling loops or extended - * capabilities here. 
- */ - while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) { - uint8_t capid = pciRead8(dev, cfgfd, pos); - if (capid == capability) { - VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x", - dev->id, dev->name, capability, pos); - return pos; - } - - pos = pciRead8(dev, cfgfd, pos + 1); - } - - VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability); - - return 0; -} - -static unsigned int -pciFindExtendedCapabilityOffset(pciDevice *dev, - int cfgfd, - unsigned capability) -{ - int ttl; - unsigned int pos; - uint32_t header; - - /* minimum 8 bytes per capability */ - ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8; - pos = PCI_EXT_CAP_BASE; - - while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) { - header = pciRead32(dev, cfgfd, pos); - - if ((header & PCI_EXT_CAP_ID_MASK) == capability) - return pos; - - pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK; - ttl--; - } - - return 0; -} - -/* detects whether this device has FLR. Returns 0 if the device does - * not have FLR, 1 if it does, and -1 on error - */ -static int -pciDetectFunctionLevelReset(pciDevice *dev, int cfgfd) -{ - uint32_t caps; - uint8_t pos; - char *path; - int found; - - /* The PCIe Function Level Reset capability allows - * individual device functions to be reset without - * affecting any other functions on the device or - * any other devices on the bus. This is only common - * on SR-IOV NICs at the moment. - */ - if (dev->pcie_cap_pos) { - caps = pciRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP); - if (caps & PCI_EXP_DEVCAP_FLR) { - VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name); - return 1; - } - } - - /* The PCI AF Function Level Reset capability is - * the same thing, except for conventional PCI - * devices. This is not common yet. 
- */ - pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF); - if (pos) { - caps = pciRead16(dev, cfgfd, pos + PCI_AF_CAP); - if (caps & PCI_AF_CAP_FLR) { - VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name); - return 1; - } - } - - /* there are some buggy devices that do support FLR, but forget to - * advertise that fact in their capabilities. However, FLR is *required* - * to be present for virtual functions (VFs), so if we see that this - * device is a VF, we just assume FLR works - */ - - if (virAsprintf(&path, PCI_SYSFS "devices/%s/physfn", dev->name) < 0) { - virReportOOMError(); - return -1; - } - - found = virFileExists(path); - VIR_FREE(path); - if (found) { - VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on", - dev->id, dev->name); - return 1; - } - - VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name); - - return 0; -} - -/* Require the device has the PCI Power Management capability - * and that a D3hot->D0 transition will results in a full - * internal reset, not just a soft reset. - */ -static unsigned -pciDetectPowerManagementReset(pciDevice *dev, int cfgfd) -{ - if (dev->pci_pm_cap_pos) { - uint32_t ctl; - - /* require the NO_SOFT_RESET bit is clear */ - ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL); - if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) { - VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name); - return 1; - } - } - - VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name); - - return 0; -} - -/* Any active devices on the same domain/bus ? */ -static int -pciSharesBusWithActive(pciDevice *dev, pciDevice *check, void *data) -{ - pciDeviceList *inactiveDevs = data; - - /* Different domain, different bus, or simply identical device */ - if (dev->domain != check->domain || - dev->bus != check->bus || - (dev->slot == check->slot && - dev->function == check->function)) - return 0; - - /* same bus, but inactive, i.e. 
about to be assigned to guest */ - if (inactiveDevs && pciDeviceListFind(inactiveDevs, check)) - return 0; - - return 1; -} - -static pciDevice * -pciBusContainsActiveDevices(pciDevice *dev, - pciDeviceList *inactiveDevs) -{ - pciDevice *active = NULL; - if (pciIterDevices(pciSharesBusWithActive, - dev, &active, inactiveDevs) < 0) - return NULL; - return active; -} - -/* Is @check the parent of @dev ? */ -static int -pciIsParent(pciDevice *dev, pciDevice *check, void *data) -{ - uint16_t device_class; - uint8_t header_type, secondary, subordinate; - pciDevice **best = data; - int ret = 0; - int fd; - - if (dev->domain != check->domain) - return 0; - - if ((fd = pciConfigOpen(check, false)) < 0) - return 0; - - /* Is it a bridge? */ - device_class = pciRead16(check, fd, PCI_CLASS_DEVICE); - if (device_class != PCI_CLASS_BRIDGE_PCI) - goto cleanup; - - /* Is it a plane? */ - header_type = pciRead8(check, fd, PCI_HEADER_TYPE); - if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE) - goto cleanup; - - secondary = pciRead8(check, fd, PCI_SECONDARY_BUS); - subordinate = pciRead8(check, fd, PCI_SUBORDINATE_BUS); - - VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name); - - /* if the secondary bus exactly equals the device's bus, then we found - * the direct parent. No further work is necessary - */ - if (dev->bus == secondary) { - ret = 1; - goto cleanup; - } - - /* otherwise, SRIOV allows VFs to be on different busses then their PFs. - * In this case, what we need to do is look for the "best" match; i.e. - * the most restrictive match that still satisfies all of the conditions. - */ - if (dev->bus > secondary && dev->bus <= subordinate) { - if (*best == NULL) { - *best = pciGetDevice(check->domain, check->bus, check->slot, - check->function); - if (*best == NULL) { - ret = -1; - goto cleanup; - } - } else { - /* OK, we had already recorded a previous "best" match for the - * parent. 
See if the current device is more restrictive than the - * best, and if so, make it the new best - */ - int bestfd; - uint8_t best_secondary; - - if ((bestfd = pciConfigOpen(*best, false)) < 0) - goto cleanup; - best_secondary = pciRead8(*best, bestfd, PCI_SECONDARY_BUS); - pciConfigClose(*best, bestfd); - - if (secondary > best_secondary) { - pciFreeDevice(*best); - *best = pciGetDevice(check->domain, check->bus, check->slot, - check->function); - if (*best == NULL) { - ret = -1; - goto cleanup; - } - } - } - } - -cleanup: - pciConfigClose(check, fd); - return ret; -} - -static int -pciGetParentDevice(pciDevice *dev, pciDevice **parent) -{ - pciDevice *best = NULL; - int ret; - - *parent = NULL; - ret = pciIterDevices(pciIsParent, dev, parent, &best); - if (ret == 1) - pciFreeDevice(best); - else if (ret == 0) - *parent = best; - return ret; -} - -/* Secondary Bus Reset is our sledgehammer - it resets all - * devices behind a bus. - */ -static int -pciTrySecondaryBusReset(pciDevice *dev, - int cfgfd, - pciDeviceList *inactiveDevs) -{ - pciDevice *parent, *conflict; - uint8_t config_space[PCI_CONF_LEN]; - uint16_t ctl; - int ret = -1; - int parentfd; - - /* Refuse to do a secondary bus reset if there are other - * devices/functions behind the bus are used by the host - * or other guests. 
- */ - if ((conflict = pciBusContainsActiveDevices(dev, inactiveDevs))) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Active %s devices on bus with %s, not doing bus reset"), - conflict->name, dev->name); - return -1; - } - - /* Find the parent bus */ - if (pciGetParentDevice(dev, &parent) < 0) - return -1; - if (!parent) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to find parent device for %s"), - dev->name); - return -1; - } - if ((parentfd = pciConfigOpen(parent, true)) < 0) - goto out; - - VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name); - - /* Save and restore the device's config space; we only do this - * for the supplied device since we refuse to do a reset if there - * are multiple devices/functions - */ - if (pciRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to read PCI config space for %s"), - dev->name); - goto out; - } - - /* Read the control register, set the reset flag, wait 200ms, - * unset the reset flag and wait 200ms. - */ - ctl = pciRead16(dev, cfgfd, PCI_BRIDGE_CONTROL); - - pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, - ctl | PCI_BRIDGE_CTL_RESET); - - usleep(200 * 1000); /* sleep 200ms */ - - pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl); - - usleep(200 * 1000); /* sleep 200ms */ - - if (pciWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to restore PCI config space for %s"), - dev->name); - goto out; - } - ret = 0; - -out: - pciConfigClose(parent, parentfd); - pciFreeDevice(parent); - return ret; -} - -/* Power management reset attempts to reset a device using a - * D-state transition from D3hot to D0. Note, in detect_pm_reset() - * above we require the device supports a full internal reset. 
- */ -static int -pciTryPowerManagementReset(pciDevice *dev, int cfgfd) -{ - uint8_t config_space[PCI_CONF_LEN]; - uint32_t ctl; - - if (!dev->pci_pm_cap_pos) - return -1; - - /* Save and restore the device's config space. */ - if (pciRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to read PCI config space for %s"), - dev->name); - return -1; - } - - VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name); - - ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL); - ctl &= ~PCI_PM_CTRL_STATE_MASK; - - pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL, - ctl | PCI_PM_CTRL_STATE_D3hot); - - usleep(10 * 1000); /* sleep 10ms */ - - pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL, - ctl | PCI_PM_CTRL_STATE_D0); - - usleep(10 * 1000); /* sleep 10ms */ - - if (pciWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to restore PCI config space for %s"), - dev->name); - return -1; - } - - return 0; -} - -static int -pciInitDevice(pciDevice *dev, int cfgfd) -{ - int flr; - - dev->pcie_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP); - dev->pci_pm_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM); - flr = pciDetectFunctionLevelReset(dev, cfgfd); - if (flr < 0) - return flr; - dev->has_flr = flr; - dev->has_pm_reset = pciDetectPowerManagementReset(dev, cfgfd); - - return 0; -} - -int -pciResetDevice(pciDevice *dev, - pciDeviceList *activeDevs, - pciDeviceList *inactiveDevs) -{ - int ret = -1; - int fd; - - if (activeDevs && pciDeviceListFind(activeDevs, dev)) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Not resetting active device %s"), dev->name); - return -1; - } - - if ((fd = pciConfigOpen(dev, true)) < 0) - return -1; - - if (pciInitDevice(dev, fd) < 0) - goto cleanup; - - /* KVM will perform FLR when starting and stopping - * a guest, so there is no need for us to do it here. 
- */ - if (dev->has_flr) { - ret = 0; - goto cleanup; - } - - /* If the device supports PCI power management reset, - * that's the next best thing because it only resets - * the function, not the whole device. - */ - if (dev->has_pm_reset) - ret = pciTryPowerManagementReset(dev, fd); - - /* Bus reset is not an option with the root bus */ - if (ret < 0 && dev->bus != 0) - ret = pciTrySecondaryBusReset(dev, fd, inactiveDevs); - - if (ret < 0) { - virErrorPtr err = virGetLastError(); - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unable to reset PCI device %s: %s"), - dev->name, - err ? err->message : _("no FLR, PM reset or bus reset available")); - } - -cleanup: - pciConfigClose(dev, fd); - return ret; -} - - -static int -pciDriverDir(char **buffer, const char *driver) -{ - VIR_FREE(*buffer); - - if (virAsprintf(buffer, PCI_SYSFS "drivers/%s", driver) < 0) { - virReportOOMError(); - return -1; - } - - return 0; -} - -static int -pciDriverFile(char **buffer, const char *driver, const char *file) -{ - VIR_FREE(*buffer); - - if (virAsprintf(buffer, PCI_SYSFS "drivers/%s/%s", driver, file) < 0) { - virReportOOMError(); - return -1; - } - - return 0; -} - -static int -pciDeviceFile(char **buffer, const char *device, const char *file) -{ - VIR_FREE(*buffer); - - if (virAsprintf(buffer, PCI_SYSFS "devices/%s/%s", device, file) < 0) { - virReportOOMError(); - return -1; - } - - return 0; -} - - -static const char * -pciFindStubDriver(void) -{ - char *drvpath = NULL; - int probed = 0; - -recheck: - if (pciDriverDir(&drvpath, "pci-stub") < 0) { - return NULL; - } - - if (virFileExists(drvpath)) { - VIR_FREE(drvpath); - return "pci-stub"; - } - - if (pciDriverDir(&drvpath, "pciback") < 0) { - return NULL; - } - - if (virFileExists(drvpath)) { - VIR_FREE(drvpath); - return "pciback"; - } - - VIR_FREE(drvpath); - - if (!probed) { - const char *const stubprobe[] = { MODPROBE, "pci-stub", NULL }; - const char *const backprobe[] = { MODPROBE, "pciback", NULL }; - - probed = 1; - /* - 
* Probing for pci-stub will succeed regardless of whether - * on native or Xen kernels. - * On Xen though, we want to prefer pciback, so probe - * for that first, because that will only work on Xen - */ - if (virRun(backprobe, NULL) < 0 && - virRun(stubprobe, NULL) < 0) { - char ebuf[1024]; - VIR_WARN("failed to load pci-stub or pciback drivers: %s", - virStrerror(errno, ebuf, sizeof(ebuf))); - return NULL; - } - - goto recheck; - } - - return NULL; -} - -static int -pciUnbindDeviceFromStub(pciDevice *dev, const char *driver) -{ - int result = -1; - char *drvdir = NULL; - char *path = NULL; - - if (pciDriverDir(&drvdir, driver) < 0) - goto cleanup; - - if (!dev->unbind_from_stub) - goto remove_slot; - - /* If the device is bound to stub, unbind it. - */ - if (pciDeviceFile(&path, dev->name, "driver") < 0) - goto cleanup; - - if (virFileExists(drvdir) && virFileLinkPointsTo(path, drvdir)) { - if (pciDriverFile(&path, driver, "unbind") < 0) { - goto cleanup; - } - - if (virFileWriteStr(path, dev->name, 0) < 0) { - virReportSystemError(errno, - _("Failed to unbind PCI device '%s' from %s"), - dev->name, driver); - goto cleanup; - } - } - dev->unbind_from_stub = 0; - -remove_slot: - if (!dev->remove_slot) - goto reprobe; - - /* Xen's pciback.ko wants you to use remove_slot on the specific device */ - if (pciDriverFile(&path, driver, "remove_slot") < 0) { - goto cleanup; - } - - if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0) { - virReportSystemError(errno, - _("Failed to remove slot for PCI device '%s' from %s"), - dev->name, driver); - goto cleanup; - } - dev->remove_slot = 0; - -reprobe: - if (!dev->reprobe) { - result = 0; - goto cleanup; - } - - /* Trigger a re-probe of the device is not in the stub's dynamic - * ID table. If the stub is available, but 'remove_id' isn't - * available, then re-probing would just cause the device to be - * re-bound to the stub. 
- */ - if (pciDriverFile(&path, driver, "remove_id") < 0) { - goto cleanup; - } - - if (!virFileExists(drvdir) || virFileExists(path)) { - if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) < 0) { - virReportSystemError(errno, - _("Failed to trigger a re-probe for PCI device '%s'"), - dev->name); - goto cleanup; - } - } - - result = 0; - -cleanup: - /* do not do it again */ - dev->unbind_from_stub = 0; - dev->remove_slot = 0; - dev->reprobe = 0; - - VIR_FREE(drvdir); - VIR_FREE(path); - - return result; -} - - -static int -pciBindDeviceToStub(pciDevice *dev, const char *driver) -{ - int result = -1; - char *drvdir = NULL; - char *path = NULL; - int reprobe = 0; - - /* check whether the device is already bound to a driver */ - if (pciDriverDir(&drvdir, driver) < 0 || - pciDeviceFile(&path, dev->name, "driver") < 0) { - goto cleanup; - } - - if (virFileExists(path)) { - if (virFileLinkPointsTo(path, drvdir)) { - /* The device is already bound to pci-stub */ - result = 0; - goto cleanup; - } - reprobe = 1; - } - - /* Add the PCI device ID to the stub's dynamic ID table; - * this is needed to allow us to bind the device to the stub. - * Note: if the device is not currently bound to any driver, - * stub will immediately be bound to the device. Also, note - * that if a new device with this ID is hotplugged, or if a probe - * is triggered for such a device, it will also be immediately - * bound by the stub. - */ - if (pciDriverFile(&path, driver, "new_id") < 0) { - goto cleanup; - } - - if (virFileWriteStr(path, dev->id, 0) < 0) { - virReportSystemError(errno, - _("Failed to add PCI device ID '%s' to %s"), - dev->id, driver); - goto cleanup; - } - - /* check whether the device is bound to pci-stub when we write dev->id to - * new_id. 
- */ - if (pciDriverDir(&drvdir, driver) < 0 || - pciDeviceFile(&path, dev->name, "driver") < 0) { - goto remove_id; - } - - if (virFileLinkPointsTo(path, drvdir)) { - dev->unbind_from_stub = 1; - dev->remove_slot = 1; - goto remove_id; - } - - /* If the device is already bound to a driver, unbind it. - * Note, this will have rather unpleasant side effects if this - * PCI device happens to be IDE controller for the disk hosting - * your root filesystem. - */ - if (pciDeviceFile(&path, dev->name, "driver/unbind") < 0) { - goto cleanup; - } - - if (virFileExists(path)) { - if (virFileWriteStr(path, dev->name, 0) < 0) { - virReportSystemError(errno, - _("Failed to unbind PCI device '%s'"), - dev->name); - goto cleanup; - } - dev->reprobe = reprobe; - } - - /* If the device isn't already bound to pci-stub, try binding it now. - */ - if (pciDriverDir(&drvdir, driver) < 0 || - pciDeviceFile(&path, dev->name, "driver") < 0) { - goto remove_id; - } - - if (!virFileLinkPointsTo(path, drvdir)) { - /* Xen's pciback.ko wants you to use new_slot first */ - if (pciDriverFile(&path, driver, "new_slot") < 0) { - goto remove_id; - } - - if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0) { - virReportSystemError(errno, - _("Failed to add slot for PCI device '%s' to %s"), - dev->name, driver); - goto remove_id; - } - dev->remove_slot = 1; - - if (pciDriverFile(&path, driver, "bind") < 0) { - goto remove_id; - } - - if (virFileWriteStr(path, dev->name, 0) < 0) { - virReportSystemError(errno, - _("Failed to bind PCI device '%s' to %s"), - dev->name, driver); - goto remove_id; - } - dev->unbind_from_stub = 1; - } - -remove_id: - /* If 'remove_id' exists, remove the device id from pci-stub's dynamic - * ID table so that 'drivers_probe' works below. 
- */ - if (pciDriverFile(&path, driver, "remove_id") < 0) { - /* We do not remove PCI ID from pci-stub, and we cannot reprobe it */ - if (dev->reprobe) { - VIR_WARN("Could not remove PCI ID '%s' from %s, and the device " - "cannot be probed again.", dev->id, driver); - } - dev->reprobe = 0; - goto cleanup; - } - - if (virFileExists(path) && virFileWriteStr(path, dev->id, 0) < 0) { - virReportSystemError(errno, - _("Failed to remove PCI ID '%s' from %s"), - dev->id, driver); - - /* remove PCI ID from pci-stub failed, and we cannot reprobe it */ - if (dev->reprobe) { - VIR_WARN("Failed to remove PCI ID '%s' from %s, and the device " - "cannot be probed again.", dev->id, driver); - } - dev->reprobe = 0; - goto cleanup; - } - - result = 0; - -cleanup: - VIR_FREE(drvdir); - VIR_FREE(path); - - if (result < 0) { - pciUnbindDeviceFromStub(dev, driver); - } - - return result; -} - -int -pciDettachDevice(pciDevice *dev, - pciDeviceList *activeDevs, - pciDeviceList *inactiveDevs) -{ - const char *driver = pciFindStubDriver(); - if (!driver) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("cannot find any PCI stub module")); - return -1; - } - - if (activeDevs && pciDeviceListFind(activeDevs, dev)) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Not detaching active device %s"), dev->name); - return -1; - } - - if (pciBindDeviceToStub(dev, driver) < 0) - return -1; - - /* Add the dev into list inactiveDevs */ - if (inactiveDevs && !pciDeviceListFind(inactiveDevs, dev)) { - if (pciDeviceListAdd(inactiveDevs, dev) < 0) - return -1; - } - - return 0; -} - -int -pciReAttachDevice(pciDevice *dev, - pciDeviceList *activeDevs, - pciDeviceList *inactiveDevs) -{ - const char *driver = pciFindStubDriver(); - if (!driver) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("cannot find any PCI stub module")); - return -1; - } - - if (activeDevs && pciDeviceListFind(activeDevs, dev)) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Not reattaching active device %s"), dev->name); - 
return -1; - } - - if (pciUnbindDeviceFromStub(dev, driver) < 0) - return -1; - - /* Steal the dev from list inactiveDevs */ - if (inactiveDevs) - pciDeviceListSteal(inactiveDevs, dev); - - return 0; -} - -/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on - * the host when doing device passthrough. This can lead to a race - * condition where the hypervisor is still cleaning up the device while - * libvirt is trying to re-attach it to the host device driver. To avoid - * this situation, we look through /proc/iomem, and if the hypervisor is - * still holding onto the bar (denoted by the string in the matcher variable), - * then we can wait around a bit for that to clear up. - * - * A typical /proc/iomem looks like this (snipped for brevity): - * 00010000-0008efff : System RAM - * 0008f000-0008ffff : reserved - * ... - * 00100000-cc9fcfff : System RAM - * 00200000-00483d3b : Kernel code - * 00483d3c-005c88df : Kernel data - * cc9fd000-ccc71fff : ACPI Non-volatile Storage - * ... - * d0200000-d02fffff : PCI Bus #05 - * d0200000-d021ffff : 0000:05:00.0 - * d0200000-d021ffff : e1000e - * d0220000-d023ffff : 0000:05:00.0 - * d0220000-d023ffff : e1000e - * ... - * f0000000-f0003fff : 0000:00:1b.0 - * f0000000-f0003fff : kvm_assigned_device - * - * Returns 0 if we are clear to continue, and 1 if the hypervisor is still - * holding onto the resource. - */ -int -pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher) -{ - FILE *fp; - char line[160]; - char *tmp; - unsigned long long start, end; - unsigned int domain, bus, slot, function; - int in_matching_device; - int ret; - size_t match_depth; - - fp = fopen("/proc/iomem", "r"); - if (!fp) { - /* If we failed to open iomem, we just basically ignore the error. 
The - * unbind might succeed anyway, and besides, it's very likely we have - * no way to report the error - */ - VIR_DEBUG("Failed to open /proc/iomem, trying to continue anyway"); - return 0; - } - - ret = 0; - in_matching_device = 0; - match_depth = 0; - while (fgets(line, sizeof(line), fp) != 0) { - /* the logic here is a bit confusing. For each line, we look to - * see if it matches the domain:bus:slot.function we were given. - * If this line matches the DBSF, then any subsequent lines indented - * by 2 spaces are the PCI regions for this device. It's also - * possible that none of the PCI regions are currently mapped, in - * which case we have no indented regions. This code handles all - * of these situations - */ - if (in_matching_device && (strspn(line, " ") == (match_depth + 2))) { - /* expected format: <start>-<end> : <suffix> */ - if (/* start */ - virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' || - /* end */ - virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 || - (tmp = STRSKIP(tmp, " : ")) == NULL) - continue; - - if (STRPREFIX(tmp, matcher)) { - ret = 1; - break; - } - } - else { - in_matching_device = 0; - - /* expected format: <start>-<end> : <domain>:<bus>:<slot>.<function> */ - if (/* start */ - virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' || - /* end */ - virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 || - (tmp = STRSKIP(tmp, " : ")) == NULL || - /* domain */ - virStrToLong_ui(tmp, &tmp, 16, &domain) < 0 || *tmp != ':' || - /* bus */ - virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' || - /* slot */ - virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' 
|| - /* function */ - virStrToLong_ui(tmp + 1, &tmp, 16, &function) < 0 || *tmp != '\n') - continue; - - if (domain != dev->domain || bus != dev->bus || slot != dev->slot || - function != dev->function) - continue; - in_matching_device = 1; - match_depth = strspn(line, " "); - } - } - - VIR_FORCE_FCLOSE(fp); - - return ret; -} - -static char * -pciReadDeviceID(pciDevice *dev, const char *id_name) -{ - char *path = NULL; - char *id_str; - - if (pciDeviceFile(&path, dev->name, id_name) < 0) { - return NULL; - } - - /* ID string is '0xNNNN\n' ... i.e. 7 bytes */ - if (virFileReadAll(path, 7, &id_str) < 0) { - VIR_FREE(path); - return NULL; - } - - VIR_FREE(path); - - /* Check for 0x suffix */ - if (id_str[0] != '0' || id_str[1] != 'x') { - VIR_FREE(id_str); - return NULL; - } - - /* Chop off the newline; we know the string is 7 bytes */ - id_str[6] = '\0'; - - return id_str; -} - -int -pciGetDeviceAddrString(unsigned domain, - unsigned bus, - unsigned slot, - unsigned function, - char **pciConfigAddr) -{ - pciDevice *dev = NULL; - int ret = -1; - - dev = pciGetDevice(domain, bus, slot, function); - if (dev != NULL) { - if ((*pciConfigAddr = strdup(dev->name)) == NULL) { - virReportOOMError(); - goto cleanup; - } - ret = 0; - } - -cleanup: - pciFreeDevice(dev); - return ret; -} - -pciDevice * -pciGetDevice(unsigned domain, - unsigned bus, - unsigned slot, - unsigned function) -{ - pciDevice *dev; - char *vendor = NULL; - char *product = NULL; - - if (VIR_ALLOC(dev) < 0) { - virReportOOMError(); - return NULL; - } - - dev->domain = domain; - dev->bus = bus; - dev->slot = slot; - dev->function = function; - - if (snprintf(dev->name, sizeof(dev->name), "%.4x:%.2x:%.2x.%.1x", - dev->domain, dev->bus, dev->slot, - dev->function) >= sizeof(dev->name)) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("dev->name buffer overflow: %.4x:%.2x:%.2x.%.1x"), - dev->domain, dev->bus, dev->slot, dev->function); - goto error; - } - if (virAsprintf(&dev->path, PCI_SYSFS 
"devices/%s/config", - dev->name) < 0) { - virReportOOMError(); - goto error; - } - - if (access(dev->path, F_OK) != 0) { - virReportSystemError(errno, - _("Device %s not found: could not access %s"), - dev->name, dev->path); - goto error; - } - - vendor = pciReadDeviceID(dev, "vendor"); - product = pciReadDeviceID(dev, "device"); - - if (!vendor || !product) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to read product/vendor ID for %s"), - dev->name); - goto error; - } - - /* strings contain '0x' prefix */ - if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2], - &product[2]) >= sizeof(dev->id)) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("dev->id buffer overflow: %s %s"), - &vendor[2], &product[2]); - goto error; - } - - VIR_DEBUG("%s %s: initialized", dev->id, dev->name); - -cleanup: - VIR_FREE(product); - VIR_FREE(vendor); - return dev; - -error: - pciFreeDevice(dev); - dev = NULL; - goto cleanup; -} - -void -pciFreeDevice(pciDevice *dev) -{ - if (!dev) - return; - VIR_DEBUG("%s %s: freeing", dev->id, dev->name); - VIR_FREE(dev->path); - VIR_FREE(dev); -} - -const char * -pciDeviceGetName(pciDevice *dev) -{ - return dev->name; -} - -void pciDeviceSetManaged(pciDevice *dev, unsigned managed) -{ - dev->managed = !!managed; -} - -unsigned pciDeviceGetManaged(pciDevice *dev) -{ - return dev->managed; -} - -unsigned -pciDeviceGetUnbindFromStub(pciDevice *dev) -{ - return dev->unbind_from_stub; -} - -void -pciDeviceSetUnbindFromStub(pciDevice *dev, unsigned unbind) -{ - dev->unbind_from_stub = !!unbind; -} - -unsigned -pciDeviceGetRemoveSlot(pciDevice *dev) -{ - return dev->remove_slot; -} - -void -pciDeviceSetRemoveSlot(pciDevice *dev, unsigned remove_slot) -{ - dev->remove_slot = !!remove_slot; -} - -unsigned -pciDeviceGetReprobe(pciDevice *dev) -{ - return dev->reprobe; -} - -void -pciDeviceSetReprobe(pciDevice *dev, unsigned reprobe) -{ - dev->reprobe = !!reprobe; -} - -void -pciDeviceSetUsedBy(pciDevice *dev, const char *name) -{ - 
dev->used_by = name; -} - -const char * -pciDeviceGetUsedBy(pciDevice *dev) -{ - return dev->used_by; -} - -void pciDeviceReAttachInit(pciDevice *pci) -{ - pci->unbind_from_stub = 1; - pci->remove_slot = 1; - pci->reprobe = 1; -} - - -pciDeviceList * -pciDeviceListNew(void) -{ - pciDeviceList *list; - - if (VIR_ALLOC(list) < 0) { - virReportOOMError(); - return NULL; - } - - return list; -} - -void -pciDeviceListFree(pciDeviceList *list) -{ - int i; - - if (!list) - return; - - for (i = 0; i < list->count; i++) { - pciFreeDevice(list->devs[i]); - list->devs[i] = NULL; - } - - list->count = 0; - VIR_FREE(list->devs); - VIR_FREE(list); -} - -int -pciDeviceListAdd(pciDeviceList *list, - pciDevice *dev) -{ - if (pciDeviceListFind(list, dev)) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Device %s is already in use"), dev->name); - return -1; - } - - if (VIR_REALLOC_N(list->devs, list->count+1) < 0) { - virReportOOMError(); - return -1; - } - - list->devs[list->count++] = dev; - - return 0; -} - -pciDevice * -pciDeviceListGet(pciDeviceList *list, - int idx) -{ - if (idx >= list->count) - return NULL; - if (idx < 0) - return NULL; - - return list->devs[idx]; -} - -int -pciDeviceListCount(pciDeviceList *list) -{ - return list->count; -} - -pciDevice * -pciDeviceListStealIndex(pciDeviceList *list, - int idx) -{ - pciDevice *ret; - - if (idx < 0 || idx >= list->count) - return NULL; - - ret = list->devs[idx]; - - if (idx != --list->count) { - memmove(&list->devs[idx], - &list->devs[idx + 1], - sizeof(*list->devs) * (list->count - idx)); - } - - if (VIR_REALLOC_N(list->devs, list->count) < 0) { - ; /* not fatal */ - } - - return ret; -} - -pciDevice * -pciDeviceListSteal(pciDeviceList *list, - pciDevice *dev) -{ - return pciDeviceListStealIndex(list, pciDeviceListFindIndex(list, dev)); -} - -void -pciDeviceListDel(pciDeviceList *list, - pciDevice *dev) -{ - pciDevice *ret = pciDeviceListSteal(list, dev); - if (ret) - pciFreeDevice(ret); -} - -int 
-pciDeviceListFindIndex(pciDeviceList *list, pciDevice *dev) -{ - int i; - - for (i = 0; i < list->count; i++) - if (list->devs[i]->domain == dev->domain && - list->devs[i]->bus == dev->bus && - list->devs[i]->slot == dev->slot && - list->devs[i]->function == dev->function) - return i; - return -1; -} - -pciDevice * -pciDeviceListFind(pciDeviceList *list, pciDevice *dev) -{ - int i; - - if ((i = pciDeviceListFindIndex(list, dev)) >= 0) - return list->devs[i]; - else - return NULL; -} - - -int pciDeviceFileIterate(pciDevice *dev, - pciDeviceFileActor actor, - void *opaque) -{ - char *pcidir = NULL; - char *file = NULL; - DIR *dir = NULL; - int ret = -1; - struct dirent *ent; - - if (virAsprintf(&pcidir, "/sys/bus/pci/devices/%04x:%02x:%02x.%x", - dev->domain, dev->bus, dev->slot, dev->function) < 0) { - virReportOOMError(); - goto cleanup; - } - - if (!(dir = opendir(pcidir))) { - virReportSystemError(errno, - _("cannot open %s"), pcidir); - goto cleanup; - } - - while ((ent = readdir(dir)) != NULL) { - /* Device assignment requires: - * $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN, - * $PCIDIR/rom, $PCIDIR/reset - */ - if (STREQ(ent->d_name, "config") || - STRPREFIX(ent->d_name, "resource") || - STREQ(ent->d_name, "rom") || - STREQ(ent->d_name, "reset")) { - if (virAsprintf(&file, "%s/%s", pcidir, ent->d_name) < 0) { - virReportOOMError(); - goto cleanup; - } - if ((actor)(dev, file, opaque) < 0) - goto cleanup; - - VIR_FREE(file); - } - } - - ret = 0; - -cleanup: - if (dir) - closedir(dir); - VIR_FREE(file); - VIR_FREE(pcidir); - return ret; -} - -static int -pciDeviceDownstreamLacksACS(pciDevice *dev) -{ - uint16_t flags; - uint16_t ctrl; - unsigned int pos; - int fd; - int ret = 0; - - if ((fd = pciConfigOpen(dev, true)) < 0) - return -1; - - if (pciInitDevice(dev, fd) < 0) { - ret = -1; - goto cleanup; - } - - pos = dev->pcie_cap_pos; - if (!pos || pciRead16(dev, fd, PCI_CLASS_DEVICE) != PCI_CLASS_BRIDGE_PCI) - goto cleanup; - - flags = pciRead16(dev, 
fd, pos + PCI_EXP_FLAGS); - if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM) - goto cleanup; - - pos = pciFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS); - if (!pos) { - VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name); - ret = 1; - goto cleanup; - } - - ctrl = pciRead16(dev, fd, pos + PCI_EXT_ACS_CTRL); - if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) { - VIR_DEBUG("%s %s: downstream port has ACS disabled", - dev->id, dev->name); - ret = 1; - goto cleanup; - } - -cleanup: - pciConfigClose(dev, fd); - return ret; -} - -static int -pciDeviceIsBehindSwitchLackingACS(pciDevice *dev) -{ - pciDevice *parent; - - if (pciGetParentDevice(dev, &parent) < 0) - return -1; - if (!parent) { - /* if we have no parent, and this is the root bus, ACS doesn't come - * into play since devices on the root bus can't P2P without going - * through the root IOMMU. - */ - if (dev->bus == 0) - return 0; - else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to find parent device for %s"), - dev->name); - return -1; - } - } - - /* XXX we should rather fail when we can't find device's parent and - * stop the loop when we get to root instead of just stopping when no - * parent can be found - */ - do { - pciDevice *tmp; - int acs; - int ret; - - acs = pciDeviceDownstreamLacksACS(parent); - - if (acs) { - pciFreeDevice(parent); - if (acs < 0) - return -1; - else - return 1; - } - - tmp = parent; - ret = pciGetParentDevice(parent, &parent); - pciFreeDevice(tmp); - if (ret < 0) - return -1; - } while (parent); - - return 0; -} - -int pciDeviceIsAssignable(pciDevice *dev, - int strict_acs_check) -{ - int ret; - - /* XXX This could be a great place to actually check that a non-managed - * device isn't in use, e.g. by checking that device is either un-bound - * or bound to a stub driver. 
- */ - - ret = pciDeviceIsBehindSwitchLackingACS(dev); - if (ret < 0) - return 0; - - if (ret) { - if (!strict_acs_check) { - VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed", - dev->id, dev->name); - } else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Device %s is behind a switch lacking ACS and " - "cannot be assigned"), - dev->name); - return 0; - } - } - - return 1; -} - -#ifdef __linux__ - -/* - * returns true if equal - */ -static bool -pciConfigAddressEqual(struct pci_config_address *bdf1, - struct pci_config_address *bdf2) -{ - return ((bdf1->domain == bdf2->domain) && - (bdf1->bus == bdf2->bus) && - (bdf1->slot == bdf2->slot) && - (bdf1->function == bdf2->function)); -} - -static int -logStrToLong_ui(char const *s, - char **end_ptr, - int base, - unsigned int *result) -{ - int ret = 0; - - ret = virStrToLong_ui(s, end_ptr, base, result); - if (ret != 0) { - VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s); - } else { - VIR_DEBUG("Converted '%s' to unsigned int %u", s, *result); - } - - return ret; -} - -static int -pciParsePciConfigAddress(char *address, - struct pci_config_address *bdf) -{ - char *p = NULL; - int ret = -1; - - if ((address == NULL) || (logStrToLong_ui(address, &p, 16, - &bdf->domain) == -1)) { - goto out; - } - - if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16, - &bdf->bus) == -1)) { - goto out; - } - - if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16, - &bdf->slot) == -1)) { - goto out; - } - - if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16, - &bdf->function) == -1)) { - goto out; - } - - ret = 0; - -out: - return ret; -} - -static int -pciGetPciConfigAddressFromSysfsDeviceLink(const char *device_link, - struct pci_config_address **bdf) -{ - char *config_address = NULL; - char *device_path = NULL; - char errbuf[64]; - int ret = -1; - - VIR_DEBUG("Attempting to resolve device path from device link '%s'", - device_link); - - if (!virFileExists(device_link)) { - VIR_DEBUG("sysfs_path '%s' does not exist", 
device_link); - return ret; - } - - device_path = canonicalize_file_name(device_link); - if (device_path == NULL) { - memset(errbuf, '\0', sizeof(errbuf)); - virReportSystemError(errno, - _("Failed to resolve device link '%s'"), - device_link); - return ret; - } - - config_address = basename(device_path); - if (VIR_ALLOC(*bdf) != 0) { - virReportOOMError(); - goto out; - } - - if (pciParsePciConfigAddress(config_address, *bdf) != 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to parse PCI config address '%s'"), - config_address); - VIR_FREE(*bdf); - goto out; - } - - VIR_DEBUG("pci_config_address %.4x:%.2x:%.2x.%.1x", - (*bdf)->domain, - (*bdf)->bus, - (*bdf)->slot, - (*bdf)->function); - - ret = 0; - -out: - VIR_FREE(device_path); - - return ret; -} - -/* - * Returns Physical function given a virtual function - */ -int -pciGetPhysicalFunction(const char *vf_sysfs_path, - struct pci_config_address **physical_function) -{ - int ret = -1; - char *device_link = NULL; - - VIR_DEBUG("Attempting to get SR IOV physical function for device " - "with sysfs path '%s'", vf_sysfs_path); - - if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) { - virReportOOMError(); - return ret; - } else { - ret = pciGetPciConfigAddressFromSysfsDeviceLink(device_link, - physical_function); - } - - VIR_FREE(device_link); - - return ret; -} - -/* - * Returns virtual functions of a physical function - */ -int -pciGetVirtualFunctions(const char *sysfs_path, - struct pci_config_address ***virtual_functions, - unsigned int *num_virtual_functions) -{ - int ret = -1; - DIR *dir = NULL; - struct dirent *entry = NULL; - char *device_link = NULL; - char errbuf[64]; - - VIR_DEBUG("Attempting to get SR IOV virtual functions for device" - "with sysfs path '%s'", sysfs_path); - - dir = opendir(sysfs_path); - if (dir == NULL) { - memset(errbuf, '\0', sizeof(errbuf)); - virReportSystemError(errno, - _("Failed to open dir '%s'"), - sysfs_path); - return ret; - } - - *virtual_functions = 
NULL; - *num_virtual_functions = 0; - while ((entry = readdir(dir))) { - if (STRPREFIX(entry->d_name, "virtfn")) { - - if (virBuildPath(&device_link, sysfs_path, entry->d_name) == -1) { - virReportOOMError(); - goto out; - } - - VIR_DEBUG("Number of virtual functions: %d", - *num_virtual_functions); - if (VIR_REALLOC_N(*virtual_functions, - (*num_virtual_functions) + 1) != 0) { - virReportOOMError(); - VIR_FREE(device_link); - goto out; - } - - if (pciGetPciConfigAddressFromSysfsDeviceLink(device_link, - &((*virtual_functions)[*num_virtual_functions])) != - SRIOV_FOUND) { - /* We should not get back SRIOV_NOT_FOUND in this - * case, so if we do, it's an error. */ - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to get SR IOV function from device " - "link '%s'"), device_link); - VIR_FREE(device_link); - goto out; - } else { - (*num_virtual_functions)++; - } - VIR_FREE(device_link); - } - } - - ret = 0; - -out: - if (dir) - closedir(dir); - - return ret; -} - -/* - * Returns 1 if vf device is a virtual function, 0 if not, -1 on error - */ -int -pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link) -{ - char *vf_sysfs_physfn_link = NULL; - int ret = -1; - - if (virAsprintf(&vf_sysfs_physfn_link, "%s/physfn", - vf_sysfs_device_link) < 0) { - virReportOOMError(); - return ret; - } - - ret = virFileExists(vf_sysfs_physfn_link); - - VIR_FREE(vf_sysfs_physfn_link); - - return ret; -} - -/* - * Returns the sriov virtual function index of vf given its pf - */ -int -pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link, - const char *vf_sysfs_device_link, - int *vf_index) -{ - int ret = -1, i; - unsigned int num_virt_fns = 0; - struct pci_config_address *vf_bdf = NULL; - struct pci_config_address **virt_fns = NULL; - - if (pciGetPciConfigAddressFromSysfsDeviceLink(vf_sysfs_device_link, - &vf_bdf) < 0) - return ret; - - if (pciGetVirtualFunctions(pf_sysfs_device_link, &virt_fns, - &num_virt_fns) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Error 
getting physical function's '%s' " - "virtual_functions"), pf_sysfs_device_link); - goto out; - } - - for (i = 0; i < num_virt_fns; i++) { - if (pciConfigAddressEqual(vf_bdf, virt_fns[i])) { - *vf_index = i; - ret = 0; - break; - } - } - -out: - - /* free virtual functions */ - for (i = 0; i < num_virt_fns; i++) - VIR_FREE(virt_fns[i]); - - VIR_FREE(virt_fns); - VIR_FREE(vf_bdf); - - return ret; -} - -/* - * Returns a path to the PCI sysfs file given the BDF of the PCI function - */ - -int -pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link) -{ - if (virAsprintf(pci_sysfs_device_link, PCI_SYSFS "devices/%s", - pciDeviceName) < 0) { - virReportOOMError(); - return -1; - } - - return 0; -} - -int -pciConfigAddressToSysfsFile(struct pci_config_address *dev, - char **pci_sysfs_device_link) -{ - if (virAsprintf(pci_sysfs_device_link, - PCI_SYSFS "devices/%04x:%02x:%02x.%x", dev->domain, - dev->bus, dev->slot, dev->function) < 0) { - virReportOOMError(); - return -1; - } - - return 0; -} - -/* - * Returns the network device name of a pci device - */ -int -pciDeviceNetName(char *device_link_sysfs_path, char **netname) -{ - char *pcidev_sysfs_net_path = NULL; - int ret = -1; - DIR *dir = NULL; - struct dirent *entry = NULL; - - if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path, - "net") == -1) { - virReportOOMError(); - return -1; - } - - dir = opendir(pcidev_sysfs_net_path); - if (dir == NULL) - goto out; - - while ((entry = readdir(dir))) { - if (STREQ(entry->d_name, ".") || - STREQ(entry->d_name, "..")) - continue; - - /* Assume a single directory entry */ - *netname = strdup(entry->d_name); - if (!*netname) - virReportOOMError(); - else - ret = 0; - break; - } - - closedir(dir); - -out: - VIR_FREE(pcidev_sysfs_net_path); - - return ret; -} - -int -pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path, - char **pfname, int *vf_index) -{ - struct pci_config_address *pf_config_address = NULL; - char *pf_sysfs_device_path = NULL; - int 
ret = -1; - - if (pciGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0) - return ret; - - if (pciConfigAddressToSysfsFile(pf_config_address, - &pf_sysfs_device_path) < 0) { - - VIR_FREE(pf_config_address); - return ret; - } - - if (pciGetVirtualFunctionIndex(pf_sysfs_device_path, vf_sysfs_device_path, - vf_index) < 0) - goto cleanup; - - ret = pciDeviceNetName(pf_sysfs_device_path, pfname); - -cleanup: - VIR_FREE(pf_config_address); - VIR_FREE(pf_sysfs_device_path); - - return ret; -} - -#else -static const char *unsupported = N_("not supported on non-linux platforms"); - -int -pciGetPhysicalFunction(const char *vf_sysfs_path ATTRIBUTE_UNUSED, - struct pci_config_address **physical_function ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); - return -1; -} - -int -pciGetVirtualFunctions(const char *sysfs_path ATTRIBUTE_UNUSED, - struct pci_config_address ***virtual_functions ATTRIBUTE_UNUSED, - unsigned int *num_virtual_functions ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); - return -1; -} - -int -pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); - return -1; -} - -int -pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link ATTRIBUTE_UNUSED, - const char *vf_sysfs_device_link ATTRIBUTE_UNUSED, - int *vf_index ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); - return -1; - -} - -int -pciConfigAddressToSysfsFile(struct pci_config_address *dev ATTRIBUTE_UNUSED, - char **pci_sysfs_device_link ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); - return -1; -} - -int -pciDeviceNetName(char *device_link_sysfs_path ATTRIBUTE_UNUSED, - char **netname ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); - return -1; -} - -int -pciDeviceGetVirtualFunctionInfo(const char 
*vf_sysfs_device_path ATTRIBUTE_UNUSED, - char **pfname ATTRIBUTE_UNUSED, - int *vf_index ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); - return -1; -} -#endif /* __linux__ */ diff --git a/src/util/pci.h b/src/util/pci.h deleted file mode 100644 index 814c24e..0000000 --- a/src/util/pci.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (C) 2009, 2011-2012 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see - * <http://www.gnu.org/licenses/>. 
- * - * Authors: - * Mark McLoughlin <markmc@xxxxxxxxxx> - */ - -#ifndef __VIR_PCI_H__ -# define __VIR_PCI_H__ - -# include "internal.h" - -typedef struct _pciDevice pciDevice; -typedef struct _pciDeviceList pciDeviceList; - -struct pci_config_address { - unsigned int domain; - unsigned int bus; - unsigned int slot; - unsigned int function; -}; - -pciDevice *pciGetDevice (unsigned domain, - unsigned bus, - unsigned slot, - unsigned function); -void pciFreeDevice (pciDevice *dev); -const char *pciDeviceGetName (pciDevice *dev); -int pciDettachDevice (pciDevice *dev, - pciDeviceList *activeDevs, - pciDeviceList *inactiveDevs); -int pciReAttachDevice (pciDevice *dev, - pciDeviceList *activeDevs, - pciDeviceList *inactiveDevs); -int pciResetDevice (pciDevice *dev, - pciDeviceList *activeDevs, - pciDeviceList *inactiveDevs); -void pciDeviceSetManaged(pciDevice *dev, - unsigned managed); -unsigned pciDeviceGetManaged(pciDevice *dev); -void pciDeviceSetUsedBy(pciDevice *dev, - const char *used_by); -const char *pciDeviceGetUsedBy(pciDevice *dev); -unsigned pciDeviceGetUnbindFromStub(pciDevice *dev); -void pciDeviceSetUnbindFromStub(pciDevice *dev, - unsigned unbind); -unsigned pciDeviceGetRemoveSlot(pciDevice *dev); -void pciDeviceSetRemoveSlot(pciDevice *dev, - unsigned remove_slot); -unsigned pciDeviceGetReprobe(pciDevice *dev); -void pciDeviceSetReprobe(pciDevice *dev, - unsigned reprobe); -void pciDeviceReAttachInit(pciDevice *dev); - -pciDeviceList *pciDeviceListNew (void); -void pciDeviceListFree (pciDeviceList *list); -int pciDeviceListAdd (pciDeviceList *list, - pciDevice *dev); -pciDevice * pciDeviceListGet (pciDeviceList *list, - int idx); -int pciDeviceListCount (pciDeviceList *list); -pciDevice * pciDeviceListSteal (pciDeviceList *list, - pciDevice *dev); -pciDevice * pciDeviceListStealIndex(pciDeviceList *list, - int idx); -void pciDeviceListDel (pciDeviceList *list, - pciDevice *dev); -pciDevice * pciDeviceListFind (pciDeviceList *list, - pciDevice *dev); 
-int pciDeviceListFindIndex(pciDeviceList *list, - pciDevice *dev); - -/* - * Callback that will be invoked once for each file - * associated with / used for PCI host device access. - * - * Should return 0 if successfully processed, or - * -1 to indicate error and abort iteration - */ -typedef int (*pciDeviceFileActor)(pciDevice *dev, - const char *path, void *opaque); - -int pciDeviceFileIterate(pciDevice *dev, - pciDeviceFileActor actor, - void *opaque); - -int pciDeviceIsAssignable(pciDevice *dev, - int strict_acs_check); -int pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher); - -int pciGetPhysicalFunction(const char *sysfs_path, - struct pci_config_address **phys_fn); - -int pciGetVirtualFunctions(const char *sysfs_path, - struct pci_config_address ***virtual_functions, - unsigned int *num_virtual_functions); - -int pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link); - -int pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link, - const char *vf_sysfs_device_link, - int *vf_index); - -int pciConfigAddressToSysfsFile(struct pci_config_address *dev, - char **pci_sysfs_device_link); - -int pciDeviceNetName(char *device_link_sysfs_path, char **netname); - -int pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link) - ATTRIBUTE_RETURN_CHECK; - -int pciGetDeviceAddrString(unsigned domain, - unsigned bus, - unsigned slot, - unsigned function, - char **pciConfigAddr) - ATTRIBUTE_NONNULL(5) ATTRIBUTE_RETURN_CHECK; - -int pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path, - char **pfname, int *vf_index); - -#endif /* __VIR_PCI_H__ */ diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index 88a2e4b..3ea0e39 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -28,7 +28,7 @@ #include "virterror_internal.h" #include "vircommand.h" #include "viralloc.h" -#include "pci.h" +#include "virpci.h" #include "virlog.h" #include <sys/ioctl.h> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index 
123667c..d588e89 100644 --- a/src/util/virnetdev.h +++ b/src/util/virnetdev.h @@ -26,7 +26,7 @@ # include "virsocketaddr.h" # include "virnetlink.h" # include "virmacaddr.h" -# include "pci.h" +# include "virpci.h" int virNetDevExists(const char *brname) ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; diff --git a/src/util/virpci.c b/src/util/virpci.c new file mode 100644 index 0000000..8875aa6 --- /dev/null +++ b/src/util/virpci.c @@ -0,0 +1,2285 @@ +/* + * Copyright (C) 2009-2012 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ * + * Authors: + * Mark McLoughlin <markmc@xxxxxxxxxx> + */ + +#include <config.h> + +#include "virpci.h" + +#include <dirent.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdlib.h> + +#include "virlog.h" +#include "viralloc.h" +#include "vircommand.h" +#include "virterror_internal.h" +#include "virfile.h" + +#define PCI_SYSFS "/sys/bus/pci/" +#define PCI_ID_LEN 10 /* "XXXX XXXX" */ +#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */ + +#define SRIOV_FOUND 0 +#define SRIOV_NOT_FOUND 1 +#define SRIOV_ERROR -1 + +struct _pciDevice { + unsigned domain; + unsigned bus; + unsigned slot; + unsigned function; + + char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ + char id[PCI_ID_LEN]; /* product vendor */ + char *path; + const char *used_by; /* The domain which uses the device */ + + unsigned pcie_cap_pos; + unsigned pci_pm_cap_pos; + unsigned has_flr : 1; + unsigned has_pm_reset : 1; + unsigned managed : 1; + + /* used by reattach function */ + unsigned unbind_from_stub : 1; + unsigned remove_slot : 1; + unsigned reprobe : 1; +}; + +struct _pciDeviceList { + unsigned count; + pciDevice **devs; +}; + + +/* For virReportOOMError() and virReportSystemError() */ +#define VIR_FROM_THIS VIR_FROM_NONE + +/* Specifications referenced in comments: + * PCI30 - PCI Local Bus Specification 3.0 + * PCIe20 - PCI Express Base Specification 2.0 + * BR12 - PCI-to-PCI Bridge Architecture Specification 1.2 + * PM12 - PCI Bus Power Management Interface Specification 1.2 + * ECN_AF - Advanced Capabilities for Conventional PCI ECN + */ + +/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization */ +#define PCI_CONF_LEN 0x100 +#define PCI_CONF_HEADER_LEN 0x40 + +/* PCI30 6.2.1 */ +#define PCI_HEADER_TYPE 0x0e /* Header type */ +#define PCI_HEADER_TYPE_BRIDGE 0x1 +#define PCI_HEADER_TYPE_MASK 0x7f +#define 
PCI_HEADER_TYPE_MULTI 0x80 + +/* PCI30 6.2.1 Device Identification */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ + +/* Class Code for bridge; PCI30 D.7 Base Class 06h */ +#define PCI_CLASS_BRIDGE_PCI 0x0604 + +/* PCI30 6.2.3 Device Status */ +#define PCI_STATUS 0x06 /* 16 bits */ +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ + +/* PCI30 6.7 Capabilities List */ +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ + +/* PM12 3.2.1 Capability Identifier */ +#define PCI_CAP_ID_PM 0x01 /* Power Management */ +/* PCI30 H Capability IDs */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +/* ECN_AF 6.x.1.1 Capability ID for AF */ +#define PCI_CAP_ID_AF 0x13 /* Advanced Features */ + +/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */ +#define PCI_EXP_DEVCAP 0x4 /* Device capabilities */ +#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ + +/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */ +#define PCI_PRIMARY_BUS 0x18 /* BR12 3.2.5.2 Primary bus number */ +#define PCI_SECONDARY_BUS 0x19 /* BR12 3.2.5.3 Secondary bus number */ +#define PCI_SUBORDINATE_BUS 0x1a /* BR12 3.2.5.4 Highest bus number behind the bridge */ +#define PCI_BRIDGE_CONTROL 0x3e +/* BR12 3.2.5.18 Bridge Control Register */ +#define PCI_BRIDGE_CTL_RESET 0x40 /* Secondary bus reset */ + +/* PM12 3.2.4 Power Management Control/Status (Offset = 4) */ +#define PCI_PM_CTRL 4 /* PM control and status register */ +#define PCI_PM_CTRL_STATE_MASK 0x3 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_STATE_D0 0x0 /* D0 state */ +#define PCI_PM_CTRL_STATE_D3hot 0x3 /* D3 state */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x8 /* No reset for D3hot->D0 */ + +/* ECN_AF 6.x.1 Advanced Features Capability Structure */ +#define PCI_AF_CAP 0x3 /* Advanced features capabilities */ +#define PCI_AF_CAP_FLR 0x2 /* Function Level Reset */ + +#define PCI_EXP_FLAGS 0x2 +#define PCI_EXP_FLAGS_TYPE 0x00f0 +#define 
PCI_EXP_TYPE_DOWNSTREAM 0x6 + +#define PCI_EXT_CAP_BASE 0x100 +#define PCI_EXT_CAP_LIMIT 0x1000 +#define PCI_EXT_CAP_ID_MASK 0x0000ffff +#define PCI_EXT_CAP_OFFSET_SHIFT 20 +#define PCI_EXT_CAP_OFFSET_MASK 0x00000ffc + +#define PCI_EXT_CAP_ID_ACS 0x000d +#define PCI_EXT_ACS_CTRL 0x06 + +#define PCI_EXT_CAP_ACS_SV 0x01 +#define PCI_EXT_CAP_ACS_RR 0x04 +#define PCI_EXT_CAP_ACS_CR 0x08 +#define PCI_EXT_CAP_ACS_UF 0x10 +#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \ + PCI_EXT_CAP_ACS_RR | \ + PCI_EXT_CAP_ACS_CR | \ + PCI_EXT_CAP_ACS_UF) + +static int +pciConfigOpen(pciDevice *dev, bool fatal) +{ + int fd; + + fd = open(dev->path, O_RDWR); + + if (fd < 0) { + if (fatal) { + virReportSystemError(errno, + _("Failed to open config space file '%s'"), + dev->path); + } else { + char ebuf[1024]; + VIR_WARN("Failed to open config space file '%s': %s", + dev->path, virStrerror(errno, ebuf, sizeof(ebuf))); + } + return -1; + } + + VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path); + return fd; +} + +static void +pciConfigClose(pciDevice *dev, int cfgfd) +{ + if (VIR_CLOSE(cfgfd) < 0) { + char ebuf[1024]; + VIR_WARN("Failed to close config space file '%s': %s", + dev->path, virStrerror(errno, ebuf, sizeof(ebuf))); + } +} + + +static int +pciRead(pciDevice *dev, + int cfgfd, + unsigned pos, + uint8_t *buf, + unsigned buflen) +{ + memset(buf, 0, buflen); + + if (lseek(cfgfd, pos, SEEK_SET) != pos || + saferead(cfgfd, buf, buflen) != buflen) { + char ebuf[1024]; + VIR_WARN("Failed to read from '%s' : %s", dev->path, + virStrerror(errno, ebuf, sizeof(ebuf))); + return -1; + } + return 0; +} + +static uint8_t +pciRead8(pciDevice *dev, int cfgfd, unsigned pos) +{ + uint8_t buf; + pciRead(dev, cfgfd, pos, &buf, sizeof(buf)); + return buf; +} + +static uint16_t +pciRead16(pciDevice *dev, int cfgfd, unsigned pos) +{ + uint8_t buf[2]; + pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf)); + return (buf[0] << 0) | (buf[1] << 8); +} + +static uint32_t +pciRead32(pciDevice 
*dev, int cfgfd, unsigned pos) +{ + uint8_t buf[4]; + pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf)); + return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); +} + +static int +pciWrite(pciDevice *dev, + int cfgfd, + unsigned pos, + uint8_t *buf, + unsigned buflen) +{ + if (lseek(cfgfd, pos, SEEK_SET) != pos || + safewrite(cfgfd, buf, buflen) != buflen) { + char ebuf[1024]; + VIR_WARN("Failed to write to '%s' : %s", dev->path, + virStrerror(errno, ebuf, sizeof(ebuf))); + return -1; + } + return 0; +} + +static void +pciWrite16(pciDevice *dev, int cfgfd, unsigned pos, uint16_t val) +{ + uint8_t buf[2] = { (val >> 0), (val >> 8) }; + pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf)); +} + +static void +pciWrite32(pciDevice *dev, int cfgfd, unsigned pos, uint32_t val) +{ + uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) }; + pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf)); +} + +typedef int (*pciIterPredicate)(pciDevice *, pciDevice *, void *); + +/* Iterate over available PCI devices calling @predicate + * to compare each one to @dev. + * Return -1 on error since we don't want to assume it is + * safe to reset if there is an error. + */ +static int +pciIterDevices(pciIterPredicate predicate, + pciDevice *dev, + pciDevice **matched, + void *data) +{ + DIR *dir; + struct dirent *entry; + int ret = 0; + int rc; + + *matched = NULL; + + VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name); + + dir = opendir(PCI_SYSFS "devices"); + if (!dir) { + VIR_WARN("Failed to open " PCI_SYSFS "devices"); + return -1; + } + + while ((entry = readdir(dir))) { + unsigned int domain, bus, slot, function; + pciDevice *check; + char *tmp; + + /* Ignore '.' and '..' 
*/ + if (entry->d_name[0] == '.') + continue; + + /* expected format: <domain>:<bus>:<slot>.<function> */ + if (/* domain */ + virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' || + /* bus */ + virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' || + /* slot */ + virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' || + /* function */ + virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) { + VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name); + continue; + } + + check = pciGetDevice(domain, bus, slot, function); + if (!check) { + ret = -1; + break; + } + + rc = predicate(dev, check, data); + if (rc < 0) { + /* the predicate returned an error, bail */ + pciFreeDevice(check); + ret = -1; + break; + } + else if (rc == 1) { + VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name); + *matched = check; + ret = 1; + break; + } + + pciFreeDevice(check); + } + closedir(dir); + return ret; +} + +static uint8_t +pciFindCapabilityOffset(pciDevice *dev, int cfgfd, unsigned capability) +{ + uint16_t status; + uint8_t pos; + + status = pciRead16(dev, cfgfd, PCI_STATUS); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + pos = pciRead8(dev, cfgfd, PCI_CAPABILITY_LIST); + + /* Zero indicates last capability, capabilities can't + * be in the config space header and 0xff is returned + * by the kernel if we don't have access to this region + * + * Note: we're not handling loops or extended + * capabilities here. 
+ */ + while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) { + uint8_t capid = pciRead8(dev, cfgfd, pos); + if (capid == capability) { + VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x", + dev->id, dev->name, capability, pos); + return pos; + } + + pos = pciRead8(dev, cfgfd, pos + 1); + } + + VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability); + + return 0; +} + +static unsigned int +pciFindExtendedCapabilityOffset(pciDevice *dev, + int cfgfd, + unsigned capability) +{ + int ttl; + unsigned int pos; + uint32_t header; + + /* minimum 8 bytes per capability */ + ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8; + pos = PCI_EXT_CAP_BASE; + + while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) { + header = pciRead32(dev, cfgfd, pos); + + if ((header & PCI_EXT_CAP_ID_MASK) == capability) + return pos; + + pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK; + ttl--; + } + + return 0; +} + +/* detects whether this device has FLR. Returns 0 if the device does + * not have FLR, 1 if it does, and -1 on error + */ +static int +pciDetectFunctionLevelReset(pciDevice *dev, int cfgfd) +{ + uint32_t caps; + uint8_t pos; + char *path; + int found; + + /* The PCIe Function Level Reset capability allows + * individual device functions to be reset without + * affecting any other functions on the device or + * any other devices on the bus. This is only common + * on SR-IOV NICs at the moment. + */ + if (dev->pcie_cap_pos) { + caps = pciRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP); + if (caps & PCI_EXP_DEVCAP_FLR) { + VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name); + return 1; + } + } + + /* The PCI AF Function Level Reset capability is + * the same thing, except for conventional PCI + * devices. This is not common yet. 
+ */ + pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF); + if (pos) { + caps = pciRead16(dev, cfgfd, pos + PCI_AF_CAP); + if (caps & PCI_AF_CAP_FLR) { + VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name); + return 1; + } + } + + /* there are some buggy devices that do support FLR, but forget to + * advertise that fact in their capabilities. However, FLR is *required* + * to be present for virtual functions (VFs), so if we see that this + * device is a VF, we just assume FLR works + */ + + if (virAsprintf(&path, PCI_SYSFS "devices/%s/physfn", dev->name) < 0) { + virReportOOMError(); + return -1; + } + + found = virFileExists(path); + VIR_FREE(path); + if (found) { + VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on", + dev->id, dev->name); + return 1; + } + + VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name); + + return 0; +} + +/* Require the device has the PCI Power Management capability + * and that a D3hot->D0 transition will result in a full + * internal reset, not just a soft reset. + */ +static unsigned +pciDetectPowerManagementReset(pciDevice *dev, int cfgfd) +{ + if (dev->pci_pm_cap_pos) { + uint32_t ctl; + + /* require the NO_SOFT_RESET bit is clear */ + ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL); + if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) { + VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name); + return 1; + } + } + + VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name); + + return 0; +} + +/* Any active devices on the same domain/bus ? */ +static int +pciSharesBusWithActive(pciDevice *dev, pciDevice *check, void *data) +{ + pciDeviceList *inactiveDevs = data; + + /* Different domain, different bus, or simply identical device */ + if (dev->domain != check->domain || + dev->bus != check->bus || + (dev->slot == check->slot && + dev->function == check->function)) + return 0; + + /* same bus, but inactive, i.e. 
about to be assigned to guest */ + if (inactiveDevs && pciDeviceListFind(inactiveDevs, check)) + return 0; + + return 1; +} + +static pciDevice * +pciBusContainsActiveDevices(pciDevice *dev, + pciDeviceList *inactiveDevs) +{ + pciDevice *active = NULL; + if (pciIterDevices(pciSharesBusWithActive, + dev, &active, inactiveDevs) < 0) + return NULL; + return active; +} + +/* Is @check the parent of @dev ? */ +static int +pciIsParent(pciDevice *dev, pciDevice *check, void *data) +{ + uint16_t device_class; + uint8_t header_type, secondary, subordinate; + pciDevice **best = data; + int ret = 0; + int fd; + + if (dev->domain != check->domain) + return 0; + + if ((fd = pciConfigOpen(check, false)) < 0) + return 0; + + /* Is it a bridge? */ + device_class = pciRead16(check, fd, PCI_CLASS_DEVICE); + if (device_class != PCI_CLASS_BRIDGE_PCI) + goto cleanup; + + /* Does it use the PCI-to-PCI bridge header layout? */ + header_type = pciRead8(check, fd, PCI_HEADER_TYPE); + if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE) + goto cleanup; + + secondary = pciRead8(check, fd, PCI_SECONDARY_BUS); + subordinate = pciRead8(check, fd, PCI_SUBORDINATE_BUS); + + VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name); + + /* if the secondary bus exactly equals the device's bus, then we found + * the direct parent. No further work is necessary + */ + if (dev->bus == secondary) { + ret = 1; + goto cleanup; + } + + /* otherwise, SRIOV allows VFs to be on different buses than their PFs. + * In this case, what we need to do is look for the "best" match; i.e. + * the most restrictive match that still satisfies all of the conditions. + */ + if (dev->bus > secondary && dev->bus <= subordinate) { + if (*best == NULL) { + *best = pciGetDevice(check->domain, check->bus, check->slot, + check->function); + if (*best == NULL) { + ret = -1; + goto cleanup; + } + } else { + /* OK, we had already recorded a previous "best" match for the + * parent. 
See if the current device is more restrictive than the + * best, and if so, make it the new best + */ + int bestfd; + uint8_t best_secondary; + + if ((bestfd = pciConfigOpen(*best, false)) < 0) + goto cleanup; + best_secondary = pciRead8(*best, bestfd, PCI_SECONDARY_BUS); + pciConfigClose(*best, bestfd); + + if (secondary > best_secondary) { + pciFreeDevice(*best); + *best = pciGetDevice(check->domain, check->bus, check->slot, + check->function); + if (*best == NULL) { + ret = -1; + goto cleanup; + } + } + } + } + +cleanup: + pciConfigClose(check, fd); + return ret; +} + +static int +pciGetParentDevice(pciDevice *dev, pciDevice **parent) +{ + pciDevice *best = NULL; + int ret; + + *parent = NULL; + ret = pciIterDevices(pciIsParent, dev, parent, &best); + if (ret == 1) + pciFreeDevice(best); + else if (ret == 0) + *parent = best; + return ret; +} + +/* Secondary Bus Reset is our sledgehammer - it resets all + * devices behind a bus. + */ +static int +pciTrySecondaryBusReset(pciDevice *dev, + int cfgfd, + pciDeviceList *inactiveDevs) +{ + pciDevice *parent, *conflict; + uint8_t config_space[PCI_CONF_LEN]; + uint16_t ctl; + int ret = -1; + int parentfd; + + /* Refuse to do a secondary bus reset if there are other + * devices/functions behind the bus are used by the host + * or other guests. 
+ */ + if ((conflict = pciBusContainsActiveDevices(dev, inactiveDevs))) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Active %s devices on bus with %s, not doing bus reset"), + conflict->name, dev->name); + return -1; + } + + /* Find the parent bus */ + if (pciGetParentDevice(dev, &parent) < 0) + return -1; + if (!parent) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to find parent device for %s"), + dev->name); + return -1; + } + if ((parentfd = pciConfigOpen(parent, true)) < 0) + goto out; + + VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name); + + /* Save and restore the device's config space; we only do this + * for the supplied device since we refuse to do a reset if there + * are multiple devices/functions + */ + if (pciRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to read PCI config space for %s"), + dev->name); + goto out; + } + + /* Read the control register, set the reset flag, wait 200ms, + * unset the reset flag and wait 200ms. + */ + ctl = pciRead16(dev, cfgfd, PCI_BRIDGE_CONTROL); + + pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, + ctl | PCI_BRIDGE_CTL_RESET); + + usleep(200 * 1000); /* sleep 200ms */ + + pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl); + + usleep(200 * 1000); /* sleep 200ms */ + + if (pciWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to restore PCI config space for %s"), + dev->name); + goto out; + } + ret = 0; + +out: + pciConfigClose(parent, parentfd); + pciFreeDevice(parent); + return ret; +} + +/* Power management reset attempts to reset a device using a + * D-state transition from D3hot to D0. Note, in detect_pm_reset() + * above we require the device supports a full internal reset. 
+ */ +static int +pciTryPowerManagementReset(pciDevice *dev, int cfgfd) +{ + uint8_t config_space[PCI_CONF_LEN]; + uint32_t ctl; + + if (!dev->pci_pm_cap_pos) + return -1; + + /* Save and restore the device's config space. */ + if (pciRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to read PCI config space for %s"), + dev->name); + return -1; + } + + VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name); + + ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL); + ctl &= ~PCI_PM_CTRL_STATE_MASK; + + pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL, + ctl | PCI_PM_CTRL_STATE_D3hot); + + usleep(10 * 1000); /* sleep 10ms */ + + pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL, + ctl | PCI_PM_CTRL_STATE_D0); + + usleep(10 * 1000); /* sleep 10ms */ + + if (pciWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to restore PCI config space for %s"), + dev->name); + return -1; + } + + return 0; +} + +static int +pciInitDevice(pciDevice *dev, int cfgfd) +{ + int flr; + + dev->pcie_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP); + dev->pci_pm_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM); + flr = pciDetectFunctionLevelReset(dev, cfgfd); + if (flr < 0) + return flr; + dev->has_flr = flr; + dev->has_pm_reset = pciDetectPowerManagementReset(dev, cfgfd); + + return 0; +} + +int +pciResetDevice(pciDevice *dev, + pciDeviceList *activeDevs, + pciDeviceList *inactiveDevs) +{ + int ret = -1; + int fd; + + if (activeDevs && pciDeviceListFind(activeDevs, dev)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Not resetting active device %s"), dev->name); + return -1; + } + + if ((fd = pciConfigOpen(dev, true)) < 0) + return -1; + + if (pciInitDevice(dev, fd) < 0) + goto cleanup; + + /* KVM will perform FLR when starting and stopping + * a guest, so there is no need for us to do it here. 
+ */ + if (dev->has_flr) { + ret = 0; + goto cleanup; + } + + /* If the device supports PCI power management reset, + * that's the next best thing because it only resets + * the function, not the whole device. + */ + if (dev->has_pm_reset) + ret = pciTryPowerManagementReset(dev, fd); + + /* Bus reset is not an option with the root bus */ + if (ret < 0 && dev->bus != 0) + ret = pciTrySecondaryBusReset(dev, fd, inactiveDevs); + + if (ret < 0) { + virErrorPtr err = virGetLastError(); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to reset PCI device %s: %s"), + dev->name, + err ? err->message : _("no FLR, PM reset or bus reset available")); + } + +cleanup: + pciConfigClose(dev, fd); + return ret; +} + + +static int +pciDriverDir(char **buffer, const char *driver) +{ + VIR_FREE(*buffer); + + if (virAsprintf(buffer, PCI_SYSFS "drivers/%s", driver) < 0) { + virReportOOMError(); + return -1; + } + + return 0; +} + +static int +pciDriverFile(char **buffer, const char *driver, const char *file) +{ + VIR_FREE(*buffer); + + if (virAsprintf(buffer, PCI_SYSFS "drivers/%s/%s", driver, file) < 0) { + virReportOOMError(); + return -1; + } + + return 0; +} + +static int +pciDeviceFile(char **buffer, const char *device, const char *file) +{ + VIR_FREE(*buffer); + + if (virAsprintf(buffer, PCI_SYSFS "devices/%s/%s", device, file) < 0) { + virReportOOMError(); + return -1; + } + + return 0; +} + + +static const char * +pciFindStubDriver(void) +{ + char *drvpath = NULL; + int probed = 0; + +recheck: + if (pciDriverDir(&drvpath, "pci-stub") < 0) { + return NULL; + } + + if (virFileExists(drvpath)) { + VIR_FREE(drvpath); + return "pci-stub"; + } + + if (pciDriverDir(&drvpath, "pciback") < 0) { + return NULL; + } + + if (virFileExists(drvpath)) { + VIR_FREE(drvpath); + return "pciback"; + } + + VIR_FREE(drvpath); + + if (!probed) { + const char *const stubprobe[] = { MODPROBE, "pci-stub", NULL }; + const char *const backprobe[] = { MODPROBE, "pciback", NULL }; + + probed = 1; + /* + 
* Probing for pci-stub will succeed regardless of whether + * we are on native or Xen kernels. + * On Xen though, we want to prefer pciback, so probe + * for that first, because that will only work on Xen + */ + if (virRun(backprobe, NULL) < 0 && + virRun(stubprobe, NULL) < 0) { + char ebuf[1024]; + VIR_WARN("failed to load pci-stub or pciback drivers: %s", + virStrerror(errno, ebuf, sizeof(ebuf))); + return NULL; + } + + goto recheck; + } + + return NULL; +} + +static int +pciUnbindDeviceFromStub(pciDevice *dev, const char *driver) +{ + int result = -1; + char *drvdir = NULL; + char *path = NULL; + + if (pciDriverDir(&drvdir, driver) < 0) + goto cleanup; + + if (!dev->unbind_from_stub) + goto remove_slot; + + /* If the device is bound to stub, unbind it. + */ + if (pciDeviceFile(&path, dev->name, "driver") < 0) + goto cleanup; + + if (virFileExists(drvdir) && virFileLinkPointsTo(path, drvdir)) { + if (pciDriverFile(&path, driver, "unbind") < 0) { + goto cleanup; + } + + if (virFileWriteStr(path, dev->name, 0) < 0) { + virReportSystemError(errno, + _("Failed to unbind PCI device '%s' from %s"), + dev->name, driver); + goto cleanup; + } + } + dev->unbind_from_stub = 0; + +remove_slot: + if (!dev->remove_slot) + goto reprobe; + + /* Xen's pciback.ko wants you to use remove_slot on the specific device */ + if (pciDriverFile(&path, driver, "remove_slot") < 0) { + goto cleanup; + } + + if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0) { + virReportSystemError(errno, + _("Failed to remove slot for PCI device '%s' from %s"), + dev->name, driver); + goto cleanup; + } + dev->remove_slot = 0; + +reprobe: + if (!dev->reprobe) { + result = 0; + goto cleanup; + } + + /* Trigger a re-probe if the device is not in the stub's dynamic + * ID table. If the stub is available, but 'remove_id' isn't + * available, then re-probing would just cause the device to be + * re-bound to the stub. 
+ */ + if (pciDriverFile(&path, driver, "remove_id") < 0) { + goto cleanup; + } + + if (!virFileExists(drvdir) || virFileExists(path)) { + if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) < 0) { + virReportSystemError(errno, + _("Failed to trigger a re-probe for PCI device '%s'"), + dev->name); + goto cleanup; + } + } + + result = 0; + +cleanup: + /* do not do it again */ + dev->unbind_from_stub = 0; + dev->remove_slot = 0; + dev->reprobe = 0; + + VIR_FREE(drvdir); + VIR_FREE(path); + + return result; +} + + +static int +pciBindDeviceToStub(pciDevice *dev, const char *driver) +{ + int result = -1; + char *drvdir = NULL; + char *path = NULL; + int reprobe = 0; + + /* check whether the device is already bound to a driver */ + if (pciDriverDir(&drvdir, driver) < 0 || + pciDeviceFile(&path, dev->name, "driver") < 0) { + goto cleanup; + } + + if (virFileExists(path)) { + if (virFileLinkPointsTo(path, drvdir)) { + /* The device is already bound to pci-stub */ + result = 0; + goto cleanup; + } + reprobe = 1; + } + + /* Add the PCI device ID to the stub's dynamic ID table; + * this is needed to allow us to bind the device to the stub. + * Note: if the device is not currently bound to any driver, + * stub will immediately be bound to the device. Also, note + * that if a new device with this ID is hotplugged, or if a probe + * is triggered for such a device, it will also be immediately + * bound by the stub. + */ + if (pciDriverFile(&path, driver, "new_id") < 0) { + goto cleanup; + } + + if (virFileWriteStr(path, dev->id, 0) < 0) { + virReportSystemError(errno, + _("Failed to add PCI device ID '%s' to %s"), + dev->id, driver); + goto cleanup; + } + + /* check whether the device is bound to pci-stub when we write dev->id to + * new_id. 
+ */ + if (pciDriverDir(&drvdir, driver) < 0 || + pciDeviceFile(&path, dev->name, "driver") < 0) { + goto remove_id; + } + + if (virFileLinkPointsTo(path, drvdir)) { + dev->unbind_from_stub = 1; + dev->remove_slot = 1; + goto remove_id; + } + + /* If the device is already bound to a driver, unbind it. + * Note, this will have rather unpleasant side effects if this + * PCI device happens to be IDE controller for the disk hosting + * your root filesystem. + */ + if (pciDeviceFile(&path, dev->name, "driver/unbind") < 0) { + goto cleanup; + } + + if (virFileExists(path)) { + if (virFileWriteStr(path, dev->name, 0) < 0) { + virReportSystemError(errno, + _("Failed to unbind PCI device '%s'"), + dev->name); + goto cleanup; + } + dev->reprobe = reprobe; + } + + /* If the device isn't already bound to pci-stub, try binding it now. + */ + if (pciDriverDir(&drvdir, driver) < 0 || + pciDeviceFile(&path, dev->name, "driver") < 0) { + goto remove_id; + } + + if (!virFileLinkPointsTo(path, drvdir)) { + /* Xen's pciback.ko wants you to use new_slot first */ + if (pciDriverFile(&path, driver, "new_slot") < 0) { + goto remove_id; + } + + if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0) { + virReportSystemError(errno, + _("Failed to add slot for PCI device '%s' to %s"), + dev->name, driver); + goto remove_id; + } + dev->remove_slot = 1; + + if (pciDriverFile(&path, driver, "bind") < 0) { + goto remove_id; + } + + if (virFileWriteStr(path, dev->name, 0) < 0) { + virReportSystemError(errno, + _("Failed to bind PCI device '%s' to %s"), + dev->name, driver); + goto remove_id; + } + dev->unbind_from_stub = 1; + } + +remove_id: + /* If 'remove_id' exists, remove the device id from pci-stub's dynamic + * ID table so that 'drivers_probe' works below. 
+ */ + if (pciDriverFile(&path, driver, "remove_id") < 0) { + /* We do not remove PCI ID from pci-stub, and we cannot reprobe it */ + if (dev->reprobe) { + VIR_WARN("Could not remove PCI ID '%s' from %s, and the device " + "cannot be probed again.", dev->id, driver); + } + dev->reprobe = 0; + goto cleanup; + } + + if (virFileExists(path) && virFileWriteStr(path, dev->id, 0) < 0) { + virReportSystemError(errno, + _("Failed to remove PCI ID '%s' from %s"), + dev->id, driver); + + /* remove PCI ID from pci-stub failed, and we cannot reprobe it */ + if (dev->reprobe) { + VIR_WARN("Failed to remove PCI ID '%s' from %s, and the device " + "cannot be probed again.", dev->id, driver); + } + dev->reprobe = 0; + goto cleanup; + } + + result = 0; + +cleanup: + VIR_FREE(drvdir); + VIR_FREE(path); + + if (result < 0) { + pciUnbindDeviceFromStub(dev, driver); + } + + return result; +} + +int +pciDettachDevice(pciDevice *dev, + pciDeviceList *activeDevs, + pciDeviceList *inactiveDevs) +{ + const char *driver = pciFindStubDriver(); + if (!driver) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot find any PCI stub module")); + return -1; + } + + if (activeDevs && pciDeviceListFind(activeDevs, dev)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Not detaching active device %s"), dev->name); + return -1; + } + + if (pciBindDeviceToStub(dev, driver) < 0) + return -1; + + /* Add the dev into list inactiveDevs */ + if (inactiveDevs && !pciDeviceListFind(inactiveDevs, dev)) { + if (pciDeviceListAdd(inactiveDevs, dev) < 0) + return -1; + } + + return 0; +} + +int +pciReAttachDevice(pciDevice *dev, + pciDeviceList *activeDevs, + pciDeviceList *inactiveDevs) +{ + const char *driver = pciFindStubDriver(); + if (!driver) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot find any PCI stub module")); + return -1; + } + + if (activeDevs && pciDeviceListFind(activeDevs, dev)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Not reattaching active device %s"), dev->name); + 
return -1; + } + + if (pciUnbindDeviceFromStub(dev, driver) < 0) + return -1; + + /* Steal the dev from list inactiveDevs */ + if (inactiveDevs) + pciDeviceListSteal(inactiveDevs, dev); + + return 0; +} + +/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on + * the host when doing device passthrough. This can lead to a race + * condition where the hypervisor is still cleaning up the device while + * libvirt is trying to re-attach it to the host device driver. To avoid + * this situation, we look through /proc/iomem, and if the hypervisor is + * still holding onto the bar (denoted by the string in the matcher variable), + * then we can wait around a bit for that to clear up. + * + * A typical /proc/iomem looks like this (snipped for brevity): + * 00010000-0008efff : System RAM + * 0008f000-0008ffff : reserved + * ... + * 00100000-cc9fcfff : System RAM + * 00200000-00483d3b : Kernel code + * 00483d3c-005c88df : Kernel data + * cc9fd000-ccc71fff : ACPI Non-volatile Storage + * ... + * d0200000-d02fffff : PCI Bus #05 + * d0200000-d021ffff : 0000:05:00.0 + * d0200000-d021ffff : e1000e + * d0220000-d023ffff : 0000:05:00.0 + * d0220000-d023ffff : e1000e + * ... + * f0000000-f0003fff : 0000:00:1b.0 + * f0000000-f0003fff : kvm_assigned_device + * + * Returns 0 if we are clear to continue, and 1 if the hypervisor is still + * holding onto the resource. + */ +int +pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher) +{ + FILE *fp; + char line[160]; + char *tmp; + unsigned long long start, end; + unsigned int domain, bus, slot, function; + int in_matching_device; + int ret; + size_t match_depth; + + fp = fopen("/proc/iomem", "r"); + if (!fp) { + /* If we failed to open iomem, we just basically ignore the error. 
The + * unbind might succeed anyway, and besides, it's very likely we have + * no way to report the error + */ + VIR_DEBUG("Failed to open /proc/iomem, trying to continue anyway"); + return 0; + } + + ret = 0; + in_matching_device = 0; + match_depth = 0; + while (fgets(line, sizeof(line), fp) != 0) { + /* the logic here is a bit confusing. For each line, we look to + * see if it matches the domain:bus:slot.function we were given. + * If this line matches the DBSF, then any subsequent lines indented + * by 2 spaces are the PCI regions for this device. It's also + * possible that none of the PCI regions are currently mapped, in + * which case we have no indented regions. This code handles all + * of these situations + */ + if (in_matching_device && (strspn(line, " ") == (match_depth + 2))) { + /* expected format: <start>-<end> : <suffix> */ + if (/* start */ + virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' || + /* end */ + virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 || + (tmp = STRSKIP(tmp, " : ")) == NULL) + continue; + + if (STRPREFIX(tmp, matcher)) { + ret = 1; + break; + } + } + else { + in_matching_device = 0; + + /* expected format: <start>-<end> : <domain>:<bus>:<slot>.<function> */ + if (/* start */ + virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' || + /* end */ + virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 || + (tmp = STRSKIP(tmp, " : ")) == NULL || + /* domain */ + virStrToLong_ui(tmp, &tmp, 16, &domain) < 0 || *tmp != ':' || + /* bus */ + virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' || + /* slot */ + virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' 
|| + /* function */ + virStrToLong_ui(tmp + 1, &tmp, 16, &function) < 0 || *tmp != '\n') + continue; + + if (domain != dev->domain || bus != dev->bus || slot != dev->slot || + function != dev->function) + continue; + in_matching_device = 1; + match_depth = strspn(line, " "); + } + } + + VIR_FORCE_FCLOSE(fp); + + return ret; +} + +static char * +pciReadDeviceID(pciDevice *dev, const char *id_name) +{ + char *path = NULL; + char *id_str; + + if (pciDeviceFile(&path, dev->name, id_name) < 0) { + return NULL; + } + + /* ID string is '0xNNNN\n' ... i.e. 7 bytes */ + if (virFileReadAll(path, 7, &id_str) < 0) { + VIR_FREE(path); + return NULL; + } + + VIR_FREE(path); + + /* Check for 0x suffix */ + if (id_str[0] != '0' || id_str[1] != 'x') { + VIR_FREE(id_str); + return NULL; + } + + /* Chop off the newline; we know the string is 7 bytes */ + id_str[6] = '\0'; + + return id_str; +} + +int +pciGetDeviceAddrString(unsigned domain, + unsigned bus, + unsigned slot, + unsigned function, + char **pciConfigAddr) +{ + pciDevice *dev = NULL; + int ret = -1; + + dev = pciGetDevice(domain, bus, slot, function); + if (dev != NULL) { + if ((*pciConfigAddr = strdup(dev->name)) == NULL) { + virReportOOMError(); + goto cleanup; + } + ret = 0; + } + +cleanup: + pciFreeDevice(dev); + return ret; +} + +pciDevice * +pciGetDevice(unsigned domain, + unsigned bus, + unsigned slot, + unsigned function) +{ + pciDevice *dev; + char *vendor = NULL; + char *product = NULL; + + if (VIR_ALLOC(dev) < 0) { + virReportOOMError(); + return NULL; + } + + dev->domain = domain; + dev->bus = bus; + dev->slot = slot; + dev->function = function; + + if (snprintf(dev->name, sizeof(dev->name), "%.4x:%.2x:%.2x.%.1x", + dev->domain, dev->bus, dev->slot, + dev->function) >= sizeof(dev->name)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("dev->name buffer overflow: %.4x:%.2x:%.2x.%.1x"), + dev->domain, dev->bus, dev->slot, dev->function); + goto error; + } + if (virAsprintf(&dev->path, PCI_SYSFS 
"devices/%s/config", + dev->name) < 0) { + virReportOOMError(); + goto error; + } + + if (access(dev->path, F_OK) != 0) { + virReportSystemError(errno, + _("Device %s not found: could not access %s"), + dev->name, dev->path); + goto error; + } + + vendor = pciReadDeviceID(dev, "vendor"); + product = pciReadDeviceID(dev, "device"); + + if (!vendor || !product) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to read product/vendor ID for %s"), + dev->name); + goto error; + } + + /* strings contain '0x' prefix */ + if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2], + &product[2]) >= sizeof(dev->id)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("dev->id buffer overflow: %s %s"), + &vendor[2], &product[2]); + goto error; + } + + VIR_DEBUG("%s %s: initialized", dev->id, dev->name); + +cleanup: + VIR_FREE(product); + VIR_FREE(vendor); + return dev; + +error: + pciFreeDevice(dev); + dev = NULL; + goto cleanup; +} + +void +pciFreeDevice(pciDevice *dev) +{ + if (!dev) + return; + VIR_DEBUG("%s %s: freeing", dev->id, dev->name); + VIR_FREE(dev->path); + VIR_FREE(dev); +} + +const char * +pciDeviceGetName(pciDevice *dev) +{ + return dev->name; +} + +void pciDeviceSetManaged(pciDevice *dev, unsigned managed) +{ + dev->managed = !!managed; +} + +unsigned pciDeviceGetManaged(pciDevice *dev) +{ + return dev->managed; +} + +unsigned +pciDeviceGetUnbindFromStub(pciDevice *dev) +{ + return dev->unbind_from_stub; +} + +void +pciDeviceSetUnbindFromStub(pciDevice *dev, unsigned unbind) +{ + dev->unbind_from_stub = !!unbind; +} + +unsigned +pciDeviceGetRemoveSlot(pciDevice *dev) +{ + return dev->remove_slot; +} + +void +pciDeviceSetRemoveSlot(pciDevice *dev, unsigned remove_slot) +{ + dev->remove_slot = !!remove_slot; +} + +unsigned +pciDeviceGetReprobe(pciDevice *dev) +{ + return dev->reprobe; +} + +void +pciDeviceSetReprobe(pciDevice *dev, unsigned reprobe) +{ + dev->reprobe = !!reprobe; +} + +void +pciDeviceSetUsedBy(pciDevice *dev, const char *name) +{ + 
dev->used_by = name; +} + +const char * +pciDeviceGetUsedBy(pciDevice *dev) +{ + return dev->used_by; +} + +void pciDeviceReAttachInit(pciDevice *pci) +{ + pci->unbind_from_stub = 1; + pci->remove_slot = 1; + pci->reprobe = 1; +} + + +pciDeviceList * +pciDeviceListNew(void) +{ + pciDeviceList *list; + + if (VIR_ALLOC(list) < 0) { + virReportOOMError(); + return NULL; + } + + return list; +} + +void +pciDeviceListFree(pciDeviceList *list) +{ + int i; + + if (!list) + return; + + for (i = 0; i < list->count; i++) { + pciFreeDevice(list->devs[i]); + list->devs[i] = NULL; + } + + list->count = 0; + VIR_FREE(list->devs); + VIR_FREE(list); +} + +int +pciDeviceListAdd(pciDeviceList *list, + pciDevice *dev) +{ + if (pciDeviceListFind(list, dev)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Device %s is already in use"), dev->name); + return -1; + } + + if (VIR_REALLOC_N(list->devs, list->count+1) < 0) { + virReportOOMError(); + return -1; + } + + list->devs[list->count++] = dev; + + return 0; +} + +pciDevice * +pciDeviceListGet(pciDeviceList *list, + int idx) +{ + if (idx >= list->count) + return NULL; + if (idx < 0) + return NULL; + + return list->devs[idx]; +} + +int +pciDeviceListCount(pciDeviceList *list) +{ + return list->count; +} + +pciDevice * +pciDeviceListStealIndex(pciDeviceList *list, + int idx) +{ + pciDevice *ret; + + if (idx < 0 || idx >= list->count) + return NULL; + + ret = list->devs[idx]; + + if (idx != --list->count) { + memmove(&list->devs[idx], + &list->devs[idx + 1], + sizeof(*list->devs) * (list->count - idx)); + } + + if (VIR_REALLOC_N(list->devs, list->count) < 0) { + ; /* not fatal */ + } + + return ret; +} + +pciDevice * +pciDeviceListSteal(pciDeviceList *list, + pciDevice *dev) +{ + return pciDeviceListStealIndex(list, pciDeviceListFindIndex(list, dev)); +} + +void +pciDeviceListDel(pciDeviceList *list, + pciDevice *dev) +{ + pciDevice *ret = pciDeviceListSteal(list, dev); + if (ret) + pciFreeDevice(ret); +} + +int 
+pciDeviceListFindIndex(pciDeviceList *list, pciDevice *dev) +{ + int i; + + for (i = 0; i < list->count; i++) + if (list->devs[i]->domain == dev->domain && + list->devs[i]->bus == dev->bus && + list->devs[i]->slot == dev->slot && + list->devs[i]->function == dev->function) + return i; + return -1; +} + +pciDevice * +pciDeviceListFind(pciDeviceList *list, pciDevice *dev) +{ + int i; + + if ((i = pciDeviceListFindIndex(list, dev)) >= 0) + return list->devs[i]; + else + return NULL; +} + + +int pciDeviceFileIterate(pciDevice *dev, + pciDeviceFileActor actor, + void *opaque) +{ + char *pcidir = NULL; + char *file = NULL; + DIR *dir = NULL; + int ret = -1; + struct dirent *ent; + + if (virAsprintf(&pcidir, "/sys/bus/pci/devices/%04x:%02x:%02x.%x", + dev->domain, dev->bus, dev->slot, dev->function) < 0) { + virReportOOMError(); + goto cleanup; + } + + if (!(dir = opendir(pcidir))) { + virReportSystemError(errno, + _("cannot open %s"), pcidir); + goto cleanup; + } + + while ((ent = readdir(dir)) != NULL) { + /* Device assignment requires: + * $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN, + * $PCIDIR/rom, $PCIDIR/reset + */ + if (STREQ(ent->d_name, "config") || + STRPREFIX(ent->d_name, "resource") || + STREQ(ent->d_name, "rom") || + STREQ(ent->d_name, "reset")) { + if (virAsprintf(&file, "%s/%s", pcidir, ent->d_name) < 0) { + virReportOOMError(); + goto cleanup; + } + if ((actor)(dev, file, opaque) < 0) + goto cleanup; + + VIR_FREE(file); + } + } + + ret = 0; + +cleanup: + if (dir) + closedir(dir); + VIR_FREE(file); + VIR_FREE(pcidir); + return ret; +} + +static int +pciDeviceDownstreamLacksACS(pciDevice *dev) +{ + uint16_t flags; + uint16_t ctrl; + unsigned int pos; + int fd; + int ret = 0; + + if ((fd = pciConfigOpen(dev, true)) < 0) + return -1; + + if (pciInitDevice(dev, fd) < 0) { + ret = -1; + goto cleanup; + } + + pos = dev->pcie_cap_pos; + if (!pos || pciRead16(dev, fd, PCI_CLASS_DEVICE) != PCI_CLASS_BRIDGE_PCI) + goto cleanup; + + flags = pciRead16(dev, 
fd, pos + PCI_EXP_FLAGS); + if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM) + goto cleanup; + + pos = pciFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS); + if (!pos) { + VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name); + ret = 1; + goto cleanup; + } + + ctrl = pciRead16(dev, fd, pos + PCI_EXT_ACS_CTRL); + if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) { + VIR_DEBUG("%s %s: downstream port has ACS disabled", + dev->id, dev->name); + ret = 1; + goto cleanup; + } + +cleanup: + pciConfigClose(dev, fd); + return ret; +} + +static int +pciDeviceIsBehindSwitchLackingACS(pciDevice *dev) +{ + pciDevice *parent; + + if (pciGetParentDevice(dev, &parent) < 0) + return -1; + if (!parent) { + /* if we have no parent, and this is the root bus, ACS doesn't come + * into play since devices on the root bus can't P2P without going + * through the root IOMMU. + */ + if (dev->bus == 0) + return 0; + else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to find parent device for %s"), + dev->name); + return -1; + } + } + + /* XXX we should rather fail when we can't find device's parent and + * stop the loop when we get to root instead of just stopping when no + * parent can be found + */ + do { + pciDevice *tmp; + int acs; + int ret; + + acs = pciDeviceDownstreamLacksACS(parent); + + if (acs) { + pciFreeDevice(parent); + if (acs < 0) + return -1; + else + return 1; + } + + tmp = parent; + ret = pciGetParentDevice(parent, &parent); + pciFreeDevice(tmp); + if (ret < 0) + return -1; + } while (parent); + + return 0; +} + +int pciDeviceIsAssignable(pciDevice *dev, + int strict_acs_check) +{ + int ret; + + /* XXX This could be a great place to actually check that a non-managed + * device isn't in use, e.g. by checking that device is either un-bound + * or bound to a stub driver. 
+ */ + + ret = pciDeviceIsBehindSwitchLackingACS(dev); + if (ret < 0) + return 0; + + if (ret) { + if (!strict_acs_check) { + VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed", + dev->id, dev->name); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Device %s is behind a switch lacking ACS and " + "cannot be assigned"), + dev->name); + return 0; + } + } + + return 1; +} + +#ifdef __linux__ + +/* + * returns true if equal + */ +static bool +pciConfigAddressEqual(struct pci_config_address *bdf1, + struct pci_config_address *bdf2) +{ + return ((bdf1->domain == bdf2->domain) && + (bdf1->bus == bdf2->bus) && + (bdf1->slot == bdf2->slot) && + (bdf1->function == bdf2->function)); +} + +static int +logStrToLong_ui(char const *s, + char **end_ptr, + int base, + unsigned int *result) +{ + int ret = 0; + + ret = virStrToLong_ui(s, end_ptr, base, result); + if (ret != 0) { + VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s); + } else { + VIR_DEBUG("Converted '%s' to unsigned int %u", s, *result); + } + + return ret; +} + +static int +pciParsePciConfigAddress(char *address, + struct pci_config_address *bdf) +{ + char *p = NULL; + int ret = -1; + + if ((address == NULL) || (logStrToLong_ui(address, &p, 16, + &bdf->domain) == -1)) { + goto out; + } + + if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16, + &bdf->bus) == -1)) { + goto out; + } + + if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16, + &bdf->slot) == -1)) { + goto out; + } + + if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16, + &bdf->function) == -1)) { + goto out; + } + + ret = 0; + +out: + return ret; +} + +static int +pciGetPciConfigAddressFromSysfsDeviceLink(const char *device_link, + struct pci_config_address **bdf) +{ + char *config_address = NULL; + char *device_path = NULL; + char errbuf[64]; + int ret = -1; + + VIR_DEBUG("Attempting to resolve device path from device link '%s'", + device_link); + + if (!virFileExists(device_link)) { + VIR_DEBUG("sysfs_path '%s' does not exist", 
device_link); + return ret; + } + + device_path = canonicalize_file_name(device_link); + if (device_path == NULL) { + memset(errbuf, '\0', sizeof(errbuf)); + virReportSystemError(errno, + _("Failed to resolve device link '%s'"), + device_link); + return ret; + } + + config_address = basename(device_path); + if (VIR_ALLOC(*bdf) != 0) { + virReportOOMError(); + goto out; + } + + if (pciParsePciConfigAddress(config_address, *bdf) != 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to parse PCI config address '%s'"), + config_address); + VIR_FREE(*bdf); + goto out; + } + + VIR_DEBUG("pci_config_address %.4x:%.2x:%.2x.%.1x", + (*bdf)->domain, + (*bdf)->bus, + (*bdf)->slot, + (*bdf)->function); + + ret = 0; + +out: + VIR_FREE(device_path); + + return ret; +} + +/* + * Returns Physical function given a virtual function + */ +int +pciGetPhysicalFunction(const char *vf_sysfs_path, + struct pci_config_address **physical_function) +{ + int ret = -1; + char *device_link = NULL; + + VIR_DEBUG("Attempting to get SR IOV physical function for device " + "with sysfs path '%s'", vf_sysfs_path); + + if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) { + virReportOOMError(); + return ret; + } else { + ret = pciGetPciConfigAddressFromSysfsDeviceLink(device_link, + physical_function); + } + + VIR_FREE(device_link); + + return ret; +} + +/* + * Returns virtual functions of a physical function + */ +int +pciGetVirtualFunctions(const char *sysfs_path, + struct pci_config_address ***virtual_functions, + unsigned int *num_virtual_functions) +{ + int ret = -1; + DIR *dir = NULL; + struct dirent *entry = NULL; + char *device_link = NULL; + char errbuf[64]; + + VIR_DEBUG("Attempting to get SR IOV virtual functions for device" + "with sysfs path '%s'", sysfs_path); + + dir = opendir(sysfs_path); + if (dir == NULL) { + memset(errbuf, '\0', sizeof(errbuf)); + virReportSystemError(errno, + _("Failed to open dir '%s'"), + sysfs_path); + return ret; + } + + *virtual_functions = 
NULL; + *num_virtual_functions = 0; + while ((entry = readdir(dir))) { + if (STRPREFIX(entry->d_name, "virtfn")) { + + if (virBuildPath(&device_link, sysfs_path, entry->d_name) == -1) { + virReportOOMError(); + goto out; + } + + VIR_DEBUG("Number of virtual functions: %d", + *num_virtual_functions); + if (VIR_REALLOC_N(*virtual_functions, + (*num_virtual_functions) + 1) != 0) { + virReportOOMError(); + VIR_FREE(device_link); + goto out; + } + + if (pciGetPciConfigAddressFromSysfsDeviceLink(device_link, + &((*virtual_functions)[*num_virtual_functions])) != + SRIOV_FOUND) { + /* We should not get back SRIOV_NOT_FOUND in this + * case, so if we do, it's an error. */ + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to get SR IOV function from device " + "link '%s'"), device_link); + VIR_FREE(device_link); + goto out; + } else { + (*num_virtual_functions)++; + } + VIR_FREE(device_link); + } + } + + ret = 0; + +out: + if (dir) + closedir(dir); + + return ret; +} + +/* + * Returns 1 if vf device is a virtual function, 0 if not, -1 on error + */ +int +pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link) +{ + char *vf_sysfs_physfn_link = NULL; + int ret = -1; + + if (virAsprintf(&vf_sysfs_physfn_link, "%s/physfn", + vf_sysfs_device_link) < 0) { + virReportOOMError(); + return ret; + } + + ret = virFileExists(vf_sysfs_physfn_link); + + VIR_FREE(vf_sysfs_physfn_link); + + return ret; +} + +/* + * Returns the sriov virtual function index of vf given its pf + */ +int +pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link, + const char *vf_sysfs_device_link, + int *vf_index) +{ + int ret = -1, i; + unsigned int num_virt_fns = 0; + struct pci_config_address *vf_bdf = NULL; + struct pci_config_address **virt_fns = NULL; + + if (pciGetPciConfigAddressFromSysfsDeviceLink(vf_sysfs_device_link, + &vf_bdf) < 0) + return ret; + + if (pciGetVirtualFunctions(pf_sysfs_device_link, &virt_fns, + &num_virt_fns) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Error 
/*
 * Returns the network device name of a pci device
 *
 * Looks into the "net" subdirectory of @device_link_sysfs_path and copies
 * the name of the first entry other than "." and ".." into *@netname
 * (allocated with strdup(); the caller frees it).
 *
 * Returns 0 when a name was stored, -1 otherwise: path allocation
 * failure, the "net" directory cannot be opened or has no such entry
 * (no error is reported for these two cases), or strdup() failure.
 */
int
pciDeviceNetName(char *device_link_sysfs_path, char **netname)
{
    char *net_dir = NULL;
    DIR *dh;
    struct dirent *de;
    int rc = -1;

    if (virBuildPath(&net_dir, device_link_sysfs_path,
                     "net") == -1) {
        virReportOOMError();
        return -1;
    }

    dh = opendir(net_dir);
    if (dh != NULL) {
        /* Advance to the first real entry, skipping "." and "..". */
        do {
            de = readdir(dh);
        } while (de != NULL &&
                 (STREQ(de->d_name, ".") ||
                  STREQ(de->d_name, "..")));

        /* Assume a single directory entry */
        if (de != NULL) {
            *netname = strdup(de->d_name);
            if (*netname != NULL)
                rc = 0;
            else
                virReportOOMError();
        }

        closedir(dh);
    }

    VIR_FREE(net_dir);

    return rc;
}
ret = -1; + + if (pciGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0) + return ret; + + if (pciConfigAddressToSysfsFile(pf_config_address, + &pf_sysfs_device_path) < 0) { + + VIR_FREE(pf_config_address); + return ret; + } + + if (pciGetVirtualFunctionIndex(pf_sysfs_device_path, vf_sysfs_device_path, + vf_index) < 0) + goto cleanup; + + ret = pciDeviceNetName(pf_sysfs_device_path, pfname); + +cleanup: + VIR_FREE(pf_config_address); + VIR_FREE(pf_sysfs_device_path); + + return ret; +} + +#else +static const char *unsupported = N_("not supported on non-linux platforms"); + +int +pciGetPhysicalFunction(const char *vf_sysfs_path ATTRIBUTE_UNUSED, + struct pci_config_address **physical_function ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); + return -1; +} + +int +pciGetVirtualFunctions(const char *sysfs_path ATTRIBUTE_UNUSED, + struct pci_config_address ***virtual_functions ATTRIBUTE_UNUSED, + unsigned int *num_virtual_functions ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); + return -1; +} + +int +pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); + return -1; +} + +int +pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link ATTRIBUTE_UNUSED, + const char *vf_sysfs_device_link ATTRIBUTE_UNUSED, + int *vf_index ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); + return -1; + +} + +int +pciConfigAddressToSysfsFile(struct pci_config_address *dev ATTRIBUTE_UNUSED, + char **pci_sysfs_device_link ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); + return -1; +} + +int +pciDeviceNetName(char *device_link_sysfs_path ATTRIBUTE_UNUSED, + char **netname ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); + return -1; +} + +int +pciDeviceGetVirtualFunctionInfo(const char 
*vf_sysfs_device_path ATTRIBUTE_UNUSED, + char **pfname ATTRIBUTE_UNUSED, + int *vf_index ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); + return -1; +} +#endif /* __linux__ */ diff --git a/src/util/virpci.h b/src/util/virpci.h new file mode 100644 index 0000000..814c24e --- /dev/null +++ b/src/util/virpci.h @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2009, 2011-2012 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ * + * Authors: + * Mark McLoughlin <markmc@xxxxxxxxxx> + */ + +#ifndef __VIR_PCI_H__ +# define __VIR_PCI_H__ + +# include "internal.h" + +typedef struct _pciDevice pciDevice; +typedef struct _pciDeviceList pciDeviceList; + +struct pci_config_address { + unsigned int domain; + unsigned int bus; + unsigned int slot; + unsigned int function; +}; + +pciDevice *pciGetDevice (unsigned domain, + unsigned bus, + unsigned slot, + unsigned function); +void pciFreeDevice (pciDevice *dev); +const char *pciDeviceGetName (pciDevice *dev); +int pciDettachDevice (pciDevice *dev, + pciDeviceList *activeDevs, + pciDeviceList *inactiveDevs); +int pciReAttachDevice (pciDevice *dev, + pciDeviceList *activeDevs, + pciDeviceList *inactiveDevs); +int pciResetDevice (pciDevice *dev, + pciDeviceList *activeDevs, + pciDeviceList *inactiveDevs); +void pciDeviceSetManaged(pciDevice *dev, + unsigned managed); +unsigned pciDeviceGetManaged(pciDevice *dev); +void pciDeviceSetUsedBy(pciDevice *dev, + const char *used_by); +const char *pciDeviceGetUsedBy(pciDevice *dev); +unsigned pciDeviceGetUnbindFromStub(pciDevice *dev); +void pciDeviceSetUnbindFromStub(pciDevice *dev, + unsigned unbind); +unsigned pciDeviceGetRemoveSlot(pciDevice *dev); +void pciDeviceSetRemoveSlot(pciDevice *dev, + unsigned remove_slot); +unsigned pciDeviceGetReprobe(pciDevice *dev); +void pciDeviceSetReprobe(pciDevice *dev, + unsigned reprobe); +void pciDeviceReAttachInit(pciDevice *dev); + +pciDeviceList *pciDeviceListNew (void); +void pciDeviceListFree (pciDeviceList *list); +int pciDeviceListAdd (pciDeviceList *list, + pciDevice *dev); +pciDevice * pciDeviceListGet (pciDeviceList *list, + int idx); +int pciDeviceListCount (pciDeviceList *list); +pciDevice * pciDeviceListSteal (pciDeviceList *list, + pciDevice *dev); +pciDevice * pciDeviceListStealIndex(pciDeviceList *list, + int idx); +void pciDeviceListDel (pciDeviceList *list, + pciDevice *dev); +pciDevice * pciDeviceListFind (pciDeviceList *list, + pciDevice *dev); 
+int pciDeviceListFindIndex(pciDeviceList *list, + pciDevice *dev); + +/* + * Callback that will be invoked once for each file + * associated with / used for PCI host device access. + * + * Should return 0 if successfully processed, or + * -1 to indicate error and abort iteration + */ +typedef int (*pciDeviceFileActor)(pciDevice *dev, + const char *path, void *opaque); + +int pciDeviceFileIterate(pciDevice *dev, + pciDeviceFileActor actor, + void *opaque); + +int pciDeviceIsAssignable(pciDevice *dev, + int strict_acs_check); +int pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher); + +int pciGetPhysicalFunction(const char *sysfs_path, + struct pci_config_address **phys_fn); + +int pciGetVirtualFunctions(const char *sysfs_path, + struct pci_config_address ***virtual_functions, + unsigned int *num_virtual_functions); + +int pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link); + +int pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link, + const char *vf_sysfs_device_link, + int *vf_index); + +int pciConfigAddressToSysfsFile(struct pci_config_address *dev, + char **pci_sysfs_device_link); + +int pciDeviceNetName(char *device_link_sysfs_path, char **netname); + +int pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link) + ATTRIBUTE_RETURN_CHECK; + +int pciGetDeviceAddrString(unsigned domain, + unsigned bus, + unsigned slot, + unsigned function, + char **pciConfigAddr) + ATTRIBUTE_NONNULL(5) ATTRIBUTE_RETURN_CHECK; + +int pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path, + char **pfname, int *vf_index); + +#endif /* __VIR_PCI_H__ */ diff --git a/src/xen/xen_driver.c b/src/xen/xen_driver.c index 94df116..9b2fcf3 100644 --- a/src/xen/xen_driver.c +++ b/src/xen/xen_driver.c @@ -57,7 +57,7 @@ #include "util.h" #include "viralloc.h" #include "node_device_conf.h" -#include "pci.h" +#include "virpci.h" #include "uuid.h" #include "fdstream.h" #include "virfile.h" -- 1.7.11.7 -- libvir-list mailing list libvir-list@xxxxxxxxxx 
https://www.redhat.com/mailman/listinfo/libvir-list