Add support for PCI device assignment in the kvm test.
Signed-off-by: Yolkfull Chow <yzhou@xxxxxxxxxx> Signed-off-by: Lucas Meneghel Rodrigues <lmr@xxxxxxxxxx> --- client/tests/kvm/kvm_tests.cfg.sample | 20 +++- client/tests/kvm/kvm_utils.py | 278 +++++++++++++++++++++++++++++++++ client/tests/kvm/kvm_vm.py | 59 +++++++ 3 files changed, 356 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample index feffb8d..be60399 100644 --- a/client/tests/kvm/kvm_tests.cfg.sample +++ b/client/tests/kvm/kvm_tests.cfg.sample @@ -844,13 +844,31 @@ variants: only default image_format = raw - variants: - @smallpages: - hugepages: pre_command = "/usr/bin/python scripts/hugepage.py /mnt/kvm_hugepage" extra_params += " -mem-path /mnt/kvm_hugepage" +variants: + - @no_pci_assignable: + pci_assignable = no + - pf_assignable: + pci_assignable = pf + device_names = eth1 + - vf_assignable: + pci_assignable = vf + # Driver (kernel module) that supports SR-IOV hardware. + # As of today (30-11-2009), we have 2 drivers for this type of hardware: + # Intel® 82576 Gigabit Ethernet Controller - igb + # Neterion® X3100™ - vxge + driver = igb + # Driver option to specify the number of virtual functions + # (on vxge the option is , for example, is max_config_dev) + # the default below is for the igb driver + driver_option = max_vfs + # Number of devices that are going to be requested. + devices_requested = 7 variants: - @basic: diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index bf25900..fc04745 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -874,3 +874,281 @@ def unmap_url_cache(cachedir, url, expected_hash, method="md5"): file_path = utils.unmap_url(cachedir, src, cachedir) return file_path + + +def get_full_pci_id(pci_id): + """ + Get full PCI ID of pci_id. + + @param pci_id: PCI ID of a device. 
+ """ + cmd = "lspci -D | awk '/%s/ {print $1}'" % pci_id + status, full_id = commands.getstatusoutput(cmd) + if status != 0: + return None + return full_id + + +def get_vendor_from_pci_id(pci_id): + """ + Check out the device vendor ID according to pci_id. + + @param pci_id: PCI ID of a device. + """ + cmd = "lspci -n | awk '/%s/ {print $3}'" % pci_id + return re.sub(":", " ", commands.getoutput(cmd)) + + +class PciAssignable(object): + """ + Request PCI assignable devices on host. It will check whether to request + PF (physical Functions) or VF (Virtual Functions). + """ + def __init__(self, type="nic_vf", driver=None, driver_option=None, + names=None, devices_requested=None): + """ + Initialize parameter 'type' which could be: + nic_vf: Virtual Functions + nic_pf: Physical Function (actual hardware) + mixed: Both includes VFs and PFs + + If pass through Physical NIC cards, we need to specify which devices + to be assigned, e.g. 'eth1 eth2'. + + If pass through Virtual Functions, we need to specify how many vfs + are going to be assigned, e.g. passthrough_count = 8 and max_vfs in + config file. + + @param type: PCI device type. + @param driver: Kernel module for the PCI assignable device. + @param driver_option: Module option to specify the number of VFs needed. + @param names: Physical NIC cards correspondent network interfaces, + e.g.'eth1 eth2 ...' + """ + self.type = type + self.driver = driver + self.driver_option = "%s=%s" % (driver_option, devices_requested) + if names: + self.name_list = names.split() + if devices_requested: + self.devices_requested = int(devices_requested) + + + def _get_pf_pci_id(self, name, search_str): + """ + Get the PF PCI ID according to name. + + @param name: Name of the PCI device. + @param search_str: Search string to be used on lspci. 
+ """ + cmd = "ethtool -i %s | awk '/bus-info/ {print $2}'" % name + s, pci_id = commands.getstatusoutput(cmd) + if not (s or "Cannot get driver information" in pci_id): + return pci_id[5:] + cmd = "lspci | awk '/%s/ {print $1}'" % search_str + pci_ids = [id for id in commands.getoutput(cmd).splitlines()] + nic_id = int(re.search('[0-9]+', name).group(0)) + if (len(pci_ids) - 1) < nic_id: + return None + return pci_ids[nic_id] + + + def _release_dev(self, pci_id): + """ + Release a single PCI device. + + @param pci_id: PCI ID of a given PCI device. + """ + base_dir = "/sys/bus/pci" + full_id = get_full_pci_id(pci_id) + vendor_id = get_vendor_from_pci_id(pci_id) + drv_path = os.path.join(base_dir, "devices/%s/driver" % full_id) + if 'pci-stub' in os.readlink(drv_path): + cmd = "echo '%s' > %s/new_id" % (vendor_id, drv_path) + if os.system(cmd): + return False + + stub_path = os.path.join(base_dir, "drivers/pci-stub") + cmd = "echo '%s' > %s/unbind" % (full_id, stub_path) + if os.system(cmd): + return False + + driver = self.dev_drivers[pci_id] + cmd = "echo '%s' > %s/bind" % (full_id, driver) + if os.system(cmd): + return False + + return True + + + def get_vf_devs(self): + """ + Catch all VFs PCI IDs. + + @return: List with all PCI IDs for the Virtual Functions avaliable + """ + if not self.sr_iov_setup(): + return [] + + cmd = "lspci | awk '/Virtual Function/ {print $1}'" + return commands.getoutput(cmd).split() + + + def get_pf_devs(self): + """ + Catch all PFs PCI IDs. + + @return: List with all PCI IDs for the physical hardware requested + """ + pf_ids = [] + for name in self.name_list: + pf_id = self._get_pf_pci_id(name, "Ethernet") + if not pf_id: + continue + pf_ids.append(pf_id) + return pf_ids + + + def get_devs(self, count): + """ + Check out all devices' PCI IDs according to their name. 
@param count: number of PCI devices needed for passthrough
+ + @return: a list of successfully requested devices' PCI IDs. + """ + base_dir = "/sys/bus/pci" + stub_path = os.path.join(base_dir, "drivers/pci-stub") + + self.pci_ids = self.get_devs(self.devices_requested) + logging.debug("The following pci_ids were found: %s" % self.pci_ids) + requested_pci_ids = [] + self.dev_drivers = {} + + # Setup all devices specified for assignment to guest + for pci_id in self.pci_ids: + full_id = get_full_pci_id(pci_id) + if not full_id: + continue + drv_path = os.path.join(base_dir, "devices/%s/driver" % full_id) + dev_prev_driver= os.path.realpath(os.path.join(drv_path, + os.readlink(drv_path))) + self.dev_drivers[pci_id] = dev_prev_driver + + # Judge whether the device driver has been binded to stub + if not self.is_binded_to_stub(full_id): + logging.debug("Binding device %s to stub" % full_id) + vendor_id = get_vendor_from_pci_id(pci_id) + stub_new_id = os.path.join(stub_path, 'new_id') + unbind_dev = os.path.join(drv_path, 'unbind') + stub_bind = os.path.join(stub_path, 'bind') + + info_write_to_files = [(vendor_id, stub_new_id), + (full_id, unbind_dev), + (full_id, stub_bind)] + + for content, file in info_write_to_files: + try: + utils.open_write_close(content, file) + except IOError: + logging.debug("Failed to write %s to file %s" % + (content, file)) + continue + + if not self.is_binded_to_stub(full_id): + logging.error("Binding device %s to stub failed" % + pci_id) + continue + else: + logging.debug("Device %s already binded to stub" % pci_id) + requested_pci_ids.append(pci_id) + self.pci_ids = requested_pci_ids + return self.pci_ids + + + def release_devs(self): + """ + Release all PCI devices currently assigned to VMs back to the + virtualization host. 
+ """ + try: + for pci_id in self.dev_drivers: + if not self._release_dev(pci_id): + logging.error("Failed to release device %s to host" % + pci_id) + else: + logging.info("Released device %s successfully" % pci_id) + except: + return diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index 100b567..045e3ed 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -304,6 +304,12 @@ class VM: elif params.get("uuid"): qemu_cmd += " -uuid %s" % params.get("uuid") + # If the PCI assignment step went OK, add each one of the PCI assigned + # devices to the qemu command line. + if self.pci_assignable: + for pci_id in self.pa_pci_ids: + qemu_cmd += " -pcidevice host=%s" % pci_id + return qemu_cmd @@ -392,6 +398,50 @@ class VM: self.uuid = f.read().strip() f.close() + if not params.get("pci_assignable") == "no": + pa_type = params.get("pci_assignable") + pa_devices_requested = params.get("devices_requested") + + # Virtual Functions (VF) assignable devices + if pa_type == "vf": + pa_driver = params.get("driver") + pa_driver_option = params.get("driver_option") + self.pci_assignable = kvm_utils.PciAssignable(type=pa_type, + driver=pa_driver, + driver_option=pa_driver_option, + devices_requested=pa_devices_requested) + # Physical NIC (PF) assignable devices + elif pa_type == "pf": + pa_device_names = params.get("device_names") + self.pci_assignable = kvm_utils.PciAssignable(type=pa_type, + names=pa_device_names, + devices_requested=pa_devices_requested) + # Working with both VF and PF + elif pa_type == "mixed": + pa_device_names = params.get("device_names") + pa_driver = params.get("driver") + pa_driver_option = params.get("driver_option") + self.pci_assignable = kvm_utils.PciAssignable(type=pa_type, + driver=pa_driver, + driver_option=pa_driver_option, + names=pa_device_names, + devices_requested=pa_devices_requested) + + self.pa_pci_ids = self.pci_assignable.request_devs() + + if self.pa_pci_ids: + logging.debug("Successfuly assigned 
devices: %s" % + self.pa_pci_ids) + else: + logging.error("No PCI assignable devices were assigned " + "and 'pci_assignable' is defined to %s " + "on your config file. Aborting VM creation." % + pa_type) + return False + + else: + self.pci_assignable = None + # Make qemu command qemu_command = self.make_qemu_command() @@ -537,6 +587,8 @@ class VM: # Is it already dead? if self.is_dead(): logging.debug("VM is already down") + if self.pci_assignable: + self.pci_assignable.release_devs() return logging.debug("Destroying VM with PID %d..." % @@ -557,6 +609,9 @@ class VM: return finally: session.close() + if self.pci_assignable: + self.pci_assignable.release_devs() + # Try to destroy with a monitor command logging.debug("Trying to kill VM with monitor command...") @@ -566,6 +621,8 @@ class VM: # Wait for the VM to be really dead if kvm_utils.wait_for(self.is_dead, 5, 0.5, 0.5): logging.debug("VM is down") + if self.pci_assignable: + self.pci_assignable.release_devs() return # If the VM isn't dead yet... @@ -575,6 +632,8 @@ class VM: # Wait for the VM to be really dead if kvm_utils.wait_for(self.is_dead, 5, 0.5, 0.5): logging.debug("VM is down") + if self.pci_assignable: + self.pci_assignable.release_devs() return logging.error("Process %s is a zombie!" % self.process.get_pid()) -- 1.6.5.2 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html