Re: [PATCH v2 kvmtool 25/30] pci: Implement callbacks for toggling BAR emulation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 23 Jan 2020 13:48:00 +0000
Alexandru Elisei <alexandru.elisei@xxxxxxx> wrote:

Hi,

> Implement callbacks for activating and deactivating emulation for a BAR
> region. This is in preparation for allowing a guest operating system to
> enable and disable access to I/O or memory space, or to reassign the
> BARs.
> 
> The emulated vesa device has been refactored in the process and the static
> variables were removed in order to make using the callbacks less painful.
> The framebuffer isn't designed to allow stopping and restarting at
> arbitrary points in the guest execution. Furthermore, on x86, the kernel
> will not change the BAR addresses, which on bare metal are programmed by
> the firmware, so take the easy way out and refuse to deactivate emulation
> for the BAR regions.
> 
> Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx>
> ---
>  hw/vesa.c         | 120 ++++++++++++++++++++++++++++++++--------------
>  include/kvm/pci.h |  19 +++++++-
>  pci.c             |  44 +++++++++++++++++
>  vfio/pci.c        | 100 +++++++++++++++++++++++++++++++-------
>  virtio/pci.c      |  90 ++++++++++++++++++++++++----------
>  5 files changed, 294 insertions(+), 79 deletions(-)
> 
> diff --git a/hw/vesa.c b/hw/vesa.c
> index e988c0425946..74ebebbefa6b 100644
> --- a/hw/vesa.c
> +++ b/hw/vesa.c
> @@ -18,6 +18,12 @@
>  #include <inttypes.h>
>  #include <unistd.h>
>  
> +struct vesa_dev {
> +	struct pci_device_header	pci_hdr;
> +	struct device_header		dev_hdr;
> +	struct framebuffer		fb;
> +};
> +
>  static bool vesa_pci_io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
>  {
>  	return true;
> @@ -33,29 +39,52 @@ static struct ioport_operations vesa_io_ops = {
>  	.io_out			= vesa_pci_io_out,
>  };
>  
> -static struct pci_device_header vesa_pci_device = {
> -	.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
> -	.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
> -	.header_type		= PCI_HEADER_TYPE_NORMAL,
> -	.revision_id		= 0,
> -	.class[2]		= 0x03,
> -	.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
> -	.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
> -	.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
> -	.bar_size[1]		= VESA_MEM_SIZE,
> -};
> +static int vesa__bar_activate(struct kvm *kvm,
> +			      struct pci_device_header *pci_hdr,
> +			      int bar_num, void *data)
> +{
> +	struct vesa_dev *vdev = data;
> +	u32 bar_addr, bar_size;
> +	char *mem;
> +	int r;
>  
> -static struct device_header vesa_device = {
> -	.bus_type	= DEVICE_BUS_PCI,
> -	.data		= &vesa_pci_device,
> -};
> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
> +	bar_size = pci_hdr->bar_size[bar_num];
>  
> -static struct framebuffer vesafb;
> +	switch (bar_num) {
> +	case 0:
> +		r = ioport__register(kvm, bar_addr, &vesa_io_ops, bar_size,
> +				     NULL);
> +		break;
> +	case 1:
> +		mem = mmap(NULL, bar_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> +		if (mem == MAP_FAILED) {
> +			r = -errno;
> +			break;
> +		}
> +		r = kvm__register_dev_mem(kvm, bar_addr, bar_size, mem);
> +		if (r < 0)
> +			break;
> +		vdev->fb.mem = mem;
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
> +static int vesa__bar_deactivate(struct kvm *kvm,
> +				struct pci_device_header *pci_hdr,
> +				int bar_num, void *data)
> +{
> +	return -EINVAL;
> +}
>  
>  struct framebuffer *vesa__init(struct kvm *kvm)
>  {
> -	u16 vesa_base_addr;
> -	char *mem;
> +	struct vesa_dev *vdev;
> +	u16 port_addr;
>  	int r;
>  
>  	BUILD_BUG_ON(!is_power_of_two(VESA_MEM_SIZE));
> @@ -63,34 +92,51 @@ struct framebuffer *vesa__init(struct kvm *kvm)
>  
>  	if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
>  		return NULL;
> -	r = pci_get_io_port_block(PCI_IO_SIZE);
> -	r = ioport__register(kvm, r, &vesa_io_ops, PCI_IO_SIZE, NULL);
> -	if (r < 0)
> -		return ERR_PTR(r);
>  
> -	vesa_base_addr			= (u16)r;
> -	vesa_pci_device.bar[0]		= cpu_to_le32(vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO);
> -	vesa_pci_device.bar_size[0]	= PCI_IO_SIZE;
> -	r = device__register(&vesa_device);
> -	if (r < 0)
> -		return ERR_PTR(r);
> +	vdev = calloc(1, sizeof(*vdev));
> +	if (vdev == NULL)
> +		return ERR_PTR(-ENOMEM);

Is it really necessary to allocate this here? You never free this, and I don't see how you could actually do this. AFAICS conceptually there can be only one VESA device? So maybe have a static variable above and use that instead of passing the pointer around? Or use &vdev if you need a pointer argument for the callbacks.

>  
> -	mem = mmap(NULL, VESA_MEM_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> -	if (mem == MAP_FAILED)
> -		return ERR_PTR(-errno);
> +	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
>  
> -	r = kvm__register_dev_mem(kvm, VESA_MEM_ADDR, VESA_MEM_SIZE, mem);
> -	if (r < 0)
> -		return ERR_PTR(r);
> +	vdev->pci_hdr = (struct pci_device_header) {
> +		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
> +		.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
> +		.command		= PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
> +		.header_type		= PCI_HEADER_TYPE_NORMAL,
> +		.revision_id		= 0,
> +		.class[2]		= 0x03,
> +		.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
> +		.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
> +		.bar[0]			= cpu_to_le32(port_addr | PCI_BASE_ADDRESS_SPACE_IO),
> +		.bar_size[0]		= PCI_IO_SIZE,
> +		.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
> +		.bar_size[1]		= VESA_MEM_SIZE,
> +	};
>  
> -	vesafb = (struct framebuffer) {
> +	vdev->fb = (struct framebuffer) {
>  		.width			= VESA_WIDTH,
>  		.height			= VESA_HEIGHT,
>  		.depth			= VESA_BPP,
> -		.mem			= mem,
> +		.mem			= NULL,
>  		.mem_addr		= VESA_MEM_ADDR,
>  		.mem_size		= VESA_MEM_SIZE,
>  		.kvm			= kvm,
>  	};
> -	return fb__register(&vesafb);
> +
> +	r = pci__register_bar_regions(kvm, &vdev->pci_hdr, vesa__bar_activate,
> +				      vesa__bar_deactivate, vdev);
> +	if (r < 0)
> +		return ERR_PTR(r);
> +
> +	vdev->dev_hdr = (struct device_header) {
> +		.bus_type       = DEVICE_BUS_PCI,
> +		.data           = &vdev->pci_hdr,
> +	};
> +
> +	r = device__register(&vdev->dev_hdr);
> +	if (r < 0)
> +		return ERR_PTR(r);
> +
> +	return fb__register(&vdev->fb);
>  }
> diff --git a/include/kvm/pci.h b/include/kvm/pci.h
> index 235cd82fff3c..bf42f497168f 100644
> --- a/include/kvm/pci.h
> +++ b/include/kvm/pci.h
> @@ -89,12 +89,19 @@ struct pci_cap_hdr {
>  	u8	next;
>  };
>  
> +struct pci_device_header;
> +
> +typedef int (*bar_activate_fn_t)(struct kvm *kvm,
> +				 struct pci_device_header *pci_hdr,
> +				 int bar_num, void *data);
> +typedef int (*bar_deactivate_fn_t)(struct kvm *kvm,
> +				   struct pci_device_header *pci_hdr,
> +				   int bar_num, void *data);
> +
>  #define PCI_BAR_OFFSET(b)	(offsetof(struct pci_device_header, bar[b]))
>  #define PCI_DEV_CFG_SIZE	256
>  #define PCI_DEV_CFG_MASK	(PCI_DEV_CFG_SIZE - 1)
>  
> -struct pci_device_header;
> -
>  struct pci_config_operations {
>  	void (*write)(struct kvm *kvm, struct pci_device_header *pci_hdr,
>  		      u8 offset, void *data, int sz);
> @@ -136,6 +143,9 @@ struct pci_device_header {
>  
>  	/* Private to lkvm */
>  	u32		bar_size[6];
> +	bar_activate_fn_t	bar_activate_fn;
> +	bar_deactivate_fn_t	bar_deactivate_fn;
> +	void *data;
>  	struct pci_config_operations	cfg_ops;
>  	/*
>  	 * PCI INTx# are level-triggered, but virtual device often feature
> @@ -160,8 +170,13 @@ void pci__assign_irq(struct device_header *dev_hdr);
>  void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>  void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>  
> +

Stray empty line?

Cheers,
Andre

>  void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type);
>  
> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
> +			      bar_activate_fn_t bar_activate_fn,
> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data);
> +
>  static inline bool __pci__memory_space_enabled(u16 command)
>  {
>  	return command & PCI_COMMAND_MEMORY;
> diff --git a/pci.c b/pci.c
> index 4f7b863298f6..5412f2defa2e 100644
> --- a/pci.c
> +++ b/pci.c
> @@ -66,6 +66,11 @@ void pci__assign_irq(struct device_header *dev_hdr)
>  		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;
>  }
>  
> +static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
> +{
> +	return  bar_num < 6 && pci_hdr->bar_size[bar_num];
> +}
> +
>  static void *pci_config_address_ptr(u16 port)
>  {
>  	unsigned long offset;
> @@ -264,6 +269,45 @@ struct pci_device_header *pci__find_dev(u8 dev_num)
>  	return hdr->data;
>  }
>  
> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
> +			      bar_activate_fn_t bar_activate_fn,
> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
> +{
> +	int i, r;
> +	bool has_bar_regions = false;
> +
> +	assert(bar_activate_fn && bar_deactivate_fn);
> +
> +	pci_hdr->bar_activate_fn = bar_activate_fn;
> +	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
> +	pci_hdr->data = data;
> +
> +	for (i = 0; i < 6; i++) {
> +		if (!pci_bar_is_implemented(pci_hdr, i))
> +			continue;
> +
> +		has_bar_regions = true;
> +
> +		if (pci__bar_is_io(pci_hdr, i) &&
> +		    pci__io_space_enabled(pci_hdr)) {
> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
> +				if (r < 0)
> +					return r;
> +			}
> +
> +		if (pci__bar_is_memory(pci_hdr, i) &&
> +		    pci__memory_space_enabled(pci_hdr)) {
> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
> +				if (r < 0)
> +					return r;
> +			}
> +	}
> +
> +	assert(has_bar_regions);
> +
> +	return 0;
> +}
> +
>  int pci__init(struct kvm *kvm)
>  {
>  	int r;
> diff --git a/vfio/pci.c b/vfio/pci.c
> index 8a775a4a4a54..9e595562180b 100644
> --- a/vfio/pci.c
> +++ b/vfio/pci.c
> @@ -446,6 +446,83 @@ out_unlock:
>  	mutex_unlock(&pdev->msi.mutex);
>  }
>  
> +static int vfio_pci_bar_activate(struct kvm *kvm,
> +				 struct pci_device_header *pci_hdr,
> +				 int bar_num, void *data)
> +{
> +	struct vfio_device *vdev = data;
> +	struct vfio_pci_device *pdev = &vdev->pci;
> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
> +	struct vfio_region *region = &vdev->regions[bar_num];
> +	int ret;
> +
> +	if (!region->info.size) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == table->bar) {
> +		ret = kvm__register_mmio(kvm, table->guest_phys_addr,
> +					 table->size, false,
> +					 vfio_pci_msix_table_access, pdev);
> +		if (ret < 0 || table->bar!= pba->bar)
> +			goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == pba->bar) {
> +		ret = kvm__register_mmio(kvm, pba->guest_phys_addr,
> +					 pba->size, false,
> +					 vfio_pci_msix_pba_access, pdev);
> +		goto out;
> +	}
> +
> +	ret = vfio_map_region(kvm, vdev, region);
> +out:
> +	return ret;
> +}
> +
> +static int vfio_pci_bar_deactivate(struct kvm *kvm,
> +				   struct pci_device_header *pci_hdr,
> +				   int bar_num, void *data)
> +{
> +	struct vfio_device *vdev = data;
> +	struct vfio_pci_device *pdev = &vdev->pci;
> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
> +	struct vfio_region *region = &vdev->regions[bar_num];
> +	int ret;
> +	bool success;
> +
> +	if (!region->info.size) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == table->bar) {
> +		success = kvm__deregister_mmio(kvm, table->guest_phys_addr);
> +		ret = (success ? 0 : -EINVAL);
> +		if (ret < 0 || table->bar!= pba->bar)
> +			goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == pba->bar) {
> +		success = kvm__deregister_mmio(kvm, pba->guest_phys_addr);
> +		ret = (success ? 0 : -EINVAL);
> +		goto out;
> +	}
> +
> +	vfio_unmap_region(kvm, region);
> +	ret = 0;
> +
> +out:
> +	return ret;
> +}
> +
>  static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
>  			      u8 offset, void *data, int sz)
>  {
> @@ -804,12 +881,6 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>  		ret = -ENOMEM;
>  		goto out_free;
>  	}
> -	pba->guest_phys_addr = table->guest_phys_addr + table->size;
> -
> -	ret = kvm__register_mmio(kvm, table->guest_phys_addr, table->size,
> -				 false, vfio_pci_msix_table_access, pdev);
> -	if (ret < 0)
> -		goto out_free;
>  
>  	/*
>  	 * We could map the physical PBA directly into the guest, but it's
> @@ -819,10 +890,7 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>  	 * between MSI-X table and PBA. For the sake of isolation, create a
>  	 * virtual PBA.
>  	 */
> -	ret = kvm__register_mmio(kvm, pba->guest_phys_addr, pba->size, false,
> -				 vfio_pci_msix_pba_access, pdev);
> -	if (ret < 0)
> -		goto out_free;
> +	pba->guest_phys_addr = table->guest_phys_addr + table->size;
>  
>  	pdev->msix.entries = entries;
>  	pdev->msix.nr_entries = nr_entries;
> @@ -893,11 +961,6 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
>  		region->guest_phys_addr = pci_get_mmio_block(map_size);
>  	}
>  
> -	/* Map the BARs into the guest or setup a trap region. */
> -	ret = vfio_map_region(kvm, vdev, region);
> -	if (ret)
> -		return ret;
> -
>  	return 0;
>  }
>  
> @@ -944,7 +1007,12 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm,
>  	}
>  
>  	/* We've configured the BARs, fake up a Configuration Space */
> -	return vfio_pci_fixup_cfg_space(vdev);
> +	ret = vfio_pci_fixup_cfg_space(vdev);
> +	if (ret)
> +		return ret;
> +
> +	return pci__register_bar_regions(kvm, &pdev->hdr, vfio_pci_bar_activate,
> +					 vfio_pci_bar_deactivate, vdev);
>  }
>  
>  /*
> diff --git a/virtio/pci.c b/virtio/pci.c
> index c4822514856c..5a3cc6f1e943 100644
> --- a/virtio/pci.c
> +++ b/virtio/pci.c
> @@ -474,6 +474,65 @@ static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
>  		virtio_pci__data_out(vcpu, vdev, addr - mmio_addr, data, len);
>  }
>  
> +static int virtio_pci__bar_activate(struct kvm *kvm,
> +				    struct pci_device_header *pci_hdr,
> +				    int bar_num, void *data)
> +{
> +	struct virtio_device *vdev = data;
> +	u32 bar_addr, bar_size;
> +	int r;
> +
> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
> +	bar_size = pci_hdr->bar_size[bar_num];
> +
> +	switch (bar_num) {
> +	case 0:
> +		r = ioport__register(kvm, bar_addr, &virtio_pci__io_ops,
> +				     bar_size, vdev);
> +		if (r > 0)
> +			r = 0;
> +		break;
> +	case 1:
> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
> +					virtio_pci__io_mmio_callback, vdev);
> +		break;
> +	case 2:
> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
> +					virtio_pci__msix_mmio_callback, vdev);
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
> +static int virtio_pci__bar_deactivate(struct kvm *kvm,
> +				      struct pci_device_header *pci_hdr,
> +				      int bar_num, void *data)
> +{
> +	u32 bar_addr;
> +	bool success;
> +	int r;
> +
> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
> +
> +	switch (bar_num) {
> +	case 0:
> +		r = ioport__unregister(kvm, bar_addr);
> +		break;
> +	case 1:
> +	case 2:
> +		success = kvm__deregister_mmio(kvm, bar_addr);
> +		r = (success ? 0 : -EINVAL);
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
>  int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  		     int device_id, int subsys_id, int class)
>  {
> @@ -488,23 +547,8 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  	BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE));
>  
>  	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
> -	r = ioport__register(kvm, port_addr, &virtio_pci__io_ops, PCI_IO_SIZE,
> -			     vdev);
> -	if (r < 0)
> -		return r;
> -	port_addr = (u16)r;
> -
>  	mmio_addr = pci_get_mmio_block(PCI_IO_SIZE);
> -	r = kvm__register_mmio(kvm, mmio_addr, PCI_IO_SIZE, false,
> -			       virtio_pci__io_mmio_callback, vdev);
> -	if (r < 0)
> -		goto free_ioport;
> -
>  	msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2);
> -	r = kvm__register_mmio(kvm, msix_io_block, PCI_IO_SIZE * 2, false,
> -			       virtio_pci__msix_mmio_callback, vdev);
> -	if (r < 0)
> -		goto free_mmio;
>  
>  	vpci->pci_hdr = (struct pci_device_header) {
>  		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
> @@ -530,6 +574,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  		.bar_size[2]		= cpu_to_le32(PCI_IO_SIZE*2),
>  	};
>  
> +	r = pci__register_bar_regions(kvm, &vpci->pci_hdr,
> +				      virtio_pci__bar_activate,
> +				      virtio_pci__bar_deactivate, vdev);
> +	if (r < 0)
> +		return r;
> +
>  	vpci->dev_hdr = (struct device_header) {
>  		.bus_type		= DEVICE_BUS_PCI,
>  		.data			= &vpci->pci_hdr,
> @@ -560,20 +610,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  
>  	r = device__register(&vpci->dev_hdr);
>  	if (r < 0)
> -		goto free_msix_mmio;
> +		return r;
>  
>  	/* save the IRQ that device__register() has allocated */
>  	vpci->legacy_irq_line = vpci->pci_hdr.irq_line;
>  
>  	return 0;
> -
> -free_msix_mmio:
> -	kvm__deregister_mmio(kvm, msix_io_block);
> -free_mmio:
> -	kvm__deregister_mmio(kvm, mmio_addr);
> -free_ioport:
> -	ioport__unregister(kvm, port_addr);
> -	return r;
>  }
>  
>  int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev)




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux