Re: [PATCH v3 kvmtool 32/32] arm/arm64: Add PCI Express 1.1 support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 06/05/2020 14:51, Alexandru Elisei wrote:

Hi,

> On 4/6/20 3:06 PM, André Przywara wrote:
>> On 26/03/2020 15:24, Alexandru Elisei wrote:
>>> PCI Express comes with an extended addressing scheme, which directly
>>> translated into a bigger device configuration space (256->4096 bytes)
>>> and bigger PCI configuration space (16->256 MB), as well as mandatory
>>> capabilities (power management [1] and PCI Express capability [2]).
>>>
>>> However, our virtio PCI implementation implements version 0.9 of the
>>> protocol and it still uses transitional PCI device ID's, so we have
>>> opted to omit the mandatory PCI Express capabilities.For VFIO, the power
>>> management and PCI Express capability are left for a subsequent patch.
>>>
>>> [1] PCI Express Base Specification Revision 1.1, section 7.6
>>> [2] PCI Express Base Specification Revision 1.1, section 7.8
>>>
>>> Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx>
>>> ---
>>>  arm/include/arm-common/kvm-arch.h |  4 +-
>>>  arm/pci.c                         |  2 +-
>>>  builtin-run.c                     |  1 +
>>>  include/kvm/kvm-config.h          |  2 +-
>>>  include/kvm/pci.h                 | 76 ++++++++++++++++++++++++++++---
>>>  pci.c                             |  5 +-
>>>  vfio/pci.c                        | 26 +++++++----
>>>  7 files changed, 96 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
>>> index b9d486d5eac2..13c55fa3dc29 100644
>>> --- a/arm/include/arm-common/kvm-arch.h
>>> +++ b/arm/include/arm-common/kvm-arch.h
>>> @@ -23,7 +23,7 @@
>>>  
>>>  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
>>>  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))
>>> -#define ARM_PCI_CFG_SIZE	(1ULL << 24)
>>> +#define ARM_PCI_CFG_SIZE	(1ULL << 28)
>> The existence of this symbol seems somewhat odd, there should be no ARM
>> specific version of the config space size.
> 
> The existence of the symbol is required because at the moment PCI Express support
> is not available for all architecture, and this file needs to be included in
> pci.h, not the other way around. I agree it's not ideal, but that's the way it is
> right now.
> 
>> Do you know why we don't use the generic PCI_CFG_SIZE here?
>> At the very least I would expect something like:
>> #define ARM_PCI_CFG_SIZE	PCI_EXP_CFG_SIZE
> 
> We don't use PCI_EXP_CFG_SIZE because it might not be available for all
> architectures. It is possible it's not there on x86, like the error you have
> encountered showed, so instead of waiting for someone to report a compilation
> failure, I would rather use a number from the start.

Ah, OK, fair enough then.
I just wanted to avoid the impression that this is something
architecture specific.

>>
>>>  #define ARM_PCI_MMIO_SIZE	(ARM_MEMORY_AREA - \
>>>  				(ARM_AXI_AREA + ARM_PCI_CFG_SIZE))
>>>  
>>> @@ -50,6 +50,8 @@
>>>  
>>>  #define VIRTIO_RING_ENDIAN	(VIRTIO_ENDIAN_LE | VIRTIO_ENDIAN_BE)
>>>  
>>> +#define ARCH_HAS_PCI_EXP	1
>>> +
>>>  static inline bool arm_addr_in_ioport_region(u64 phys_addr)
>>>  {
>>>  	u64 limit = KVM_IOPORT_AREA + ARM_IOPORT_SIZE;
>>> diff --git a/arm/pci.c b/arm/pci.c
>>> index ed325fa4a811..2251f627d8b5 100644
>>> --- a/arm/pci.c
>>> +++ b/arm/pci.c
>>> @@ -62,7 +62,7 @@ void pci__generate_fdt_nodes(void *fdt)
>>>  	_FDT(fdt_property_cell(fdt, "#address-cells", 0x3));
>>>  	_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
>>>  	_FDT(fdt_property_cell(fdt, "#interrupt-cells", 0x1));
>>> -	_FDT(fdt_property_string(fdt, "compatible", "pci-host-cam-generic"));
>>> +	_FDT(fdt_property_string(fdt, "compatible", "pci-host-ecam-generic"));
>>>  	_FDT(fdt_property(fdt, "dma-coherent", NULL, 0));
>>>  
>>>  	_FDT(fdt_property(fdt, "bus-range", bus_range, sizeof(bus_range)));
>>> diff --git a/builtin-run.c b/builtin-run.c
>>> index 9cb8c75300eb..def8a1f803ad 100644
>>> --- a/builtin-run.c
>>> +++ b/builtin-run.c
>>> @@ -27,6 +27,7 @@
>>>  #include "kvm/irq.h"
>>>  #include "kvm/kvm.h"
>>>  #include "kvm/pci.h"
>>> +#include "kvm/vfio.h"
>>>  #include "kvm/rtc.h"
>>>  #include "kvm/sdl.h"
>>>  #include "kvm/vnc.h"
>>> diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
>>> index a052b0bc7582..a1012c57b7a7 100644
>>> --- a/include/kvm/kvm-config.h
>>> +++ b/include/kvm/kvm-config.h
>>> @@ -2,7 +2,6 @@
>>>  #define KVM_CONFIG_H_
>>>  
>>>  #include "kvm/disk-image.h"
>>> -#include "kvm/vfio.h"
>>>  #include "kvm/kvm-config-arch.h"
>>>  
>>>  #define DEFAULT_KVM_DEV		"/dev/kvm"
>>> @@ -18,6 +17,7 @@
>>>  #define MIN_RAM_SIZE_MB		(64ULL)
>>>  #define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
>>>  
>>> +struct vfio_device_params;
>>>  struct kvm_config {
>>>  	struct kvm_config_arch arch;
>>>  	struct disk_image_params disk_image[MAX_DISK_IMAGES];
>>> diff --git a/include/kvm/pci.h b/include/kvm/pci.h
>>> index be75f77fd2cb..71ee9d8cb01f 100644
>>> --- a/include/kvm/pci.h
>>> +++ b/include/kvm/pci.h
>>> @@ -10,6 +10,7 @@
>>>  #include "kvm/devices.h"
>>>  #include "kvm/msi.h"
>>>  #include "kvm/fdt.h"
>>> +#include "kvm.h"
>>>  
>>>  #define pci_dev_err(pci_hdr, fmt, ...) \
>>>  	pr_err("[%04x:%04x] " fmt, pci_hdr->vendor_id, pci_hdr->device_id, ##__VA_ARGS__)
>>> @@ -32,9 +33,41 @@
>>>  #define PCI_CONFIG_BUS_FORWARD	0xcfa
>>>  #define PCI_IO_SIZE		0x100
>>>  #define PCI_IOPORT_START	0x6200
>>> -#define PCI_CFG_SIZE		(1ULL << 24)
>>>  
>>> -struct kvm;
>>> +#define PCIE_CAP_REG_VER	0x1
>>> +#define PCIE_CAP_REG_DEV_LEGACY	(1 << 4)
>>> +#define PM_CAP_VER		0x3
>>> +
>>> +#ifdef ARCH_HAS_PCI_EXP
>>> +#define PCI_CFG_SIZE		(1ULL << 28)
>>> +#define PCI_DEV_CFG_SIZE	4096
>> Maybe use PCI_CFG_SPACE_EXP_SIZE from pci_regs.h?
> 
> I cannot do that because it's not available on all distros. I used
> PCI_CFG_SPACE_EXP_SIZE in the previous iteration, but I changed it when Sami
> reported a compilation failure on his particular setup (it's mentioned in the
> changelog).

OK.

>>
>>> +
>>> +union pci_config_address {
>>> +	struct {
>>> +#if __BYTE_ORDER == __LITTLE_ENDIAN
>>> +		unsigned	reg_offset	: 2;		/* 1  .. 0  */
>>> +		unsigned	register_number	: 10;		/* 11 .. 2  */
>>> +		unsigned	function_number	: 3;		/* 14 .. 12 */
>>> +		unsigned	device_number	: 5;		/* 19 .. 15 */
>>> +		unsigned	bus_number	: 8;		/* 27 .. 20 */
>>> +		unsigned	reserved	: 3;		/* 30 .. 28 */
>>> +		unsigned	enable_bit	: 1;		/* 31       */
>>> +#else
>>> +		unsigned	enable_bit	: 1;		/* 31       */
>>> +		unsigned	reserved	: 3;		/* 30 .. 28 */
>>> +		unsigned	bus_number	: 8;		/* 27 .. 20 */
>>> +		unsigned	device_number	: 5;		/* 19 .. 15 */
>>> +		unsigned	function_number	: 3;		/* 14 .. 12 */
>>> +		unsigned	register_number	: 10;		/* 11 .. 2  */
>>> +		unsigned	reg_offset	: 2;		/* 1  .. 0  */
>>> +#endif
>> Just for the records:
>> I think we agreed on this before, but using a C bitfield to model
>> hardware defined bits is broken, because the C standard doesn't
>> guarantee those bits to be consecutive and layed out like we hope it would.
>> But since we have this issue already with the legacy config space, and
>> it seems to work (TM), we can fix this later.
> 
> Still on the record, it's broken for big endian because only the byte order is
> different, not the individual bits. But there are other things that are broken for
> big endian, so not a big deal.
> 
>>
>>> +	};
>>> +	u32 w;
>>> +};
>>> +
>>> +#else
>>> +#define PCI_CFG_SIZE		(1ULL << 24)
>>> +#define PCI_DEV_CFG_SIZE	256
>> Shall we use PCI_CFG_SPACE_SIZE from the kernel headers here?
> 
> I would rather not, for the reasons explained above.
> 
>>
>>>  
>>>  union pci_config_address {
>>>  	struct {
>>> @@ -58,6 +91,8 @@ union pci_config_address {
>>>  	};
>>>  	u32 w;
>>>  };
>>> +#endif
>>> +#define PCI_DEV_CFG_MASK	(PCI_DEV_CFG_SIZE - 1)
>>>  
>>>  struct msix_table {
>>>  	struct msi_msg msg;
>>> @@ -100,6 +135,33 @@ struct pci_cap_hdr {
>>>  	u8	next;
>>>  };
>>>  
>>> +struct pcie_cap {
>> I guess this is meant to map to the PCI Express Capability Structure as
>> described in the PCIe spec?
>> We would need to add the "packed" attribute then. But actually I am not
>> a fan of using C language constructs to model specified register
>> arrangements, the kernel tries to avoid this as well.
> 
> I'm not sure what you are suggesting. Should we rewrite the entire PCI emulation
> code in kvmtool then?

At least not add more of that problematic code, especially if we don't
need it. Maybe there is a better solution for the operations we will
need (byte array?), that's hard to say without seeing the code.

>> Actually, looking closer: why do we need this in the first place? I
>> removed this and struct pm_cap, and it still compiles.
>> So can we lose those two structures at all? And move the discussion and
>> implementation (for VirtIO 1.0?) to a later series?
> 
> I've answered both points in v2 of the series [1].
> 
> [1] https://www.spinics.net/lists/kvm/msg209601.html:

>From there:
>> But more importantly: Do we actually need those definitions? We
>> don't seem to use them, do we?
>> And the u8 __pad[PCI_DEV_CFG_SIZE] below should provide the extended
>> storage space a guest would expect?
>
> Yes, we don't use them for the reasons I explained in the commit
> message. I would rather keep them, because they are required by the
> PCIE spec.

I don't get the point of adding code / data structures that we don't
need, especially if it has issues. I understand it's mandatory as per
the spec, but just adding a struct here doesn't fix this or makes this
better.

Cheers,
Andre

> 
>>
>>> +	u8 cap;
>>> +	u8 next;
>>> +	u16 cap_reg;
>>> +	u32 dev_cap;
>>> +	u16 dev_ctrl;
>>> +	u16 dev_status;
>>> +	u32 link_cap;
>>> +	u16 link_ctrl;
>>> +	u16 link_status;
>>> +	u32 slot_cap;
>>> +	u16 slot_ctrl;
>>> +	u16 slot_status;
>>> +	u16 root_ctrl;
>>> +	u16 root_cap;
>>> +	u32 root_status;
>>> +};
>>> +
>>> +struct pm_cap {
>>> +	u8 cap;
>>> +	u8 next;
>>> +	u16 pmc;
>>> +	u16 pmcsr;
>>> +	u8 pmcsr_bse;
>>> +	u8 data;
>>> +};
>>> +
>>>  struct pci_device_header;
>>>  
>>>  typedef int (*bar_activate_fn_t)(struct kvm *kvm,
>>> @@ -110,14 +172,12 @@ typedef int (*bar_deactivate_fn_t)(struct kvm *kvm,
>>>  				   int bar_num, void *data);
>>>  
>>>  #define PCI_BAR_OFFSET(b)	(offsetof(struct pci_device_header, bar[b]))
>>> -#define PCI_DEV_CFG_SIZE	256
>>> -#define PCI_DEV_CFG_MASK	(PCI_DEV_CFG_SIZE - 1)
>>>  
>>>  struct pci_config_operations {
>>>  	void (*write)(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>> -		      u8 offset, void *data, int sz);
>>> +		      u16 offset, void *data, int sz);
>>>  	void (*read)(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>> -		     u8 offset, void *data, int sz);
>>> +		     u16 offset, void *data, int sz);
>>>  };
>>>  
>>>  struct pci_device_header {
>>> @@ -147,6 +207,10 @@ struct pci_device_header {
>>>  			u8		min_gnt;
>>>  			u8		max_lat;
>>>  			struct msix_cap msix;
>>> +#ifdef ARCH_HAS_PCI_EXP
>>> +			struct pm_cap pm;
>>> +			struct pcie_cap pcie;
>>> +#endif
>>>  		} __attribute__((packed));
>>>  		/* Pad to PCI config space size */
>>>  		u8	__pad[PCI_DEV_CFG_SIZE];
>>> diff --git a/pci.c b/pci.c
>>> index 68ece65441a6..b471209a6efc 100644
>>> --- a/pci.c
>>> +++ b/pci.c
>>> @@ -400,7 +400,8 @@ static void pci_config_bar_wr(struct kvm *kvm,
>>>  void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size)
>>>  {
>>>  	void *base;
>>> -	u8 bar, offset;
>>> +	u8 bar;
>>> +	u16 offset;
>>>  	struct pci_device_header *pci_hdr;
>>>  	u8 dev_num = addr.device_number;
>>>  	u32 value = 0;
>>> @@ -439,7 +440,7 @@ void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data,
>>>  
>>>  void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size)
>>>  {
>>> -	u8 offset;
>>> +	u16 offset;
>>>  	struct pci_device_header *pci_hdr;
>>>  	u8 dev_num = addr.device_number;
>>>  
>>> diff --git a/vfio/pci.c b/vfio/pci.c
>>> index 2b891496547d..6b8726227ea0 100644
>>> --- a/vfio/pci.c
>>> +++ b/vfio/pci.c
>>> @@ -311,7 +311,7 @@ out_unlock:
>>>  }
>>>  
>>>  static void vfio_pci_msix_cap_write(struct kvm *kvm,
>>> -				    struct vfio_device *vdev, u8 off,
>>> +				    struct vfio_device *vdev, u16 off,
>>>  				    void *data, int sz)
>>>  {
>>>  	struct vfio_pci_device *pdev = &vdev->pci;
>>> @@ -343,7 +343,7 @@ static void vfio_pci_msix_cap_write(struct kvm *kvm,
>>>  }
>>>  
>>>  static int vfio_pci_msi_vector_write(struct kvm *kvm, struct vfio_device *vdev,
>>> -				     u8 off, u8 *data, u32 sz)
>>> +				     u16 off, u8 *data, u32 sz)
>>>  {
>>>  	size_t i;
>>>  	u32 mask = 0;
>>> @@ -391,7 +391,7 @@ static int vfio_pci_msi_vector_write(struct kvm *kvm, struct vfio_device *vdev,
>>>  }
>>>  
>>>  static void vfio_pci_msi_cap_write(struct kvm *kvm, struct vfio_device *vdev,
>>> -				   u8 off, u8 *data, u32 sz)
>>> +				   u16 off, u8 *data, u32 sz)
>>>  {
>>>  	u8 ctrl;
>>>  	struct msi_msg msg;
>>> @@ -536,7 +536,7 @@ out:
>>>  }
>>>  
>>>  static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>> -			      u8 offset, void *data, int sz)
>>> +			      u16 offset, void *data, int sz)
>>>  {
>>>  	struct vfio_region_info *info;
>>>  	struct vfio_pci_device *pdev;
>>> @@ -554,7 +554,7 @@ static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr
>>>  }
>>>  
>>>  static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>> -			       u8 offset, void *data, int sz)
>>> +			       u16 offset, void *data, int sz)
>>>  {
>>>  	struct vfio_region_info *info;
>>>  	struct vfio_pci_device *pdev;
>>> @@ -638,15 +638,17 @@ static int vfio_pci_parse_caps(struct vfio_device *vdev)
>>>  {
>>>  	int ret;
>>>  	size_t size;
>>> -	u8 pos, next;
>>> +	u16 pos, next;
>>>  	struct pci_cap_hdr *cap;
>>> -	u8 virt_hdr[PCI_DEV_CFG_SIZE];
>>> +	u8 *virt_hdr;
>>>  	struct vfio_pci_device *pdev = &vdev->pci;
>>>  
>>>  	if (!(pdev->hdr.status & PCI_STATUS_CAP_LIST))
>>>  		return 0;
>>>  
>>> -	memset(virt_hdr, 0, PCI_DEV_CFG_SIZE);
>>> +	virt_hdr = calloc(1, PCI_DEV_CFG_SIZE);
>>> +	if (!virt_hdr)
>>> +		return -errno;
>> There are two places where we return in this function, we don't seem to
>> free virt_hdr in those cases. Looks like a job for your beloved goto ;-)
>>
> Indeed, I'll do that.
> 
> Thanks,
> Alex
> 




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux