The mediated device will emulate the PCI config access (read/write) from the guest. Add PCI config read/write functions to support the config read/write accesses from the guest. Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> --- drivers/dma/idxd/registers.h | 4 + drivers/vfio/mdev/Kconfig | 9 + drivers/vfio/mdev/Makefile | 1 drivers/vfio/mdev/idxd/Makefile | 4 + drivers/vfio/mdev/idxd/mdev.h | 76 ++++++++++++ drivers/vfio/mdev/idxd/vdev.c | 255 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/idxd.h | 1 7 files changed, 350 insertions(+) create mode 100644 drivers/vfio/mdev/idxd/Makefile create mode 100644 drivers/vfio/mdev/idxd/mdev.h create mode 100644 drivers/vfio/mdev/idxd/vdev.c diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index c970c3f025f0..c699c72bd8d2 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -155,6 +155,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_CMD 0x02 #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 +#define IDXD_INTC_HALT 0x10 #define IDXD_CMD_OFFSET 0xa0 union idxd_command_reg { @@ -279,6 +280,9 @@ union msix_perm { u32 bits; } __packed; +#define MSIX_ENTRY_MASK_INT 0x1 +#define MSIX_ENTRY_CTRL_BYTE 12 + union group_flags { struct { u32 tc_a:3; diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig index 82f79d99a7db..57f89457e9a7 100644 --- a/drivers/vfio/mdev/Kconfig +++ b/drivers/vfio/mdev/Kconfig @@ -21,3 +21,12 @@ config VFIO_MDEV_IRQS devices. If you don't know what to do here, say N. + +config VFIO_MDEV_IDXD + tristate "VFIO Mediated device driver for Intel IDXD" + depends on VFIO && VFIO_MDEV && X86_64 + select VFIO_MDEV_IMS + select IMS_MSI_ARRAY + default n + help + VFIO based mediated device driver for Intel Accelerator Devices driver. diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile index c3f160cae192..7e3f5fae4bf1 100644 --- a/drivers/vfio/mdev/Makefile +++ b/drivers/vfio/mdev/Makefile @@ -6,3 +6,4 @@ mdev-$(CONFIG_VFIO_MDEV_IRQS) += mdev_irqs.o obj-$(CONFIG_VFIO_MDEV) += mdev.o +obj-$(CONFIG_VFIO_MDEV_IDXD) += idxd/ diff --git a/drivers/vfio/mdev/idxd/Makefile b/drivers/vfio/mdev/idxd/Makefile new file mode 100644 index 000000000000..ccd3bc1c7ab6 --- /dev/null +++ b/drivers/vfio/mdev/idxd/Makefile @@ -0,0 +1,4 @@ +ccflags-y += -I$(srctree)/drivers/dma/idxd -DDEFAULT_SYMBOL_NAMESPACE=IDXD + +obj-$(CONFIG_VFIO_MDEV_IDXD) += idxd_mdev.o +idxd_mdev-y := vdev.o diff --git a/drivers/vfio/mdev/idxd/mdev.h b/drivers/vfio/mdev/idxd/mdev.h new file mode 100644 index 000000000000..120c2dc29ba7 --- /dev/null +++ b/drivers/vfio/mdev/idxd/mdev.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */ + +#ifndef _IDXD_MDEV_H_ +#define _IDXD_MDEV_H_ + +/* two 64-bit BARs implemented */ +#define VIDXD_MAX_BARS 2 +#define VIDXD_MAX_CFG_SPACE_SZ 4096 +#define VIDXD_MAX_MMIO_SPACE_SZ 8192 +#define VIDXD_MSIX_TBL_SZ_OFFSET 0x42 +#define VIDXD_CAP_CTRL_SZ 0x100 +#define VIDXD_GRP_CTRL_SZ 0x100 +#define VIDXD_WQ_CTRL_SZ 0x100 +#define VIDXD_WQ_OCPY_INT_SZ 0x20 +#define VIDXD_MSIX_TBL_SZ 0x90 +#define VIDXD_MSIX_PERM_TBL_SZ 0x48 + +#define VIDXD_MSIX_PERM_OFFSET 0x300 +#define VIDXD_GRPCFG_OFFSET 0x400 +#define VIDXD_WQCFG_OFFSET 0x500 +#define VIDXD_MSIX_TABLE_OFFSET 0x600 +#define VIDXD_MSIX_PBA_OFFSET 0x700 +#define VIDXD_IMS_OFFSET 0x1000 + +#define VIDXD_BAR0_SIZE 0x2000 +#define VIDXD_BAR2_SIZE 0x2000 +#define VIDXD_MAX_MSIX_VECS 2 +#define VIDXD_MAX_MSIX_ENTRIES VIDXD_MAX_MSIX_VECS +#define VIDXD_MAX_WQS 1 + +struct vdcm_idxd { + struct idxd_device *idxd; + struct idxd_wq *wq; + struct mdev_device *mdev; + int num_wqs; + + u64 bar_val[VIDXD_MAX_BARS]; + u64 bar_size[VIDXD_MAX_BARS]; + u8 cfg[VIDXD_MAX_CFG_SPACE_SZ]; + u8 bar0[VIDXD_MAX_MMIO_SPACE_SZ]; + struct mutex dev_lock; /* lock for vidxd resources */ +}; + +static inline u64 get_reg_val(void *buf, int size) +{ + u64 val = 0; + + switch (size) { + case 8: + val = *(u64 *)buf; + break; + case 4: + val = *(u32 *)buf; + break; + case 2: + val = *(u16 *)buf; + break; + case 1: + val = *(u8 *)buf; + break; + } + + return val; +} + +static inline u8 vidxd_state(struct vdcm_idxd *vidxd) +{ + union gensts_reg *gensts = (union gensts_reg *)(vidxd->bar0 + IDXD_GENSTATS_OFFSET); + + return gensts->state; +} + +int vidxd_cfg_read(struct vdcm_idxd *vidxd, unsigned int pos, void *buf, unsigned int count); +int vidxd_cfg_write(struct vdcm_idxd *vidxd, unsigned int pos, void *buf, unsigned int size); +#endif diff --git a/drivers/vfio/mdev/idxd/vdev.c b/drivers/vfio/mdev/idxd/vdev.c new file mode 100644 index 000000000000..aca4a1228a97 --- /dev/null +++ b/drivers/vfio/mdev/idxd/vdev.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019,2020 Intel Corporation. All rights rsvd. */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/sched/task.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/mm.h> +#include <linux/mmu_context.h> +#include <linux/vfio.h> +#include <linux/mdev.h> +#include <linux/msi.h> +#include <linux/intel-iommu.h> +#include <linux/intel-svm.h> +#include <linux/kvm_host.h> +#include <linux/eventfd.h> +#include <uapi/linux/idxd.h> +#include "registers.h" +#include "idxd.h" +#include "mdev.h" + +void vidxd_send_interrupt(struct vdcm_idxd *vidxd, int vector) +{ + struct mdev_device *mdev = vidxd->mdev; + u8 *bar0 = vidxd->bar0; + u8 *msix_entry = &bar0[VIDXD_MSIX_TABLE_OFFSET + vector * 0x10]; + u64 *pba = (u64 *)(bar0 + VIDXD_MSIX_PBA_OFFSET); + u8 ctrl; + + ctrl = msix_entry[MSIX_ENTRY_CTRL_BYTE]; + if (ctrl & MSIX_ENTRY_MASK_INT) + set_bit(vector, (unsigned long *)pba); + else + mdev_msix_send_signal(mdev, vector); +} + +static void vidxd_set_swerr(struct vdcm_idxd *vidxd, unsigned int error) +{ + union sw_err_reg *swerr = (union sw_err_reg *)(vidxd->bar0 + IDXD_SWERR_OFFSET); + + if (!swerr->valid) { + memset(swerr, 0, sizeof(*swerr)); + swerr->valid = 1; + swerr->error = error; + } else if (!swerr->overflow) { + swerr->overflow = 1; + } +} + +static inline void send_swerr_interrupt(struct vdcm_idxd *vidxd) +{ + union genctrl_reg *genctrl = (union genctrl_reg *)(vidxd->bar0 + IDXD_GENCTRL_OFFSET); + u32 *intcause = (u32 *)(vidxd->bar0 + IDXD_INTCAUSE_OFFSET); + + if (!genctrl->softerr_int_en) + return; + + *intcause |= IDXD_INTC_ERR; + vidxd_send_interrupt(vidxd, 0); +} + +static inline void send_halt_interrupt(struct vdcm_idxd *vidxd) +{ + union genctrl_reg *genctrl = (union genctrl_reg *)(vidxd->bar0 + IDXD_GENCTRL_OFFSET); + u32 *intcause = (u32 *)(vidxd->bar0 + IDXD_INTCAUSE_OFFSET); + + if (!genctrl->halt_int_en) + return; + + *intcause |= IDXD_INTC_HALT; + vidxd_send_interrupt(vidxd, 0); +} + +static void vidxd_report_pci_error(struct vdcm_idxd *vidxd) +{ + union gensts_reg *gensts = (union gensts_reg *)(vidxd->bar0 + IDXD_GENSTATS_OFFSET); + + vidxd_set_swerr(vidxd, DSA_ERR_PCI_CFG); + /* set device to halt */ + gensts->reset_type = IDXD_DEVICE_RESET_FLR; + gensts->state = IDXD_DEVICE_STATE_HALT; + + send_halt_interrupt(vidxd); +} + +int vidxd_cfg_read(struct vdcm_idxd *vidxd, unsigned int pos, void *buf, unsigned int count) +{ + u32 offset = pos & 0xfff; + struct device *dev = &vidxd->mdev->dev; + + memcpy(buf, &vidxd->cfg[offset], count); + + dev_dbg(dev, "vidxd pci R %d %x %x: %llx\n", + vidxd->wq->id, count, offset, get_reg_val(buf, count)); + + return 0; +} + +/* + * Much of the emulation code has been borrowed from Intel i915 cfg space + * emulation code. + * drivers/gpu/drm/i915/gvt/cfg_space.c: + */ + +/* + * Bitmap for writable bits (RW or RW1C bits, but cannot co-exist in one + * byte) byte by byte in standard pci configuration space. (not the full + * 256 bytes.) + */ +static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = { + [PCI_COMMAND] = 0xff, 0x07, + [PCI_STATUS] = 0x00, 0xf9, /* the only one RW1C byte */ + [PCI_CACHE_LINE_SIZE] = 0xff, + [PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff, + [PCI_ROM_ADDRESS] = 0x01, 0xf8, 0xff, 0xff, + [PCI_INTERRUPT_LINE] = 0xff, +}; + +static void _pci_cfg_mem_write(struct vdcm_idxd *vidxd, unsigned int off, u8 *src, + unsigned int bytes) +{ + u8 *cfg_base = vidxd->cfg; + u8 mask, new, old; + int i = 0; + + for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) { + mask = pci_cfg_space_rw_bmp[off + i]; + old = cfg_base[off + i]; + new = src[i] & mask; + + /** + * The PCI_STATUS high byte has RW1C bits, here + * emulates clear by writing 1 for these bits. + * Writing a 0b to RW1C bits has no effect. + */ + if (off + i == PCI_STATUS + 1) + new = (~new & old) & mask; + + cfg_base[off + i] = (old & ~mask) | new; + } + + /* For other configuration space directly copy as it is. */ + if (i < bytes) + memcpy(cfg_base + off + i, src + i, bytes - i); +} + +static inline void _write_pci_bar(struct vdcm_idxd *vidxd, u32 offset, u32 val, bool low) +{ + u32 *pval; + + /* BAR offset should be 32 bits algiend */ + offset = rounddown(offset, 4); + pval = (u32 *)(vidxd->cfg + offset); + + if (low) { + /* + * only update bit 31 - bit 4, + * leave the bit 3 - bit 0 unchanged. + */ + *pval = (val & GENMASK(31, 4)) | (*pval & GENMASK(3, 0)); + } else { + *pval = val; + } +} + +static int _pci_cfg_bar_write(struct vdcm_idxd *vidxd, unsigned int offset, void *p_data, + unsigned int bytes) +{ + u32 new = *(u32 *)(p_data); + bool lo = IS_ALIGNED(offset, 8); + u64 size; + unsigned int bar_id; + + /* + * Power-up software can determine how much address + * space the device requires by writing a value of + * all 1's to the register and then reading the value + * back. The device will return 0's in all don't-care + * address bits. + */ + if (new == 0xffffffff) { + switch (offset) { + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_1: + case PCI_BASE_ADDRESS_2: + case PCI_BASE_ADDRESS_3: + bar_id = (offset - PCI_BASE_ADDRESS_0) / 8; + size = vidxd->bar_size[bar_id]; + _write_pci_bar(vidxd, offset, size >> (lo ? 0 : 32), lo); + break; + default: + /* Unimplemented BARs */ + _write_pci_bar(vidxd, offset, 0x0, false); + } + } else { + switch (offset) { + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_1: + case PCI_BASE_ADDRESS_2: + case PCI_BASE_ADDRESS_3: + _write_pci_bar(vidxd, offset, new, lo); + break; + default: + break; + } + } + return 0; +} + +int vidxd_cfg_write(struct vdcm_idxd *vidxd, unsigned int pos, void *buf, unsigned int size) +{ + struct device *dev = &vidxd->idxd->pdev->dev; + + if (size > 4) + return -EINVAL; + + if (pos + size > VIDXD_MAX_CFG_SPACE_SZ) + return -EINVAL; + + dev_dbg(dev, "vidxd pci W %d %x %x: %llx\n", vidxd->wq->id, size, pos, + get_reg_val(buf, size)); + + /* First check if it's PCI_COMMAND */ + if (IS_ALIGNED(pos, 2) && pos == PCI_COMMAND) { + bool new_bme; + bool bme; + + if (size > 2) + return -EINVAL; + + new_bme = !!(get_reg_val(buf, 2) & PCI_COMMAND_MASTER); + bme = !!(vidxd->cfg[pos] & PCI_COMMAND_MASTER); + _pci_cfg_mem_write(vidxd, pos, buf, size); + + /* Flag error if turning off BME while device is enabled */ + if ((bme && !new_bme) && vidxd_state(vidxd) == IDXD_DEVICE_STATE_ENABLED) + vidxd_report_pci_error(vidxd); + return 0; + } + + switch (pos) { + case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5: + if (!IS_ALIGNED(pos, 4)) + return -EINVAL; + return _pci_cfg_bar_write(vidxd, pos, buf, size); + + default: + _pci_cfg_mem_write(vidxd, pos, buf, size); + } + return 0; +} + +MODULE_LICENSE("GPL v2"); diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..751f6107217c 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -89,6 +89,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR1, DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, + DSA_ERR_PCI_CFG = 0x51, }; enum iax_completion_status {