Hi Diana, On 9/7/20 4:34 PM, Diana Craciun OSS wrote: > Hi Eric, > > On 9/4/2020 11:18 AM, Auger Eric wrote: >> Hi Diana, >> >> On 8/26/20 11:33 AM, Diana Craciun wrote: >>> The software uses a memory-mapped I/O command interface (MC portals) to >>> communicate with the MC hardware. This command interface is used to >>> discover, enumerate, configure and remove DPAA2 objects. The DPAA2 >>> objects use MSIs, so the command interface needs to be emulated >>> such that the correct MSI is configured in the hardware (the guest >>> has the virtual MSIs). >> What I don't get is all the regions are mmappable too. >> And this patch does not seem to introduce special handling with respect >> to MSIs. Please could you clarify? > > The device can be controlled using commands issued towards a firmware. > Most of the commands can be passthrough, among exceptions is the command > that configures the interrupts. In a guest the interrupts are emulated > and for the hardware the numbers in the guest mean nothing. So, in a > virtual machine scenario, the DPMCP and DPRC regions are emulated in > qemu such that the command which configures the interrupts will not go > to hardware with the information set by the guest. > However there are other scenarios apart from virtual machines like DPDK > in which the interrupt configuration command is not used. The problem > might be that the userspace could issue the command because there is no > restriction in the VFIO, but in that case the worst thing that may > happen is for the interrupts for the device not to work. > However it is possible to restrict the command for this scenario as well > if I change the code and not allow the DPRC region to be mmapable. In > practice it proved that it might not gain much by direct assigning that > area. Also the interrupt configuration command was restricted from the > firmware to be issued only from the DPRC device region to help such a > scenario. Yes actually I meant that the region used to configure MSIs should not be mmappable then? Thanks Eric > > >>> >>> This patch is adding read/write support for fsl-mc devices. The mc >>> commands are emulated by the userspace. The host is just passing >>> the correct command to the hardware. >>> >>> Also the current patch limits userspace to write complete >>> 64byte command once and read 64byte response by one ioctl. >>> >>> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@xxxxxxx> >>> Signed-off-by: Diana Craciun <diana.craciun@xxxxxxxxxxx> >>> --- >>> drivers/vfio/fsl-mc/vfio_fsl_mc.c | 115 +++++++++++++++++++++- >>> drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 1 + >>> 2 files changed, 114 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c >>> b/drivers/vfio/fsl-mc/vfio_fsl_mc.c >>> index 73834f488a94..27713aa86878 100644 >>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c >>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c >>> @@ -12,6 +12,7 @@ >>> #include <linux/types.h> >>> #include <linux/vfio.h> >>> #include <linux/fsl/mc.h> >>> +#include <linux/delay.h> >>> #include "vfio_fsl_mc_private.h" >>> @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct >>> vfio_fsl_mc_device *vdev) >>> vdev->regions[i].size = resource_size(res); >>> vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP; >>> vdev->regions[i].type = mc_dev->regions[i].flags & >>> IORESOURCE_BITS; >>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ; >>> + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY)) >>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE; >>> } >>> vdev->num_regions = mc_dev->obj_desc.region_count; >>> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct >>> vfio_fsl_mc_device *vdev) >>> static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device >>> *vdev) >>> { >>> + int i; >>> + >>> + for (i = 0; i < vdev->num_regions; i++) >>> + iounmap(vdev->regions[i].ioaddr); >>> + >>> vdev->num_regions = 0; >>> kfree(vdev->regions); >>> } >>> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void >>> *device_data, unsigned int cmd, >>> static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf, >>> size_t count, loff_t *ppos) >>> { >>> - return -EINVAL; >>> + struct vfio_fsl_mc_device *vdev = device_data; >>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); >>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; >>> + struct vfio_fsl_mc_region *region; >>> + u64 data[8]; >>> + int i; >>> + >>> + if (index >= vdev->num_regions) >>> + return -EINVAL; >>> + >>> + region = &vdev->regions[index]; >>> + >>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ)) >>> + return -EINVAL; >>> + >>> + if (!region->ioaddr) { >>> + region->ioaddr = ioremap(region->addr, region->size); >>> + if (!region->ioaddr) >>> + return -ENOMEM; >>> + } >>> + >>> + if (count != 64 || off != 0) >>> + return -EINVAL; >>> + >>> + for (i = 7; i >= 0; i--) >>> + data[i] = readq(region->ioaddr + i * sizeof(uint64_t)); >>> + >>> + if (copy_to_user(buf, data, 64)) >>> + return -EFAULT; >>> + >>> + return count; >>> +} >>> + >>> +#define MC_CMD_COMPLETION_TIMEOUT_MS 5000 >>> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500 >>> + >>> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t >>> *cmd_data) >>> +{ >>> + int i; >>> + enum mc_cmd_status status; >>> + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000; >>> + >>> + /* Write at command parameter into portal */ >>> + for (i = 7; i >= 1; i--) >>> + writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t)); >>> + >>> + /* Write command header in the end */ >>> + writeq(cmd_data[0], ioaddr); >>> + >>> + /* Wait for response before returning to user-space >>> + * This can be optimized in future to even prepare response >>> + * before returning to user-space and avoid read ioctl. >>> + */ >>> + for (;;) { >>> + u64 header; >>> + struct mc_cmd_header *resp_hdr; >>> + >>> + header = cpu_to_le64(readq_relaxed(ioaddr)); >>> + >>> + resp_hdr = (struct mc_cmd_header *)&header; >>> + status = (enum mc_cmd_status)resp_hdr->status; >>> + if (status != MC_CMD_STATUS_READY) >>> + break; >>> + >>> + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS); >>> + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS; >>> + if (timeout_usecs == 0) >>> + return -ETIMEDOUT; >>> + } >>> + >>> + return 0; >>> } >>> static ssize_t vfio_fsl_mc_write(void *device_data, const char >>> __user *buf, >>> size_t count, loff_t *ppos) >>> { >>> - return -EINVAL; >>> + struct vfio_fsl_mc_device *vdev = device_data; >>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); >>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; >>> + struct vfio_fsl_mc_region *region; >>> + u64 data[8]; >>> + int ret; >>> + >>> + if (index >= vdev->num_regions) >>> + return -EINVAL; >>> + >>> + region = &vdev->regions[index]; >>> + >>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE)) >>> + return -EINVAL; >>> + >>> + if (!region->ioaddr) { >>> + region->ioaddr = ioremap(region->addr, region->size); >>> + if (!region->ioaddr) >>> + return -ENOMEM; >>> + } >>> + >>> + if (count != 64 || off != 0) >>> + return -EINVAL; >>> + >>> + if (copy_from_user(&data, buf, 64)) >>> + return -EFAULT; >>> + >>> + ret = vfio_fsl_mc_send_command(region->ioaddr, data); >>> + if (ret) >>> + return ret; >>> + >>> + return count; >>> + >>> } >>> static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region, >>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h >>> b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h >>> index bbfca8b55f8a..e6804e516c4a 100644 >>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h >>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h >>> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region { >>> u32 type; >>> u64 addr; >>> resource_size_t size; >>> + void __iomem *ioaddr; >>> }; >>> struct vfio_fsl_mc_device { >>> >> Thanks >> >> Eric >> > > Thanks, > Diana >