On 06/21/2016 12:31 AM, Kirti Wankhede wrote:
+static int mdev_dev_mmio_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int ret; + struct vfio_mdev *vmdev = vma->vm_private_data; + struct mdev_device *mdev; + struct parent_device *parent; + u64 virtaddr = (u64)vmf->virtual_address; + u64 offset, phyaddr; + unsigned long req_size, pgoff; + pgprot_t pg_prot; + + if (!vmdev && !vmdev->mdev) + return -EINVAL; + + mdev = vmdev->mdev; + parent = mdev->parent; + + offset = virtaddr - vma->vm_start; + phyaddr = (vma->vm_pgoff << PAGE_SHIFT) + offset; + pgoff = phyaddr >> PAGE_SHIFT; + req_size = vma->vm_end - virtaddr; + pg_prot = vma->vm_page_prot; + + if (parent && parent->ops->validate_map_request) { + mutex_lock(&mdev->ops_lock); + ret = parent->ops->validate_map_request(mdev, virtaddr, + &pgoff, &req_size, + &pg_prot);
It does not only 'validate' but also adjusts the parameters, so the callback name is somewhat misleading. Also, the check "if (!vmdev && !vmdev->mdev)" looks wrong: when vmdev is NULL, the second condition dereferences a NULL pointer — shouldn't that be '||'?
+ mutex_unlock(&mdev->ops_lock); + if (ret) + return ret; + + if (!req_size) + return -EINVAL; + } + + ret = remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot); +
Do you know why there are "Issues with mmap region fault handler, EPT is not correctly populated with the information provided by remap_pfn_range() inside fault handler", as you mentioned in patch 0?
+ return ret | VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct mdev_dev_mmio_ops = { + .fault = mdev_dev_mmio_fault, +}; + + +static int vfio_mpci_mmap(void *device_data, struct vm_area_struct *vma) +{ + unsigned int index; + struct vfio_mdev *vmdev = device_data; + struct mdev_device *mdev = vmdev->mdev; + struct pci_dev *pdev; + unsigned long pgoff; + loff_t offset; + + if (!mdev->parent || !dev_is_pci(mdev->parent->dev)) + return -EINVAL; + + pdev = to_pci_dev(mdev->parent->dev); + + offset = vma->vm_pgoff << PAGE_SHIFT; + + index = VFIO_PCI_OFFSET_TO_INDEX(offset); + + if (index >= VFIO_PCI_ROM_REGION_INDEX) + return -EINVAL; + + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + + vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; + + vma->vm_private_data = vmdev; + vma->vm_ops = &mdev_dev_mmio_ops; + + return 0; +} + +static const struct vfio_device_ops vfio_mpci_dev_ops = { + .name = "vfio-mpci", + .open = vfio_mpci_open, + .release = vfio_mpci_close, + .ioctl = vfio_mpci_unlocked_ioctl, + .read = vfio_mpci_read, + .write = vfio_mpci_write, + .mmap = vfio_mpci_mmap, +}; + +int vfio_mpci_probe(struct device *dev) +{ + struct vfio_mdev *vmdev; + struct mdev_device *mdev = to_mdev_device(dev); + int ret; + + if (!mdev) + return -EINVAL; + + vmdev = kzalloc(sizeof(*vmdev), GFP_KERNEL); + if (IS_ERR(vmdev)) + return PTR_ERR(vmdev); + + vmdev->mdev = mdev_get_device(mdev); + vmdev->group = mdev->group; + mutex_init(&vmdev->vfio_mdev_lock); + + ret = vfio_add_group_dev(dev, &vfio_mpci_dev_ops, vmdev); + if (ret) + kfree(vmdev); + + mdev_put_device(mdev);
If you can make sure that mdev is always valid during vmdev's lifecycle, it is not necessary to get and put the refcount of mdev. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html