Add an ioctl to transfer file descriptor ownership and pinned memory accounting from one process to another. This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER, as that would unpin all physical pages, requiring them to be repinned in the new process. That would cost multiple seconds for large memories, and be incurred during a virtual machine's pause time during live update. Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx> --- drivers/vhost/vdpa.c | 41 ++++++++++++++++++++++++++++++++++++++ drivers/vhost/vhost.c | 15 ++++++++++++++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 10 ++++++++++ 4 files changed, 67 insertions(+) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b49e5831b3f0..5cf55ca4ec02 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v) return ret; } +static long vhost_vdpa_new_owner(struct vhost_vdpa *v) +{ + int r; + struct vhost_dev *vdev = &v->vdev; + struct mm_struct *mm_old = vdev->mm; + struct mm_struct *mm_new = current->mm; + long pinned_vm = v->pinned_vm; + unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); + + if (!mm_old) + return -EINVAL; + mmgrab(mm_old); + + if (!v->vdpa->use_va && + pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) { + r = -ENOMEM; + goto out; + } + r = vhost_vdpa_bind_mm(v, mm_new); + if (r) + goto out; + + r = vhost_dev_new_owner(vdev); + if (r) { + vhost_vdpa_bind_mm(v, mm_old); + goto out; + } + + if (!v->vdpa->use_va) { + atomic64_sub(pinned_vm, &mm_old->pinned_vm); + atomic64_add(pinned_vm, &mm_new->pinned_vm); + } + +out: + mmdrop(mm_old); + return r; +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_RESUME: r = vhost_vdpa_resume(v); break; + case VHOST_NEW_OWNER: + r = vhost_vdpa_new_owner(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index b60955682474..ab40ae50552f 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); +/* Caller should have device mutex */ +long vhost_dev_new_owner(struct vhost_dev *dev) +{ + if (dev->mm == current->mm) + return -EBUSY; + + if (!vhost_dev_has_owner(dev)) + return -EINVAL; + + vhost_detach_mm(dev); + vhost_attach_mm(dev); + return 0; +} +EXPORT_SYMBOL_GPL(vhost_dev_new_owner); + static struct vhost_iotlb *iotlb_alloc(void) { return vhost_iotlb_alloc(max_iotlb_entries, diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index bb75a292d50c..8b2018bb02b1 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -187,6 +187,7 @@ void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg)); long vhost_dev_set_owner(struct vhost_dev *dev); +long vhost_dev_new_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); struct vhost_iotlb *vhost_dev_reset_owner_prepare(void); diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index b95dd84eef2d..543d0e3434c3 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -123,6 +123,16 @@ #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) +/* Set current process as the new owner of this file descriptor. The fd must + * already be owned, via a prior call to VHOST_SET_OWNER. The pinned memory + * count is transferred from the previous to the new owner. + * Errors: + * EINVAL: not owned + * EBUSY: caller is already the owner + * ENOMEM: RLIMIT_MEMLOCK exceeded + */ +#define VHOST_NEW_OWNER _IO(VHOST_VIRTIO, 0x27) + /* VHOST_NET specific defines */ /* Attach virtio net ring to a raw socket, or tap device. -- 2.39.3