From: Xin Xiaohui <xiaohui.xin@xxxxxxxxx>

---
Michael,

I have moved the ioctl that configures the amount of locked memory into
vhost. Please have a look.
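For userspace, driving the new ioctl would look roughly like the sketch
below. This is untested and only illustrative: it assumes the usual
/dev/vhost-net device node and a linux/vhost.h that already carries the
VHOST_SET_MEM_LOCKED definition from this patch (VHOST_SET_OWNER is the
pre-existing vhost ioctl). The rlimit values are in bytes; vhost shifts
by PAGE_SHIFT when it compares against mm->locked_vm.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <linux/vhost.h>

int main(void)
{
	struct rlimit rlim;
	int fd = open("/dev/vhost-net", O_RDWR);

	if (fd < 0) {
		perror("open /dev/vhost-net");
		return 1;
	}
	/* become the owner of this vhost instance (existing ioctl) */
	if (ioctl(fd, VHOST_SET_OWNER) < 0) {
		perror("VHOST_SET_OWNER");
		return 1;
	}
	/* allow up to 64MB of guest memory to be pinned for zero-copy */
	rlim.rlim_cur = rlim.rlim_max = 64UL << 20;
	if (ioctl(fd, VHOST_SET_MEM_LOCKED, &rlim) < 0) {
		perror("VHOST_SET_MEM_LOCKED");
		return 1;
	}
	close(fd);
	return 0;
}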
On the mpassthru side the limit is now checked against mm->locked_vm when
the pages are pinned, and the pages are credited back when the
corresponding iocb completes.
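That is the usual mm->locked_vm accounting pattern (similar accounting
exists elsewhere in the kernel, e.g. in the infiniband memory-pinning
code). A minimal sketch of the pattern, with made-up helper names,
assuming the caller pins npages pages:

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/capability.h>

/* Illustrative only: charge/uncharge pinned pages against
 * RLIMIT_MEMLOCK, the same pattern alloc_page_info() now uses. */
static int mp_charge_locked_pages(int npages)
{
	unsigned long locked, lock_limit;
	int ret = 0;

	down_write(&current->mm->mmap_sem);
	locked = npages + current->mm->locked_vm;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;	/* over the limit, no CAP_IPC_LOCK */
	else
		current->mm->locked_vm = locked;	/* commit the charge */
	up_write(&current->mm->mmap_sem);
	return ret;
}

static void mp_uncharge_locked_pages(int npages)
{
	down_write(&current->mm->mmap_sem);
	current->mm->locked_vm -= npages;
	up_write(&current->mm->mmap_sem);
}

The important detail is that the charge is committed to
current->mm->locked_vm before mmap_sem is dropped, so the decrement in
the completion path balances it.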
Thanks
Xiaohui

 drivers/vhost/mpassthru.c |   75 ++++++++++-----------------------------------
 drivers/vhost/net.c       |   74 ++++++++++++++++++++++++++++++++++++--------
 include/linux/vhost.h     |    3 ++
 3 files changed, 82 insertions(+), 70 deletions(-)

diff --git a/drivers/vhost/mpassthru.c b/drivers/vhost/mpassthru.c
index d86d94c..fd3827b 100644
--- a/drivers/vhost/mpassthru.c
+++ b/drivers/vhost/mpassthru.c
@@ -109,9 +109,6 @@ struct page_ctor {
 	int wq_len;
 	int rq_len;
 	spinlock_t read_lock;
-	/* record the locked pages */
-	int lock_pages;
-	struct rlimit o_rlim;
 	struct net_device *dev;
 	struct mpassthru_port port;
 	struct page_info **hash_table;
@@ -231,7 +228,6 @@ static int page_ctor_attach(struct mp_struct *mp)
 	ctor->port.ctor = page_ctor;
 	ctor->port.sock = &mp->socket;
 	ctor->port.hash = mp_lookup;
-	ctor->lock_pages = 0;
 
 	/* locked by mp_mutex */
 	dev->mp_port = &ctor->port;
@@ -264,37 +260,6 @@ struct page_info *info_dequeue(struct page_ctor *ctor)
 	return info;
 }
 
-static int set_memlock_rlimit(struct page_ctor *ctor, int resource,
-			      unsigned long cur, unsigned long max)
-{
-	struct rlimit new_rlim, *old_rlim;
-	int retval;
-
-	if (resource != RLIMIT_MEMLOCK)
-		return -EINVAL;
-	new_rlim.rlim_cur = cur;
-	new_rlim.rlim_max = max;
-
-	old_rlim = current->signal->rlim + resource;
-
-	/* remember the old rlimit value when backend enabled */
-	ctor->o_rlim.rlim_cur = old_rlim->rlim_cur;
-	ctor->o_rlim.rlim_max = old_rlim->rlim_max;
-
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
-	    !capable(CAP_SYS_RESOURCE))
-		return -EPERM;
-
-	retval = security_task_setrlimit(resource, &new_rlim);
-	if (retval)
-		return retval;
-
-	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
-	task_unlock(current->group_leader);
-	return 0;
-}
-
 static void relinquish_resource(struct page_ctor *ctor)
 {
 	if (!(ctor->dev->flags & IFF_UP) &&
@@ -322,8 +287,6 @@ static void mp_ki_dtor(struct kiocb *iocb)
 		info->ctor->rq_len--;
 	} else
 		info->ctor->wq_len--;
-	/* Decrement the number of locked pages */
-	info->ctor->lock_pages -= info->pnum;
 	kmem_cache_free(ext_page_info_cache, info);
 
 	relinquish_resource(info->ctor);
@@ -349,7 +312,7 @@ static struct kiocb *create_iocb(struct page_info *info, int size)
 		iocb->ki_dtor(iocb);
 	iocb->private = (void *)info;
 	iocb->ki_dtor = mp_ki_dtor;
-
+	iocb->ki_user_data = info->pnum;
 	return iocb;
 }
 
@@ -375,10 +338,6 @@ static int page_ctor_detach(struct mp_struct *mp)
 
 	relinquish_resource(ctor);
 
-	set_memlock_rlimit(ctor, RLIMIT_MEMLOCK,
-			   ctor->o_rlim.rlim_cur,
-			   ctor->o_rlim.rlim_max);
-
 	/* locked by mp_mutex */
 	ctor->dev->mp_port = NULL;
 	dev_put(ctor->dev);
@@ -565,21 +524,24 @@ static struct page_info *alloc_page_info(struct page_ctor *ctor,
 	int rc;
 	int i, j, n = 0;
 	int len;
-	unsigned long base, lock_limit;
+	unsigned long base, lock_limit, locked;
 	struct page_info *info = NULL;
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
-	lock_limit >>= PAGE_SHIFT;
+	down_write(&current->mm->mmap_sem);
+	locked = count + current->mm->locked_vm;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	if (ctor->lock_pages + count > lock_limit && npages) {
-		printk(KERN_INFO "exceed the locked memory rlimit.");
-		return NULL;
-	}
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK))
+		goto out;
 
 	info = kmem_cache_alloc(ext_page_info_cache, GFP_KERNEL);
 	if (!info)
-		return NULL;
+		goto out;
+
+	current->mm->locked_vm = locked;
+	up_write(&current->mm->mmap_sem);
 
 	info->skb = NULL;
 	info->next = info->prev = NULL;
@@ -633,8 +595,7 @@ static struct page_info *alloc_page_info(struct page_ctor *ctor,
 		for (i = 0; i < j; i++)
 			mp_hash_insert(ctor, info->pages[i], info);
 	}
-	/* increment the number of locked pages */
-	ctor->lock_pages += j;
+
 	return info;
 
 failed:
@@ -642,7 +603,9 @@ failed:
 		put_page(info->pages[i]);
 
 	kmem_cache_free(ext_page_info_cache, info);
-
+	return NULL;
+out:
+	up_write(&current->mm->mmap_sem);
 	return NULL;
 }
 
@@ -1006,12 +969,6 @@ proceed:
 		count--;
 	}
 
-	if (!ctor->lock_pages || !ctor->rq_len) {
-		set_memlock_rlimit(ctor, RLIMIT_MEMLOCK,
-				   iocb->ki_user_data * 4096 * 2,
-				   iocb->ki_user_data * 4096 * 2);
-	}
-
 	/* Translate address to kernel */
 	info = alloc_page_info(ctor, iocb, iov, count, frags, npages, 0);
 	if (!info)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c4bc815..da78837 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -42,6 +42,7 @@ enum {
 };
 
 static struct kmem_cache *notify_cache;
+static struct rlimit orig_rlim;
 
 enum vhost_net_poll_state {
 	VHOST_NET_POLL_DISABLED = 0,
@@ -136,13 +137,7 @@ static void handle_async_rx_events_notify(struct vhost_net *net,
 	struct vhost_log *vq_log = NULL;
 	int rx_total_len = 0;
 	unsigned int head, log, in, out;
-	int size;
-	int count;
-
-	struct virtio_net_hdr_mrg_rxbuf hdr = {
-		.hdr.flags = 0,
-		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
-	};
+	int size, count, free = 0;
 
 	if (!is_async_vq(vq))
 		return;
@@ -160,7 +155,7 @@ static void handle_async_rx_events_notify(struct vhost_net *net,
 		size = iocb->ki_nbytes;
 		head = iocb->ki_pos;
 		rx_total_len += iocb->ki_nbytes;
-
+		free += iocb->ki_user_data;
 		if (iocb->ki_dtor)
 			iocb->ki_dtor(iocb);
 		kmem_cache_free(net->cache, iocb);
@@ -192,6 +187,7 @@ static void handle_async_rx_events_notify(struct vhost_net *net,
 			size = iocb->ki_nbytes;
 			head = iocb->ki_pos;
 			rx_total_len += iocb->ki_nbytes;
+			free += iocb->ki_user_data;
 
 			if (iocb->ki_dtor)
 				iocb->ki_dtor(iocb);
@@ -211,7 +207,6 @@ static void handle_async_rx_events_notify(struct vhost_net *net,
 				break;
 
 			i++;
-			iocb == NULL;
 			if (count)
 				iocb = notify_dequeue(vq);
 		}
@@ -219,6 +214,10 @@ static void handle_async_rx_events_notify(struct vhost_net *net,
 				&net->dev, vq, vq->heads, hc);
 		}
 	}
+	/* the pages are unpinned now, update the locked memory accounting */
+	down_write(&current->mm->mmap_sem);
+	current->mm->locked_vm -= free;
+	up_write(&current->mm->mmap_sem);
 }
 
 static void handle_async_tx_events_notify(struct vhost_net *net,
@@ -227,7 +226,7 @@ static void handle_async_tx_events_notify(struct vhost_net *net,
 	struct kiocb *iocb = NULL;
 	struct list_head *entry, *tmp;
 	unsigned long flags;
-	int tx_total_len = 0;
+	int tx_total_len = 0, free = 0;
 
 	if (!is_async_vq(vq))
 		return;
@@ -242,7 +241,7 @@ static void handle_async_tx_events_notify(struct vhost_net *net,
 		vhost_add_used_and_signal(&net->dev, vq,
 					  iocb->ki_pos, 0);
 		tx_total_len += iocb->ki_nbytes;
-
+		free += iocb->ki_user_data;
 		if (iocb->ki_dtor)
 			iocb->ki_dtor(iocb);
 
@@ -253,6 +252,10 @@ static void handle_async_tx_events_notify(struct vhost_net *net,
 		}
 	}
 	spin_unlock_irqrestore(&vq->notify_lock, flags);
+	/* the pages are unpinned now, update the locked memory accounting */
+	down_write(&current->mm->mmap_sem);
+	current->mm->locked_vm -= free;
+	up_write(&current->mm->mmap_sem);
 }
 
 static struct kiocb *create_iocb(struct vhost_net *net,
@@ -581,6 +584,7 @@ static void handle_rx_net(struct work_struct *work)
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
 	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+	struct rlimit *old_rlim;
 	int r;
 	if (!n)
 		return -ENOMEM;
@@ -597,6 +601,12 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
 	n->cache = NULL;
 
+	old_rlim = current->signal->rlim + RLIMIT_MEMLOCK;
+
+	/* remember the old rlimit value so it can be restored on release */
+	orig_rlim.rlim_cur = old_rlim->rlim_cur;
+	orig_rlim.rlim_max = old_rlim->rlim_max;
+
 	f->private_data = n;
 
 	return 0;
@@ -659,6 +669,37 @@ static void vhost_net_flush(struct vhost_net *n)
 	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
 }
 
+static long vhost_net_set_mem_locked(struct vhost_net *n,
+				     unsigned long cur,
+				     unsigned long max)
+{
+	struct rlimit new_rlim, *old_rlim;
+	int retval = 0;
+
+	mutex_lock(&n->dev.mutex);
+	new_rlim.rlim_cur = cur;
+	new_rlim.rlim_max = max;
+
+	old_rlim = current->signal->rlim + RLIMIT_MEMLOCK;
+
+	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	    !capable(CAP_SYS_RESOURCE)) {
+		retval = -EPERM;
+		goto err;
+	}
+
+	retval = security_task_setrlimit(RLIMIT_MEMLOCK, &new_rlim);
+	if (retval)
+		goto err;
+
+	task_lock(current->group_leader);
+	*old_rlim = new_rlim;
+	task_unlock(current->group_leader);
+err:
+	mutex_unlock(&n->dev.mutex);
+	return retval;
+}
+
 static void vhost_async_cleanup(struct vhost_net *n)
 {
 	/* clean the notifier */
@@ -691,6 +732,10 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
 	vhost_async_cleanup(n);
+	/* restore the original rlimit */
+	vhost_net_set_mem_locked(n,
+				 orig_rlim.rlim_cur,
+				 orig_rlim.rlim_max);
 	kfree(n);
 	return 0;
 }
@@ -913,6 +958,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 	void __user *argp = (void __user *)arg;
 	u64 __user *featurep = argp;
 	struct vhost_vring_file backend;
+	struct rlimit rlim;
 	u64 features;
 	int r;
 	switch (ioctl) {
@@ -933,6 +979,12 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 		return vhost_net_set_features(n, features);
 	case VHOST_RESET_OWNER:
 		return vhost_net_reset_owner(n);
+	case VHOST_SET_MEM_LOCKED:
+		if (copy_from_user(&rlim, argp, sizeof rlim))
+			return -EFAULT;
+		return vhost_net_set_mem_locked(n,
+						rlim.rlim_cur,
+						rlim.rlim_max);
 	default:
 		mutex_lock(&n->dev.mutex);
 		r = vhost_dev_ioctl(&n->dev, ioctl, arg);
diff --git a/include/linux/vhost.h b/include/linux/vhost.h
index e847f1e..df93f5a 100644
--- a/include/linux/vhost.h
+++ b/include/linux/vhost.h
@@ -92,6 +92,9 @@ struct vhost_memory {
 /* Specify an eventfd file descriptor to signal on log write. */
 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
 
+/* Specify how much locked memory can be used */
+#define VHOST_SET_MEM_LOCKED _IOW(VHOST_VIRTIO, 0x08, struct rlimit)
+
 /* Ring setup. */
 /* Set number of descriptors in ring. This parameter can not
  * be modified while ring is running (bound to a device). */
-- 
1.5.4.4