On Mon, Apr 29, 2019 at 11:11:49AM +0200, gregkh@xxxxxxxxxxxxxxxxxxx wrote:
>
> This is a note to let you know that I've just added the patch titled
>
>     RDMA/ucontext: Fix regression with disassociate
>
> to the 5.0-stable tree which can be found at:
>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
>
> The filename of the patch is:
>     rdma-ucontext-fix-regression-with-disassociate.patch
> and it can be found in the queue-5.0 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@xxxxxxxxxxxxxxx> know about it.

Greg,

Please be aware that this patch has compilation issues on the s390 platform.
https://patchwork.kernel.org/patch/10920895/#22610993

Thanks

>
>
> From 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 Mon Sep 17 00:00:00 2001
> From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
> Date: Tue, 16 Apr 2019 14:07:28 +0300
> Subject: RDMA/ucontext: Fix regression with disassociate
>
> From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
>
> commit 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 upstream.
>
> When this code was consolidated the intention was that the VMA would
> become backed by anonymous zero pages after the zap_vma_pte - however this
> very subtly relied on setting the vm_ops = NULL and clearing the VM_SHARED
> bits to transform the VMA into an anonymous VMA. Since the vm_ops was
> removed this broke.
>
> Now userspace gets a SIGBUS if it touches the vma after disassociation.
>
> Instead of converting the VMA to anonymous provide a fault handler that
> puts a zero'd page into the VMA when user-space touches it after
> disassociation.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Suggested-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory")
> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
>
> ---
>  drivers/infiniband/core/uverbs.h      |    1
>  drivers/infiniband/core/uverbs_main.c |   52 ++++++++++++++++++++++++++++++++--
>  2 files changed, 50 insertions(+), 3 deletions(-)
>
> --- a/drivers/infiniband/core/uverbs.h
> +++ b/drivers/infiniband/core/uverbs.h
> @@ -160,6 +160,7 @@ struct ib_uverbs_file {
>
>  	struct mutex umap_lock;
>  	struct list_head umaps;
> +	struct page *disassociate_page;
>
>  	struct idr idr;
>  	/* spinlock protects write access to idr */
> --- a/drivers/infiniband/core/uverbs_main.c
> +++ b/drivers/infiniband/core/uverbs_main.c
> @@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref
>  		kref_put(&file->async_file->ref,
>  			 ib_uverbs_release_async_event_file);
>  	put_device(&file->device->dev);
> +
> +	if (file->disassociate_page)
> +		__free_pages(file->disassociate_page, 0);
>  	kfree(file);
>  }
>
> @@ -876,9 +879,50 @@ static void rdma_umap_close(struct vm_ar
>  	kfree(priv);
>  }
>
> +/*
> + * Once the zap_vma_ptes has been called touches to the VMA will come here and
> + * we return a dummy writable zero page for all the pfns.
> + */
> +static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
> +{
> +	struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
> +	struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
> +	vm_fault_t ret = 0;
> +
> +	if (!priv)
> +		return VM_FAULT_SIGBUS;
> +
> +	/* Read only pages can just use the system zero page. */
> +	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
> +		vmf->page = ZERO_PAGE(vmf->vm_start);
> +		get_page(vmf->page);
> +		return 0;
> +	}
> +
> +	mutex_lock(&ufile->umap_lock);
> +	if (!ufile->disassociate_page)
> +		ufile->disassociate_page =
> +			alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
> +
> +	if (ufile->disassociate_page) {
> +		/*
> +		 * This VMA is forced to always be shared so this doesn't have
> +		 * to worry about COW.
> +		 */
> +		vmf->page = ufile->disassociate_page;
> +		get_page(vmf->page);
> +	} else {
> +		ret = VM_FAULT_SIGBUS;
> +	}
> +	mutex_unlock(&ufile->umap_lock);
> +
> +	return ret;
> +}
> +
>  static const struct vm_operations_struct rdma_umap_ops = {
>  	.open = rdma_umap_open,
>  	.close = rdma_umap_close,
> +	.fault = rdma_umap_fault,
>  };
>
>  static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
> @@ -888,6 +932,9 @@ static struct rdma_umap_priv *rdma_user_
>  	struct ib_uverbs_file *ufile = ucontext->ufile;
>  	struct rdma_umap_priv *priv;
>
> +	if (!(vma->vm_flags & VM_SHARED))
> +		return ERR_PTR(-EINVAL);
> +
>  	if (vma->vm_end - vma->vm_start != size)
>  		return ERR_PTR(-EINVAL);
>
> @@ -991,7 +1038,7 @@ void uverbs_user_mmap_disassociate(struc
>  		 * at a time to get the lock ordering right. Typically there
>  		 * will only be one mm, so no big deal.
>  		 */
> -		down_write(&mm->mmap_sem);
> +		down_read(&mm->mmap_sem);
>  		if (!mmget_still_valid(mm))
>  			goto skip_mm;
>  		mutex_lock(&ufile->umap_lock);
> @@ -1005,11 +1052,10 @@ void uverbs_user_mmap_disassociate(struc
>
>  			zap_vma_ptes(vma, vma->vm_start,
>  				     vma->vm_end - vma->vm_start);
> -			vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
>  		}
>  		mutex_unlock(&ufile->umap_lock);
>  skip_mm:
> -		up_write(&mm->mmap_sem);
> +		up_read(&mm->mmap_sem);
>  		mmput(mm);
>  	}
> }
>
>
> Patches currently in stable-queue which might be from jgg@xxxxxxxxxxxx are
>
> queue-5.0/rdma-ucontext-fix-regression-with-disassociate.patch
> queue-5.0/ib-rdmavt-fix-frwr-memory-registration.patch
> queue-5.0/rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch
> queue-5.0/rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch
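
P.S. For readers who don't want to follow the patchwork link: this is my reading
of the quoted diff, not a summary of the report itself. The read-only branch of
the new fault handler passes vmf->vm_start to ZERO_PAGE(), but struct vm_fault
has no vm_start member. On x86 the ZERO_PAGE() macro never expands its argument,
so the bogus expression goes unnoticed; on s390 ZERO_PAGE() does read its
argument, so the file fails to compile there. A minimal sketch of that branch of
rdma_umap_fault() written against vmf->address (the faulting address) instead,
which is my assumption about the intended fix rather than the authoritative one
from the link:

	/* Read only pages can just use the system zero page. */
	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
		/* vmf->address is the faulting address; vm_start is not a vm_fault field */
		vmf->page = ZERO_PAGE(vmf->address);
		get_page(vmf->page);
		return 0;
	}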