On 2024/8/23 23:25, Jason Gunthorpe wrote: > On Mon, Aug 12, 2024 at 08:56:38PM +0800, Junxian Huang wrote: >> From: Chengchang Tang <tangchengchang@xxxxxxxxxx> >> >> Provide a new api rdma_user_mmap_disassociate() for drivers to >> disassociate mmap pages for ucontext. >> >> Signed-off-by: Chengchang Tang <tangchengchang@xxxxxxxxxx> >> Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx> >> --- >> drivers/infiniband/core/uverbs_main.c | 21 +++++++++++++++++++++ >> include/rdma/ib_verbs.h | 1 + >> 2 files changed, 22 insertions(+) >> >> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c >> index bc099287de9a..00dab5bfb78c 100644 >> --- a/drivers/infiniband/core/uverbs_main.c >> +++ b/drivers/infiniband/core/uverbs_main.c >> @@ -880,6 +880,27 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) >> } >> } >> >> +/** >> + * rdma_user_mmap_disassociate() - disassociate the mmap from the ucontext. >> + * >> + * @ucontext: associated user context. >> + * >> + * This function should be called by drivers that need to disable mmap for >> + * some ucontexts. >> + */ >> +void rdma_user_mmap_disassociate(struct ib_ucontext *ucontext) >> +{ >> + struct ib_uverbs_file *ufile = ucontext->ufile; >> + >> + /* Racing with uverbs_destroy_ufile_hw */ >> + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) >> + return; > > This is not quite right here, in the other cases lock failure is > aborting an operation that is about to start, in this case we are must > ensure the zap completed otherwise we break the contract of no mmaps > upon return. > > So at least this needs to be a naked down_read() > > But.. > > That lock lockdep assertion in uverbs_user_mmap_disassociate() I think > was supposed to say the write side is held, which we can't actually > get here. > > This is because the nasty algorithm works by pulling things off the > list, if we don't have a lock then one thread could be working on an > item while another thread is unaware which will also break the > contract. > > You may need to add a dedicated mutex inside > uverbs_user_mmap_disassociate() and not try to re-use > hw_destroy_rwsem. > Thanks for the reply, Jason. Based on your modification in patch #3, we plan to change the codes like this (some init and destroy are omitted here). We'll re-send as soon as we finish the testings. diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 821d93c8f712..05b589aad5ef 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -160,6 +160,9 @@ struct ib_uverbs_file { struct page *disassociate_page; struct xarray idr; + + struct mutex disassociation_lock; + bool disassociated; }; struct ib_uverbs_event { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6b4d5a660a2f..6503f9a23211 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -722,12 +722,12 @@ static void rdma_umap_open(struct vm_area_struct *vma) return; /* We are racing with disassociation */ - if (!down_read_trylock(&ufile->hw_destroy_rwsem)) + if (!mutex_trylock(&ufile->disassociation_lock)) goto out_zap; /* * Disassociation already completed, the VMA should already be zapped. */ - if (!ufile->ucontext) + if (!ufile->ucontext || ufile->disassociated) goto out_unlock; priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -735,11 +735,11 @@ static void rdma_umap_open(struct vm_area_struct *vma) goto out_unlock; rdma_umap_priv_init(priv, vma, opriv->entry); - up_read(&ufile->hw_destroy_rwsem); + mutex_unlock(&ufile->disassociation_lock); return; out_unlock: - up_read(&ufile->hw_destroy_rwsem); + mutex_unlock(&ufile->disassociation_lock); out_zap: /* * We can't allow the VMA to be created with the actual IO pages, that @@ -823,6 +823,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) struct rdma_umap_priv *priv, *next_priv; lockdep_assert_held(&ufile->hw_destroy_rwsem); + mutex_lock(&ufile->disassociation_lock); + ufile->disassociated = true; while (1) { struct mm_struct *mm = NULL; @@ -848,8 +850,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) break; } mutex_unlock(&ufile->umap_lock); - if (!mm) + if (!mm) { + mutex_unlock(&ufile->disassociation_lock); return; + } /* * The umap_lock is nested under mmap_lock since it used within @@ -879,6 +883,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) mmap_read_unlock(mm); mmput(mm); } + + mutex_unlock(&ufile->disassociation_lock); } /** @@ -897,7 +903,8 @@ void rdma_user_mmap_disassociate(struct ib_device *device) mutex_lock(&uverbs_dev->lists_mutex); list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) { down_read(&ufile->hw_destroy_rwsem); - uverbs_user_mmap_disassociate(ufile); + if (ufile->ucontext && !ufile->disassociated) + uverbs_user_mmap_disassociate(ufile); up_read(&ufile->hw_destroy_rwsem); } mutex_unlock(&uverbs_dev->lists_mutex); > Jason