On 2024/9/2 14:57, Leon Romanovsky wrote:
> On Wed, Aug 28, 2024 at 02:46:04PM +0800, Junxian Huang wrote:
>> From: Chengchang Tang <tangchengchang@xxxxxxxxxx>
>>
>> Provide a new api rdma_user_mmap_disassociate() for drivers to
>> disassociate mmap pages for a device.
>>
>> Signed-off-by: Chengchang Tang <tangchengchang@xxxxxxxxxx>
>> Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx>
>> ---
>>  drivers/infiniband/core/uverbs.h      |  3 ++
>>  drivers/infiniband/core/uverbs_main.c | 45 +++++++++++++++++++++++++--
>>  include/rdma/ib_verbs.h               |  8 +++++
>>  3 files changed, 54 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
>> index 821d93c8f712..0999d27cb1c9 100644
>> --- a/drivers/infiniband/core/uverbs.h
>> +++ b/drivers/infiniband/core/uverbs.h
>> @@ -160,6 +160,9 @@ struct ib_uverbs_file {
>>  	struct page *disassociate_page;
>>
>>  	struct xarray		idr;
>> +
>> +	struct mutex disassociation_lock;
>> +	atomic_t disassociated;
>>  };
>>
>>  struct ib_uverbs_event {
>> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
>> index bc099287de9a..589f27c09a2e 100644
>> --- a/drivers/infiniband/core/uverbs_main.c
>> +++ b/drivers/infiniband/core/uverbs_main.c
>> @@ -76,6 +76,7 @@ static dev_t dynamic_uverbs_dev;
>>  static DEFINE_IDA(uverbs_ida);
>>  static int ib_uverbs_add_one(struct ib_device *device);
>>  static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
>> +static struct ib_client uverbs_client;
>>
>>  static char *uverbs_devnode(const struct device *dev, umode_t *mode)
>>  {
>> @@ -217,6 +218,7 @@ void ib_uverbs_release_file(struct kref *ref)
>>
>>  	if (file->disassociate_page)
>>  		__free_pages(file->disassociate_page, 0);
>> +	mutex_destroy(&file->disassociation_lock);
>>  	mutex_destroy(&file->umap_lock);
>>  	mutex_destroy(&file->ucontext_lock);
>>  	kfree(file);
>> @@ -700,6 +702,12 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
>>  		ret = PTR_ERR(ucontext);
>>  		goto out;
>>  	}
>> +
>> +	if (atomic_read(&file->disassociated)) {
>
> I don't see any of the newly introduced locks here. If it is
> intentional, it needs to be documented.
>
<...>
>> +		ret = -EPERM;
>> +		goto out;
>> +	}
>> +
>>  	vma->vm_ops = &rdma_umap_ops;
>>  	ret = ucontext->device->ops.mmap(ucontext, vma);
>>  out:
>> @@ -726,7 +734,7 @@ static void rdma_umap_open(struct vm_area_struct *vma)
>>  	/*
>>  	 * Disassociation already completed, the VMA should already be zapped.
>>  	 */
>> -	if (!ufile->ucontext)
>> +	if (!ufile->ucontext || atomic_read(&ufile->disassociated))
>>  		goto out_unlock;
>>
>>  	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>> @@ -822,6 +830,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>>  	struct rdma_umap_priv *priv, *next_priv;
>>
>>  	lockdep_assert_held(&ufile->hw_destroy_rwsem);
>> +	mutex_lock(&ufile->disassociation_lock);
>> +	atomic_set(&ufile->disassociated, 1);
>
> Why do you use atomic_t and not regular bool?
>

The original thought was that ib_uverbs_mmap() reads ufile->disassociated
while uverbs_user_mmap_disassociate() writes it, so there might be a race
between them. We used atomic_t to avoid that race without adding a new lock.
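In other words, the intent was a plain flag handshake with no extra lock,
roughly like this (only a sketch of the idea, not the actual patch):

	/* disassociate side: mark the file first, then zap existing VMAs */
	atomic_set(&ufile->disassociated, 1);
	/* ... zap every VMA already tracked on ufile->umaps ... */

	/* mmap side: refuse to create new mappings once the flag is set */
	if (atomic_read(&file->disassociated)) {
		ret = -EPERM;
		goto out;
	}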
But I looked into the code again and now I think ufile->disassociated alone
is not sufficient to deal with a race like this:

ib_uverbs_mmap()                            uverbs_user_mmap_disassociate()
----------------                            -------------------------------
atomic_read(&file->disassociated) == 0
                                            atomic_set(&ufile->disassociated, 1)
                                            all mmaps from the list are zapped
ucontext->device->ops.mmap(ucontext, vma)
  rdma_user_mmap_io()
    rdma_umap_priv_init()
      adds a new mmap to the list

So we may still need a lock, and ufile->disassociated can be a bool now,
something like:

@@ -700,9 +702,17 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 		ret = PTR_ERR(ucontext);
 		goto out;
 	}
+
+	mutex_lock(&file->disassociation_lock);
+	if (file->disassociated) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+
 	vma->vm_ops = &rdma_umap_ops;
 	ret = ucontext->device->ops.mmap(ucontext, vma);
+out_unlock:
+	mutex_unlock(&file->disassociation_lock);
 out:
 	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 	return ret;
 }

Similar changes in rdma_umap_open():

@@ -723,10 +733,12 @@ static void rdma_umap_open(struct vm_area_struct *vma)
 	/* We are racing with disassociation */
 	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
 		goto out_zap;
+
+	mutex_lock(&ufile->disassociation_lock);
 	/*
 	 * Disassociation already completed, the VMA should already be zapped.
 	 */
-	if (!ufile->ucontext)
+	if (!ufile->ucontext || ufile->disassociated)
 		goto out_unlock;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -734,10 +746,12 @@ static void rdma_umap_open(struct vm_area_struct *vma)
 		goto out_unlock;
 
 	rdma_umap_priv_init(priv, vma, opriv->entry);
+	mutex_unlock(&ufile->disassociation_lock);
 	up_read(&ufile->hw_destroy_rwsem);
 	return;
 
 out_unlock:
+	mutex_unlock(&ufile->disassociation_lock);
 	up_read(&ufile->hw_destroy_rwsem);
 out_zap:
 	/*

>>
>>  	while (1) {
>>  		struct mm_struct *mm = NULL;
>> @@ -847,8 +857,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>>  			break;
>>  		}
>>  		mutex_unlock(&ufile->umap_lock);
>> -		if (!mm)
>> +		if (!mm) {
>> +			mutex_unlock(&ufile->disassociation_lock);
>>  			return;
>> +		}
>>
>>  		/*
>>  		 * The umap_lock is nested under mmap_lock since it used within
>> @@ -878,8 +890,34 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>>  		mmap_read_unlock(mm);
>>  		mmput(mm);
>>  	}
>> +
>> +	mutex_unlock(&ufile->disassociation_lock);
>>  }
>>
>> +/**
>> + * rdma_user_mmap_disassociate() - Revoke mmaps for a device
>> + * @device: device to revoke
>> + *
>> + * This function should be called by drivers that need to disable mmaps for the
>> + * device, for instance because it is going to be reset.
>> + */
>> +void rdma_user_mmap_disassociate(struct ib_device *device)
>> +{
>> +	struct ib_uverbs_device *uverbs_dev =
>> +		ib_get_client_data(device, &uverbs_client);
>> +	struct ib_uverbs_file *ufile;
>> +
>> +	mutex_lock(&uverbs_dev->lists_mutex);
>> +	list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
>> +		down_read(&ufile->hw_destroy_rwsem);
>
> I personally don't understand this locking scheme at all. I see newly
> introduced locks mixed together some old locks.
>

We must hold the rwsem because of the lockdep_assert_held() in
uverbs_user_mmap_disassociate(). The newly introduced lock is also needed
to prevent the race where one thread is calling
rdma_user_mmap_disassociate() while another thread is calling
ib_uverbs_mmap() or rdma_umap_open().

Thanks,
Junxian

> Jason, do you agree with this proposed locking scheme?
>
> Thanks
>
>> +		if (ufile->ucontext && !atomic_read(&ufile->disassociated))
>> +			uverbs_user_mmap_disassociate(ufile);
>> +		up_read(&ufile->hw_destroy_rwsem);
>> +	}
>> +	mutex_unlock(&uverbs_dev->lists_mutex);
>> +}
>> +EXPORT_SYMBOL(rdma_user_mmap_disassociate);
>> +
>>  /*
>>   * ib_uverbs_open() does not need the BKL:
>>   *
>> @@ -949,6 +987,9 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
>>  	mutex_init(&file->umap_lock);
>>  	INIT_LIST_HEAD(&file->umaps);
>>
>> +	mutex_init(&file->disassociation_lock);
>> +	atomic_set(&file->disassociated, 0);
>> +
>>  	filp->private_data = file;
>>  	list_add_tail(&file->list, &dev->uverbs_file_list);
>>  	mutex_unlock(&dev->lists_mutex);
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index a1dcf812d787..09b80c8253e2 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -2948,6 +2948,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
>>  				      size_t length, u32 min_pgoff,
>>  				      u32 max_pgoff);
>>
>> +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
>> +void rdma_user_mmap_disassociate(struct ib_device *device);
>> +#else
>> +static inline void rdma_user_mmap_disassociate(struct ib_device *device)
>> +{
>> +}
>> +#endif
>> +
>>  static inline int
>>  rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
>>  				  struct rdma_user_mmap_entry *entry,
>> --
>> 2.33.0
>>
>>
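For reference, the driver-side usage we have in mind for the reset flow
would look roughly like this (the function and structure names below are
made up for illustration only and are not part of this series):

	/* Hypothetical driver hook, called when the HW is about to reset. */
	static void example_drv_prepare_reset(struct example_dev *edev)
	{
		/*
		 * Revoke all userspace mappings (doorbells, device memory)
		 * before the HW goes away. After this, mmap() on the existing
		 * uverbs files fails with -EPERM.
		 */
		rdma_user_mmap_disassociate(&edev->ib_dev);

		/* ... driver-specific reset handling ... */
	}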