Re: [PATCH v3 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2024/9/2 14:57, Leon Romanovsky wrote:
> On Wed, Aug 28, 2024 at 02:46:04PM +0800, Junxian Huang wrote:
>> From: Chengchang Tang <tangchengchang@xxxxxxxxxx>
>>
>> Provide a new API, rdma_user_mmap_disassociate(), for drivers to
>> disassociate mmap pages for a device.
>>
>> Signed-off-by: Chengchang Tang <tangchengchang@xxxxxxxxxx>
>> Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx>
>> ---
>>  drivers/infiniband/core/uverbs.h      |  3 ++
>>  drivers/infiniband/core/uverbs_main.c | 45 +++++++++++++++++++++++++--
>>  include/rdma/ib_verbs.h               |  8 +++++
>>  3 files changed, 54 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
>> index 821d93c8f712..0999d27cb1c9 100644
>> --- a/drivers/infiniband/core/uverbs.h
>> +++ b/drivers/infiniband/core/uverbs.h
>> @@ -160,6 +160,9 @@ struct ib_uverbs_file {
>>  	struct page *disassociate_page;
>>  
>>  	struct xarray		idr;
>> +
>> +	struct mutex disassociation_lock;
>> +	atomic_t disassociated;
>>  };
>>  
>>  struct ib_uverbs_event {
>> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
>> index bc099287de9a..589f27c09a2e 100644
>> --- a/drivers/infiniband/core/uverbs_main.c
>> +++ b/drivers/infiniband/core/uverbs_main.c
>> @@ -76,6 +76,7 @@ static dev_t dynamic_uverbs_dev;
>>  static DEFINE_IDA(uverbs_ida);
>>  static int ib_uverbs_add_one(struct ib_device *device);
>>  static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
>> +static struct ib_client uverbs_client;
>>  
>>  static char *uverbs_devnode(const struct device *dev, umode_t *mode)
>>  {
>> @@ -217,6 +218,7 @@ void ib_uverbs_release_file(struct kref *ref)
>>  
>>  	if (file->disassociate_page)
>>  		__free_pages(file->disassociate_page, 0);
>> +	mutex_destroy(&file->disassociation_lock);
>>  	mutex_destroy(&file->umap_lock);
>>  	mutex_destroy(&file->ucontext_lock);
>>  	kfree(file);
>> @@ -700,6 +702,12 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
>>  		ret = PTR_ERR(ucontext);
>>  		goto out;
>>  	}
>> +
>> +	if (atomic_read(&file->disassociated)) {
> 
> I don't see any of the newly introduced locks here. If it is
> intentional, it needs to be documented.
> 

<...>

>> +		ret = -EPERM;
>> +		goto out;
>> +	}
>> +
>>  	vma->vm_ops = &rdma_umap_ops;
>>  	ret = ucontext->device->ops.mmap(ucontext, vma);
>>  out:
>> @@ -726,7 +734,7 @@ static void rdma_umap_open(struct vm_area_struct *vma)
>>  	/*
>>  	 * Disassociation already completed, the VMA should already be zapped.
>>  	 */
>> -	if (!ufile->ucontext)
>> +	if (!ufile->ucontext || atomic_read(&ufile->disassociated))
>>  		goto out_unlock;
>>  
>>  	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>> @@ -822,6 +830,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>>  	struct rdma_umap_priv *priv, *next_priv;
>>  
>>  	lockdep_assert_held(&ufile->hw_destroy_rwsem);
>> +	mutex_lock(&ufile->disassociation_lock);
>> +	atomic_set(&ufile->disassociated, 1);
> 
> Why do you use atomic_t and not regular bool?
> 

The original thought was that ib_uverbs_mmap() reads ufile->disassociated while
uverbs_user_mmap_disassociate() writes it, so there might be a race. We tried
to use atomic_t to avoid that race without adding locks.

But I looked into the code again, and now I think ufile->disassociated alone is
not sufficient to deal with a race like this:

ib_uverbs_mmap()                                       uverbs_user_mmap_disassociate()
----------------                                       ------------------------------
atomic_read(&file->disassociated) == 0
                                                       atomic_set(&ufile->disassociated, 1)

                                                       all mmaps from the list are zapped

ucontext->device->ops.mmap(ucontext, vma)
rdma_user_mmap_io()
rdma_umap_priv_init() adds a new mmap to the list


So we may still need a lock, and ufile->disassociated can be bool now, something like:

@@ -700,9 +702,17 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
                ret = PTR_ERR(ucontext);
                goto out;
        }
+
+       mutex_lock(&file->disassociation_lock);
+       if (file->disassociated) {
+               ret = -EPERM;
+               goto out;
+       }
+
        vma->vm_ops = &rdma_umap_ops;
        ret = ucontext->device->ops.mmap(ucontext, vma);
 out:
+       mutex_unlock(&file->disassociation_lock);
        srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
        return ret;
 }

Similar changes to rdma_umap_open():

@@ -723,10 +733,12 @@ static void rdma_umap_open(struct vm_area_struct *vma)
        /* We are racing with disassociation */
        if (!down_read_trylock(&ufile->hw_destroy_rwsem))
                goto out_zap;
+
+       mutex_lock(&ufile->disassociation_lock);
        /*
         * Disassociation already completed, the VMA should already be zapped.
         */
-       if (!ufile->ucontext)
+       if (!ufile->ucontext || ufile->disassociated)
                goto out_unlock;

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -734,10 +746,12 @@ static void rdma_umap_open(struct vm_area_struct *vma)
                goto out_unlock;
        rdma_umap_priv_init(priv, vma, opriv->entry);

+       mutex_unlock(&ufile->disassociation_lock);
        up_read(&ufile->hw_destroy_rwsem);
        return;

 out_unlock:
+       mutex_unlock(&ufile->disassociation_lock);
        up_read(&ufile->hw_destroy_rwsem);
 out_zap:
        /*

>>  
>>  	while (1) {
>>  		struct mm_struct *mm = NULL;
>> @@ -847,8 +857,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>>  			break;
>>  		}
>>  		mutex_unlock(&ufile->umap_lock);
>> -		if (!mm)
>> +		if (!mm) {
>> +			mutex_unlock(&ufile->disassociation_lock);
>>  			return;
>> +		}
>>  
>>  		/*
>>  		 * The umap_lock is nested under mmap_lock since it used within
>> @@ -878,8 +890,34 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>>  		mmap_read_unlock(mm);
>>  		mmput(mm);
>>  	}
>> +
>> +	mutex_unlock(&ufile->disassociation_lock);
>>  }
>>  
>> +/**
>> + * rdma_user_mmap_disassociate() - Revoke mmaps for a device
>> + * @device: device to revoke
>> + *
>> + * This function should be called by drivers that need to disable mmaps for the
>> + * device, for instance because it is going to be reset.
>> + */
>> +void rdma_user_mmap_disassociate(struct ib_device *device)
>> +{
>> +	struct ib_uverbs_device *uverbs_dev =
>> +		ib_get_client_data(device, &uverbs_client);
>> +	struct ib_uverbs_file *ufile;
>> +
>> +	mutex_lock(&uverbs_dev->lists_mutex);
>> +	list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
>> +		down_read(&ufile->hw_destroy_rwsem);
> 
> I personally don't understand this locking scheme at all. I see newly
> introduced locks mixed together with some old locks.
> 

We must hold the rwsem because uverbs_user_mmap_disassociate() checks it with
lockdep_assert_held(). The newly introduced lock is also needed to prevent the
race where one thread is calling rdma_user_mmap_disassociate() while another
thread is calling ib_uverbs_mmap() or rdma_umap_open().

Thanks,
Junxian

> Jason, do you agree with this proposed locking scheme?
> 
> Thanks
> 
>> +		if (ufile->ucontext && !atomic_read(&ufile->disassociated))
>> +			uverbs_user_mmap_disassociate(ufile);
>> +		up_read(&ufile->hw_destroy_rwsem);
>> +	}
>> +	mutex_unlock(&uverbs_dev->lists_mutex);
>> +}
>> +EXPORT_SYMBOL(rdma_user_mmap_disassociate);
>> +
>>  /*
>>   * ib_uverbs_open() does not need the BKL:
>>   *
>> @@ -949,6 +987,9 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
>>  	mutex_init(&file->umap_lock);
>>  	INIT_LIST_HEAD(&file->umaps);
>>  
>> +	mutex_init(&file->disassociation_lock);
>> +	atomic_set(&file->disassociated, 0);
>> +
>>  	filp->private_data = file;
>>  	list_add_tail(&file->list, &dev->uverbs_file_list);
>>  	mutex_unlock(&dev->lists_mutex);
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index a1dcf812d787..09b80c8253e2 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -2948,6 +2948,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
>>  				      size_t length, u32 min_pgoff,
>>  				      u32 max_pgoff);
>>  
>> +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
>> +void rdma_user_mmap_disassociate(struct ib_device *device);
>> +#else
>> +static inline void rdma_user_mmap_disassociate(struct ib_device *device)
>> +{
>> +}
>> +#endif
>> +
>>  static inline int
>>  rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
>>  				  struct rdma_user_mmap_entry *entry,
>> -- 
>> 2.33.0
>>
>>




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux