[PATCH 3/3] drm/amdgpu: fix gpu reset issue

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Roger,

Could you try to move 'id->current_gpu_reset_count = 
atomic_read(&adev->gpu_reset_counter);' in amdgpu_vm_grab_id to 
no_flush_needed label.

Regards,
David Zhou

On 2017å¹´04æ??21æ?¥ 16:27, Christian König wrote:
> NAK, that is exactly what we wanted to avoid.
>
> We used to have an exclusive lock for this and it cause a whole bunch 
> of problems.
>
> Please elaborate why that should be necessary.
>
> Regards,
> Christian.
>
> Am 21.04.2017 um 09:08 schrieb Roger.He:
>> Change-Id: Ib77d33a09f348ebf2e3a9d7861411f4b951ebf7c
>> Signed-off-by: Roger.He <Hongbo.He at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 +++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c  |  7 ++++++-
>>   4 files changed, 31 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 71364f5..ab0ffa8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1526,6 +1526,7 @@ struct amdgpu_device {
>>       atomic64_t            num_bytes_moved;
>>       atomic64_t            num_evictions;
>>       atomic_t            gpu_reset_counter;
>> +    atomic_t            gpu_reset_state;
>>         /* data for buffer migration throttling */
>>       struct {
>> @@ -1851,6 +1852,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring 
>> *ring)
>>   #define amdgpu_psp_check_fw_loading_status(adev, i) 
>> (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
>>     /* Common functions */
>> +static inline int amdgpu_device_is_reset(struct amdgpu_device *adev)
>> +{
>> +    return atomic_read(&adev->gpu_reset_state);
>> +}
>> +
>>   int amdgpu_gpu_reset(struct amdgpu_device *adev);
>>   bool amdgpu_need_backup(struct amdgpu_device *adev);
>>   void amdgpu_pci_config_reset(struct amdgpu_device *adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index f882496..0fb4716 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1894,6 +1894,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>       mutex_init(&adev->grbm_idx_mutex);
>>       mutex_init(&adev->mn_lock);
>>       hash_init(adev->mn_hash);
>> +    atomic_set(&adev->gpu_reset_state, 0);
>>         amdgpu_check_arguments(adev);
>>   @@ -2655,6 +2656,18 @@ int amdgpu_sriov_gpu_reset(struct 
>> amdgpu_device *adev, bool voluntary)
>>   }
>>     /**
>> + * amdgpu_device_set_reset_state - set gpu reset state
>> + *
>> + * @adev: amdgpu device pointer
>> + * @state: true when start to reset gpu; false: reset done
>> + */
>> +static inline void amdgpu_device_set_reset_state(struct 
>> amdgpu_device *adev,
>> +                            bool state)
>> +{
>> +    atomic_set(&adev->gpu_reset_state, state);
>> +}
>> +
>> +/**
>>    * amdgpu_gpu_reset - reset the asic
>>    *
>>    * @adev: amdgpu device pointer
>> @@ -2678,7 +2691,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
>>       }
>>         atomic_inc(&adev->gpu_reset_counter);
>> -
>> +    amdgpu_device_set_reset_state(adev, true);
>>       /* block TTM */
>>       resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
>>       /* store modesetting */
>> @@ -2811,6 +2824,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
>>           dev_info(adev->dev, "GPU reset failed\n");
>>       }
>>   +    amdgpu_device_set_reset_state(adev, false);
>>       return r;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index ead00d7..8cc14af 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -679,11 +679,15 @@ long amdgpu_drm_ioctl(struct file *filp,
>>       struct drm_file *file_priv = filp->private_data;
>>       struct drm_device *dev;
>>       long ret;
>> +
>>       dev = file_priv->minor->dev;
>>       ret = pm_runtime_get_sync(dev->dev);
>>       if (ret < 0)
>>           return ret;
>>   +    while (amdgpu_device_is_reset(dev->dev_private))
>> +        msleep(100);
>> +
>>       ret = drm_ioctl(filp, cmd, arg);
>>         pm_runtime_mark_last_busy(dev->dev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
>> index 2648291..22b8059 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
>> @@ -36,10 +36,15 @@
>>   long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, 
>> unsigned long arg)
>>   {
>>       unsigned int nr = DRM_IOCTL_NR(cmd);
>> +    struct drm_file *file_priv = filp->private_data;
>> +    struct amdgpu_device *adev = file_priv->minor->dev->dev_private;
>>       int ret;
>>   -    if (nr < DRM_COMMAND_BASE)
>> +    if (nr < DRM_COMMAND_BASE) {
>> +        while (amdgpu_device_is_reset(adev))
>> +            msleep(100);
>>           return drm_compat_ioctl(filp, cmd, arg);
>> +    }
>>         ret = amdgpu_drm_ioctl(filp, cmd, arg);
>
>



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux