For SRIOV strict mode GPU reset: in kms open we record the latest adev->gpu_reset_counter in fpriv, and we return -ENODEV from cs_ioctl or info_ioctl if fpriv->gpu_reset_counter no longer matches adev->gpu_reset_counter (i.e. a GPU reset has occurred since the FD was opened). This way we prevent a potentially bad process/FD from submitting cmds and notify userspace with -ENODEV. Userspace should then close all BOs/ctxs and re-open the dri FD to re-create the virtual memory system for this process. Change-Id: Ib4c179f28a3d0783837566f29de07fc14aa9b9a4 Signed-off-by: Monk Liu <Monk.Liu at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index de9c164..b40d4ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -772,6 +772,7 @@ struct amdgpu_fpriv { struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; u32 vram_lost_counter; + int gpu_reset_counter; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9467cf6..6a1515e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1199,6 +1199,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_kms_vram_lost(adev, fpriv)) return -ENODEV; + if (amdgpu_sriov_vf(adev) && + amdgpu_sriov_reset_level == 1 && + fpriv->gpu_reset_counter < atomic_read(&adev->gpu_reset_counter)) + return -ENODEV; + parser.adev = adev; parser.filp = filp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 282f45b..bd389cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -285,6 +285,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (amdgpu_kms_vram_lost(adev, fpriv)) return -ENODEV; + if
(amdgpu_sriov_vf(adev) && + amdgpu_sriov_reset_level == 1 && + fpriv->gpu_reset_counter < atomic_read(&adev->gpu_reset_counter)) + return -ENODEV; + switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: ui32 = adev->accel_working; @@ -824,6 +829,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } + fpriv->gpu_reset_counter = atomic_read(&adev->gpu_reset_counter); + r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, 0); if (r) { -- 2.7.4