Add two sysfs interfaces for gfx and compute:
    gfx_reset_mask
    compute_reset_mask

These interfaces are read-only and show the reset types supported by the
IP, for example full adapter reset (mode1/mode2/BACO/etc.), soft reset,
queue reset, and pipe reset.

v2: the sysfs node returns a text string instead of a set of flags (Christian)

v3: add a generic helper which takes the ring as parameter and prints
    the strings in the order they are applied (Christian)

    check amdgpu_gpu_recovery before creating the sysfs file itself,
    and initialize supported_reset_types in the IP version files (Lijo)

Signed-off-by: Jesse Zhang <Jesse.Zhang@xxxxxxx>
Suggested-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  8 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 66 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  4 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     |  6 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c     | 14 +++++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c    | 12 ++++
 7 files changed, 147 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 48c9b9b06905..aea1031d7b84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -300,6 +300,12 @@ extern int amdgpu_wbrf;
 #define AMDGPU_RESET_VCE			(1 << 13)
 #define AMDGPU_RESET_VCE1			(1 << 14)
 
+/* reset mask */
+#define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */
+#define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */
+#define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */
+#define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */
+
 /* max cursor sizes (in pixels) */
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
@@ -1466,6 +1472,8 @@ struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 					    struct dma_fence *gang);
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
 
 /* atpx handler */
 #if defined(CONFIG_VGA_SWITCHEROO)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ef715b2bbcdb..cd1e3f018893 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6684,3 +6684,40 @@ uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
 	}
 	return ret;
 }
+
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
+{
+	ssize_t size = 0;
+
+	if (!ring)
+		return size;
+
+	if (amdgpu_device_should_recover_gpu(ring->adev))
+		size |= AMDGPU_RESET_TYPE_FULL;
+
+	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
+	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
+		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
+
+	return size;
+}
+
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
+{
+	ssize_t size = 0;
+
+	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
+		size += sysfs_emit_at(buf, size, "soft ");
+
+	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
+		size += sysfs_emit_at(buf, size, "queue ");
+
+	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+		size += sysfs_emit_at(buf, size, "pipe ");
+
+	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
+		size += sysfs_emit_at(buf, size, "full ");
+
+	size += sysfs_emit_at(buf, size, "\n");
+	return size;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index e96984c53e72..6de1f3bf6863 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1588,6 +1588,32 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
 	return count;
 }
 
+static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+
+	if (!adev)
+		return -ENODEV;
+
+	return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
+}
+
+static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
+						 struct device_attribute *attr,
+						 char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+
+	if (!adev)
+		return -ENODEV;
+
+	return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
+}
+
 static DEVICE_ATTR(run_cleaner_shader, 0200, NULL,
 		   amdgpu_gfx_set_run_cleaner_shader);
 
@@ -1602,6 +1628,12 @@ static DEVICE_ATTR(current_compute_partition, 0644,
 static DEVICE_ATTR(available_compute_partition, 0444,
 		   amdgpu_gfx_get_available_compute_partition, NULL);
 
+static DEVICE_ATTR(gfx_reset_mask, 0444,
+		   amdgpu_gfx_get_gfx_reset_mask, NULL);
+
+static DEVICE_ATTR(compute_reset_mask, 0444,
+		   amdgpu_gfx_get_compute_reset_mask, NULL);
+
 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
@@ -1702,6 +1734,40 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
 				    cleaner_shader_size);
 }
 
+int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+	int r = 0;
+
+	if (!amdgpu_gpu_recovery)
+		return r;
+
+	if (adev->gfx.num_gfx_rings) {
+		r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
+		if (r)
+			return r;
+	}
+
+	if (adev->gfx.num_compute_rings) {
+		r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
+		if (r)
+			return r;
+	}
+
+	return r;
+}
+
+void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+	if (!amdgpu_gpu_recovery)
+		return;
+
+	if (adev->gfx.num_gfx_rings)
+		device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
+
+	if (adev->gfx.num_compute_rings)
+		device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
+}
+
 /**
  * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index f710178a21bc..fb0e1adf6766 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -424,6 +424,8 @@ struct amdgpu_gfx {
 	/* reset mask */
 	uint32_t			grbm_soft_reset;
 	uint32_t			srbm_soft_reset;
+	uint32_t			gfx_supported_reset;
+	uint32_t			compute_supported_reset;
 
 	/* gfx off */
 	bool				gfx_off_state;	/* true: enabled, false: disabled */
@@ -582,6 +584,8 @@ void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev);
 void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work);
 void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring);
 void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
+int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev);
 
 static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9da95b25e158..446e37768397 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4806,6 +4806,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
 			}
 		}
 	}
+	/* TODO: Check the firmware version that fully supports queue reset */
+	adev->gfx.gfx_supported_reset |=
+		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
 
 	ring_id = 0;
 	/* set up the compute queues - allocate horizontally across pipes */
@@ -4825,6 +4828,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
 			}
 		}
 	}
+	/* TODO: Check the firmware version that fully supports queue reset */
+	adev->gfx.compute_supported_reset |=
+		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
 
 	r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
 	if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 5aff8f72de9c..3b23402dfb47 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1560,6 +1560,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 		adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs;
 		adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs;
 #endif
+		if ((adev->gfx.me_fw_version >= 2280) &&
+		    (adev->gfx.mec_fw_version >= 2410)) {
+			adev->gfx.compute_supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+			adev->gfx.gfx_supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+		}
 		break;
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 4):
@@ -1663,6 +1668,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 			}
 		}
 	}
+	adev->gfx.gfx_supported_reset |=
+		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
 
 	ring_id = 0;
 	/* set up the compute queues - allocate horizontally across pipes */
@@ -1682,6 +1689,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 			}
 		}
 	}
+	adev->gfx.compute_supported_reset |=
+		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
 
 	if (!adev->enable_mes_kiq) {
 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
@@ -1721,6 +1730,10 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 	if (r)
 		return r;
 
+	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -1783,6 +1796,7 @@ static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
 	gfx_v11_0_free_microcode(adev);
 
 	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+	amdgpu_gfx_sysfs_reset_mask_fini(adev);
 
 	kfree(adev->gfx.ip_dump_core);
 	kfree(adev->gfx.ip_dump_compute_queues);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 016290f00592..b9d5a79ba85c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1067,6 +1067,11 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
 			}
 		}
+
+		if (adev->gfx.mec_fw_version >= 155) {
+			adev->gfx.compute_supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+		}
 		break;
 	default:
 		adev->gfx.enable_cleaner_shader = false;
@@ -1157,6 +1162,9 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
 		return r;
 	}
 
+	adev->gfx.compute_supported_reset |=
+		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
 	r = gfx_v9_4_3_gpu_early_init(adev);
 	if (r)
 		return r;
@@ -1175,6 +1183,9 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
 	if (r)
 		return r;
 
+	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
+	if (r)
+		return r;
 	return 0;
 }
 
@@ -1200,6 +1211,7 @@ static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block)
 	gfx_v9_4_3_free_microcode(adev);
 	amdgpu_gfx_sysfs_fini(adev);
 	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+	amdgpu_gfx_sysfs_reset_mask_fini(adev);
 
 	kfree(adev->gfx.ip_dump_core);
 	kfree(adev->gfx.ip_dump_compute_queues);
-- 
2.25.1
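
For testing, here is a minimal userspace sketch of how the new read-only
nodes are expected to be read. It is illustrative only: the sysfs path is
an assumption (the attributes are created on the amdgpu PCI device, which
is normally reachable via /sys/class/drm/cardN/device/), and the returned
string is a space-separated subset of "soft", "queue", "pipe" and "full"
in the order emitted by amdgpu_show_reset_mask().

#include <stdio.h>

int main(void)
{
	char buf[64];
	/* Path is an assumption; adjust the card index for the system under test. */
	FILE *f = fopen("/sys/class/drm/card0/device/gfx_reset_mask", "r");

	if (!f) {
		perror("gfx_reset_mask");
		return 1;
	}
	/* Print the mask string reported by the driver, e.g. "soft full". */
	if (fgets(buf, sizeof(buf), f))
		printf("gfx supported resets: %s", buf);
	fclose(f);
	return 0;
}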