From: Mukul Joshi <mukul.joshi@xxxxxxx> GFX9.4.3 will support dynamic repartitioning of the GPU through sysfs. Add device repartitioning support in KFD to repartition GPU from one mode to other. Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 5 +---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 21 +++++++++++++++++++++ 5 files changed, 66 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0f641bb30870..992af8b100cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -772,3 +772,13 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) else return false; } + +int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) +{ + return kgd2kfd_check_and_lock_kfd(); +} + +void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) +{ + kgd2kfd_unlock_kfd(); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index df07e212c21e..9cc28fe32cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -151,6 +151,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev); +int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev); +void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev); int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, @@ -373,6 +375,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); +int kgd2kfd_check_and_lock_kfd(void); +void kgd2kfd_unlock_kfd(void); #else static inline int kgd2kfd_init(void) { @@ -438,5 +442,14 @@ static inline void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } + +static int kgd2kfd_check_and_lock_kfd(void) +{ + return 0; +} + +static void kgd2kfd_unlock_kfd(void) +{ +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 2a781d3dcc05..a98c38a75504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1155,10 +1155,30 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return -EINVAL; } + if (!adev->kfd.init_complete) + return -EPERM; + mutex_lock(&adev->gfx.partition_mutex); - ret = adev->gfx.funcs->switch_partition_mode(adev, mode); + if (mode == adev->gfx.funcs->query_partition_mode(adev)) + goto out; + + ret = amdgpu_amdkfd_check_and_lock_kfd(adev); + if (ret) + goto out; + + amdgpu_amdkfd_device_fini_sw(adev); + + adev->gfx.funcs->switch_partition_mode(adev, mode); + + amdgpu_amdkfd_device_probe(adev); + amdgpu_amdkfd_device_init(adev); + /* If KFD init failed, return failure */ + if (!adev->kfd.init_complete) + ret = -EIO; + amdgpu_amdkfd_unlock_kfd(adev); +out: mutex_unlock(&adev->gfx.partition_mutex); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index d248c4f2e2c4..df9756df7941 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -680,7 +680,7 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, static enum amdgpu_gfx_partition gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev) { - enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + enum amdgpu_gfx_partition mode = adev->gfx.partition_mode; if (adev->nbio.funcs->get_compute_partition_mode) mode = adev->nbio.funcs->get_compute_partition_mode(adev); @@ -694,9 +694,6 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, u32 tmp = 0; int num_xcc_per_partition, i, num_xcc; - if (mode == adev->gfx.partition_mode) - return mode; - num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7aa4f4ee7752..02a87650412d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1348,6 +1348,27 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node) kfd_get_num_sdma_engines(node); } +int kgd2kfd_check_and_lock_kfd(void) +{ + mutex_lock(&kfd_processes_mutex); + if (!hash_empty(kfd_processes_table) || kfd_is_locked()) { + mutex_unlock(&kfd_processes_mutex); + return -EBUSY; + } + + ++kfd_locked; + mutex_unlock(&kfd_processes_mutex); + + return 0; +} + +void kgd2kfd_unlock_kfd(void) +{ + mutex_lock(&kfd_processes_mutex); + --kfd_locked; + mutex_unlock(&kfd_processes_mutex); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS -- 2.39.2