Adding support to checkpoint/restore GWS(Global Wave Sync) queues. Signed-off-by: David Yat Sin <david.yatsin@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 ++-- .../amd/amdkfd/kfd_process_queue_manager.c | 22 ++++++++++++++----- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index e1e2362841f8..b2c72ddd9c1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1377,7 +1377,7 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, goto out_unlock; } - retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); + retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL, false); mutex_unlock(&p->mutex); args->first_gws = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f36062be9ca8..3ec32675210c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1102,7 +1102,7 @@ struct kfd_criu_queue_priv_data { uint32_t priority; uint32_t q_percent; uint32_t doorbell_id; - uint32_t is_gws; + uint32_t gws; uint32_t sdma_id; uint32_t eop_ring_buffer_size; uint32_t ctx_save_restore_area_size; @@ -1199,7 +1199,7 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid, struct mqd_update_info *minfo); int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, - void *gws); + void *gws, bool criu_restore); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 6eca9509f2e3..3eb9d43fdaac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -91,7 +91,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) } int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, - void *gws) + void *gws, bool criu_restore) { struct kfd_dev *dev = NULL; struct process_queue_node *pqn; @@ -135,8 +135,14 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, pqn->q->gws = mem; pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; - return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, - pqn->q, NULL); + ret = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q, NULL); + + if (!ret && criu_restore) { + dev->dqm->gws_queue_count++; + pdd->qpd.mapped_gws_queue = true; + } + return ret; } void kfd_process_dequeue_from_all_devices(struct kfd_process *p) @@ -636,6 +642,8 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd, q_data->ctx_save_restore_area_size = q->properties.ctx_save_restore_area_size; + q_data->gws = !!q->gws; + ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack); if (ret) { pr_err("Failed checkpoint queue_mqd (%d)\n", ret); @@ -743,7 +751,6 @@ static void set_queue_properties_from_criu(struct queue_properties *qp, struct kfd_criu_queue_priv_data *q_data) { qp->is_interop = false; - qp->is_gws = q_data->is_gws; qp->queue_percent = q_data->q_percent; qp->priority = q_data->priority; qp->queue_address = q_data->q_address; @@ -826,12 +833,15 @@ int kfd_criu_restore_queue(struct kfd_process *p, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); - ret = -EINVAL; + goto exit; } + if (q_data->gws) + ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws, true); + exit: if (ret) - pr_err("Failed to create queue (%d)\n", ret); + pr_err("Failed to restore queue (%d)\n", ret); else pr_debug("Queue id %d was restored successfully\n", queue_id); -- 2.30.2