Introducing pause IOCTL. The CRIU amdgpu plugin needs to call AMDKFD_IOC_CRIU_PAUSE(pause = 1) before starting the dump and AMDKFD_IOC_CRIU_PAUSE(pause = 0) when the dump is complete. This ensures that the queues are not modified between CRIU dump ioctls. Signed-off-by: David Yat Sin <david.yatsin@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 23 +++++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7ca56ed02694..821b7663fa5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1990,6 +1990,14 @@ static int kfd_ioctl_criu_dumper(struct file *filep, goto err_unlock; } + /* Confirm all process queues are evicted */ + if (!p->queues_paused) { + pr_err("Cannot dump process when queues are not in evicted state\n"); + /* CRIU plugin did not call AMDKFD_IOC_CRIU_PAUSE before dumping */ + ret = -EINVAL; + goto err_unlock; + } + switch (args->type) { case KFD_CRIU_OBJECT_TYPE_PROCESS: ret = criu_dump_process(p, args); @@ -2318,9 +2326,20 @@ static int kfd_ioctl_criu_restorer(struct file *filep, static int kfd_ioctl_criu_pause(struct file *filep, struct kfd_process *p, void *data) { - pr_debug("Inside %s\n", __func__); + int ret; + struct kfd_ioctl_criu_pause_args *args = data; - return 0; + if (args->pause) + ret = kfd_process_evict_queues(p); + else + ret = kfd_process_restore_queues(p); + + if (ret) + pr_err("Failed to %s queues ret:%d\n", args->pause ? 
"evict" : "restore", ret); + else + p->queues_paused = !!(args->pause); + + return ret; } static int kfd_ioctl_criu_resume(struct file *filep, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 719982605587..0b8165729cde 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -857,6 +857,9 @@ struct kfd_process { bool svm_disabled; bool xnack_enabled; + + /* Queues are in paused state because we are in the process of doing a CRIU checkpoint */ + bool queues_paused; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index bbf21395fb06..e4cb2f778590 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1268,6 +1268,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) process->lead_thread = thread->group_leader; process->n_pdds = 0; process->svm_disabled = false; + process->queues_paused = false; INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); process->last_restore_timestamp = get_jiffies_64(); -- 2.17.1