From: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx> Checkpoint-Restore in userspace (CRIU) is a powerful tool that can snapshot a running process and later restore it on same or a remote machine but expects the processes that have a device file (e.g. GPU) associated with them, provide necessary driver support to assist CRIU and its extensible plugin interface. Thus, In order to support the Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute Kernel driver, needs to provide a set of new APIs that provide necessary VRAM metadata and its contents to a userspace component (CRIU plugin) that can store it in form of image files. This introduces some new ioctls which will be used to checkpoint-Restore any KFD bound user process. KFD doesn't allow any arbitrary ioctl call unless it is called by the group leader process. Since these ioctls are expected to be called from a KFD criu plugin which has elevated ptrace attached privileges and CAP_SYS_ADMIN capabilities attached with the file descriptors so modify KFD to allow such calls. (API redesign suggested by Felix Kuehling and implemented by David Yat Sin) Signed-off-by: David Yat Sin <david.yatsin@xxxxxxx> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx> (cherry picked from commit 72f4907135aed9c037b9f442a6055b51733b518a) (cherry picked from commit 33ff4953c5352f51d57a77ba8ae6614b7993e70d) --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 70 ++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 69 ++++++++++++++ include/uapi/linux/kfd_ioctl.h | 110 ++++++++++++++++++++++- 3 files changed, 247 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 059c3f1ca27d..a1b60d29aae1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -33,6 +33,7 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/mman.h> +#include <linux/ptrace.h> #include <linux/dma-buf.h> #include <asm/processor.h> #include "kfd_priv.h" @@ -1802,6 +1803,44 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) return -EPERM; } #endif +static int kfd_ioctl_criu_dumper(struct file *filep, + struct kfd_process *p, void *data) +{ + pr_debug("Inside %s\n", __func__); + + return 0; +} + +static int kfd_ioctl_criu_restorer(struct file *filep, + struct kfd_process *p, void *data) +{ + pr_debug("Inside %s\n", __func__); + + return 0; +} + +static int kfd_ioctl_criu_pause(struct file *filep, struct kfd_process *p, void *data) +{ + pr_debug("Inside %s\n", __func__); + + return 0; +} + +static int kfd_ioctl_criu_resume(struct file *filep, + struct kfd_process *p, void *data) +{ + pr_debug("Inside %s\n", __func__); + + return 0; +} + +static int kfd_ioctl_criu_process_info(struct file *filep, + struct kfd_process *p, void *data) +{ + pr_debug("Inside %s\n", __func__); + + return 0; +} #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ @@ -1906,6 +1945,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_DUMPER, + kfd_ioctl_criu_dumper, KFD_IOC_FLAG_PTRACE_ATTACHED), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_RESTORER, + kfd_ioctl_criu_restorer, KFD_IOC_FLAG_ROOT_ONLY), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_PROCESS_INFO, + kfd_ioctl_criu_process_info, KFD_IOC_FLAG_PTRACE_ATTACHED), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_RESUME, + kfd_ioctl_criu_resume, KFD_IOC_FLAG_ROOT_ONLY), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_PAUSE, + kfd_ioctl_criu_pause, KFD_IOC_FLAG_PTRACE_ATTACHED), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) @@ -1920,6 +1974,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) char *kdata = NULL; unsigned int usize, asize; int retcode = -EINVAL; + bool ptrace_attached = false; if (nr >= AMDKFD_CORE_IOCTL_COUNT) goto err_i1; @@ -1945,7 +2000,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) * processes need to create their own KFD device context. */ process = filep->private_data; - if (process->lead_thread != current->group_leader) { + + rcu_read_lock(); + if ((ioctl->flags & KFD_IOC_FLAG_PTRACE_ATTACHED) && + ptrace_parent(process->lead_thread) == current) + ptrace_attached = true; + rcu_read_unlock(); + + if (process->lead_thread != current->group_leader + && !ptrace_attached) { dev_dbg(kfd_device, "Using KFD FD in wrong process\n"); retcode = -EBADF; goto err_i1; @@ -1960,6 +2023,11 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) goto err_i1; } + /* KFD_IOC_FLAG_ROOT_ONLY is only for CAP_SYS_ADMIN */ + if (unlikely((ioctl->flags & KFD_IOC_FLAG_ROOT_ONLY) && + !capable(CAP_SYS_ADMIN))) + return -EACCES; + if (cmd & (IOC_IN | IOC_OUT)) { if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 64552f6b8ba4..768cc3fe95d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -121,7 +121,35 @@ */ #define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 +/** + * enum kfd_ioctl_flags - KFD ioctl flags + * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how + * userspace can use a given ioctl. + */ +enum kfd_ioctl_flags { + /** + * @KFD_IOC_FLAG_ROOT_ONLY: + * Certain KFD ioctls such as AMDKFD_IOC_CRIU_RESTORER can potentially + * perform privileged operations and load arbitrary data into MQDs and + * eventually HQD registers when the queue is mapped by HWS. In order to + * prevent this we should perform additional security checks. In other + * cases, certain ioctls such as AMDKFD_IOC_CRIU_RESUME might be called + * by an external process e.g. CRIU restore process, for each resuming + * tasks and thus require elevated privileges. + * + * This is equivalent to callers with the SYSADMIN capability. + */ + KFD_IOC_FLAG_ROOT_ONLY = BIT(0), + /** + * @KFD_IOC_FLAG_PTRACE_ATTACHED: + * Certain KFD ioctls such as AMDKFD_IOC_CRIU_HELPER and + * AMDKFD_IOC_CRIU_DUMPER are expected to be called during a Checkpoint + * operation triggered by CRIU. Since, these are expected to be called + * from a PTRACE attached context, we must authenticate these. + */ + KFD_IOC_FLAG_PTRACE_ATTACHED = BIT(1), +}; /* * Kernel module parameter to specify maximum number of supported queues per * device @@ -977,6 +1005,47 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, uint64_t tba_addr, uint64_t tma_addr); +/* CRIU */ +/* + * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private + * structures: + * kfd_criu_process_priv_data + * kfd_criu_device_priv_data + * kfd_criu_bo_priv_data + * kfd_criu_queue_priv_data + * kfd_criu_event_priv_data + * kfd_criu_svm_range_priv_data + */ + +#define KFD_CRIU_PRIV_VERSION 1 + +struct kfd_criu_process_priv_data { + uint32_t version; +}; + +struct kfd_criu_device_priv_data { + /* For future use */ + uint64_t reserved; +}; + +struct kfd_criu_bo_priv_data { + uint64_t reserved; +}; + +struct kfd_criu_svm_range_priv_data { + uint64_t reserved; +}; + +struct kfd_criu_queue_priv_data { + uint64_t reserved; +}; + +struct kfd_criu_event_priv_data { + uint64_t reserved; +}; + +/* CRIU - End */ + /* Queue Context Management */ int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 3cb5b5dd9f77..19489e2ca58e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -467,6 +467,99 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +struct kfd_criu_process_bucket { + __u64 priv_data_offset; + __u64 priv_data_size; +}; + +struct kfd_criu_device_bucket { + __u64 priv_data_offset; + __u64 priv_data_size; + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 priv_data_offset; + __u64 priv_data_size; + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; + __u32 gpu_id; + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +struct kfd_criu_queue_bucket { + __u64 priv_data_offset; + __u64 priv_data_size; + __u32 gpu_id; + __u32 pad; +}; + +struct kfd_criu_event_bucket { + __u64 priv_data_offset; + __u64 priv_data_size; + __u32 gpu_id; + __u32 pad; +}; + +struct kfd_ioctl_criu_process_info_args { + __u64 process_priv_data_size; + __u64 bos_priv_data_size; + __u64 devices_priv_data_size; + __u64 queues_priv_data_size; + __u64 events_priv_data_size; + __u64 svm_ranges_priv_data_size; + __u64 total_bos; + __u64 total_svm_ranges; + __u32 total_devices; + __u32 total_queues; + __u32 total_events; + __u32 task_pid; +}; + +struct kfd_ioctl_criu_pause_args { + __u32 pause; + __u32 pad; +}; + +enum kfd_criu_object_type { + KFD_CRIU_OBJECT_TYPE_PROCESS = 0, + KFD_CRIU_OBJECT_TYPE_DEVICE = 1, + KFD_CRIU_OBJECT_TYPE_BO = 2, + KFD_CRIU_OBJECT_TYPE_QUEUE = 3, + KFD_CRIU_OBJECT_TYPE_EVENT = 4, + KFD_CRIU_OBJECT_TYPE_SVM_RANGE = 5, +}; + +struct kfd_ioctl_criu_dumper_args { + __u64 num_objects; + __u64 objects; + __u64 objects_size; + __u64 objects_index_start; + __u32 type; /* enum kfd_criu_object_type */ + __u32 pad; +}; + +struct kfd_ioctl_criu_restorer_args { + __u64 num_objects; + __u64 objects; + __u64 objects_size; + __u64 objects_index_start; + __u32 type; /* enum kfd_criu_object_type */ + __u32 pad; +}; + +struct kfd_ioctl_criu_resume_args { + __u32 pid; /* to KFD */ + __u32 pad; +}; + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -740,7 +833,22 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_CRIU_DUMPER \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_dumper_args) + +#define AMDKFD_IOC_CRIU_RESTORER \ + AMDKFD_IOWR(0x23, struct kfd_ioctl_criu_restorer_args) + +#define AMDKFD_IOC_CRIU_PROCESS_INFO \ + AMDKFD_IOWR(0x24, struct kfd_ioctl_criu_process_info_args) + +#define AMDKFD_IOC_CRIU_RESUME \ + AMDKFD_IOWR(0x25, struct kfd_ioctl_criu_resume_args) + +#define AMDKFD_IOC_CRIU_PAUSE \ + AMDKFD_IOWR(0x26, struct kfd_ioctl_criu_pause_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x27 #endif -- 2.17.1