Am 2021-08-19 um 9:37 a.m. schrieb David Yat Sin: > Add support to existing CRIU ioctl's to save and restore events during > criu checkpoint and restore. > > Signed-off-by: David Yat Sin <david.yatsin@xxxxxxx> > --- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 130 +++++++----- > drivers/gpu/drm/amd/amdkfd/kfd_events.c | 253 ++++++++++++++++++++--- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 25 ++- > 3 files changed, 329 insertions(+), 79 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index 19f16e3dd769..c8f523d8ab81 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -1008,51 +1008,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, > * through the event_page_offset field. > */ > if (args->event_page_offset) { > - struct kfd_dev *kfd; > - struct kfd_process_device *pdd; > - void *mem, *kern_addr; > - uint64_t size; > - > - if (p->signal_page) { > - pr_err("Event page is already set\n"); > - return -EINVAL; > - } > - > - kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset)); > - if (!kfd) { > - pr_err("Getting device by id failed in %s\n", __func__); > - return -EINVAL; > - } > - > mutex_lock(&p->mutex); > - pdd = kfd_bind_process_to_device(kfd, p); > - if (IS_ERR(pdd)) { > - err = PTR_ERR(pdd); > - goto out_unlock; > - } > - > - mem = kfd_process_device_translate_handle(pdd, > - GET_IDR_HANDLE(args->event_page_offset)); > - if (!mem) { > - pr_err("Can't find BO, offset is 0x%llx\n", > - args->event_page_offset); > - err = -EINVAL; > - goto out_unlock; > - } > + err = kfd_kmap_event_page(p, args->event_page_offset); > mutex_unlock(&p->mutex); > - > - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, > - mem, &kern_addr, &size); > - if (err) { > - pr_err("Failed to map event page to kernel\n"); > - return err; > - } > - > - err = kfd_event_page_set(p, kern_addr, size); > - if (err) { > - pr_err("Failed 
to set event page\n"); > + if (err) > return err; > - } > } > > err = kfd_event_create(filp, p, args->event_type, > @@ -1061,10 +1021,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, > &args->event_page_offset, > &args->event_slot_index); > > - return err; > - > -out_unlock: > - mutex_unlock(&p->mutex); > + pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__); > return err; > } > > @@ -2208,6 +2165,41 @@ static int criu_dump_queues(struct kfd_process *p, struct kfd_ioctl_criu_dumper_ > return ret; > } > > +static int criu_dump_events(struct kfd_process *p, struct kfd_ioctl_criu_dumper_args *args) > +{ > + struct kfd_criu_event_bucket *ev_buckets; > + uint32_t num_events; > + int ret = 0; > + > + num_events = kfd_get_num_events(p); > + if (args->num_objects != num_events) { > + pr_err("Mismatch with number of events (current:%d user:%lld)\n", > + num_events, args->num_objects); > + > + } > + > + if (args->objects_size != args->num_objects * > + (sizeof(*ev_buckets) + sizeof(struct kfd_criu_event_priv_data))) { > + pr_err("Invalid objects size for events\n"); > + return -EINVAL; > + } > + > + ev_buckets = kvzalloc(args->objects_size, GFP_KERNEL); > + if (!ev_buckets) > + return -ENOMEM; > + > + ret = kfd_event_dump(p, ev_buckets, args->num_objects); > + if (!ret) { > + ret = copy_to_user((void __user *)args->objects, ev_buckets, args->objects_size); > + if (ret) { > + pr_err("Failed to copy events information to user\n"); > + ret = -EFAULT; > + } > + } > + kvfree(ev_buckets); > + return ret; > +} > + > static int kfd_ioctl_criu_dumper(struct file *filep, > struct kfd_process *p, void *data) > { > @@ -2246,6 +2238,8 @@ static int kfd_ioctl_criu_dumper(struct file *filep, > ret = criu_dump_queues(p, args); > break; > case KFD_CRIU_OBJECT_TYPE_EVENT: > + ret = criu_dump_events(p, args); > + break; > case KFD_CRIU_OBJECT_TYPE_DEVICE: > case KFD_CRIU_OBJECT_TYPE_SVM_RANGE: > default: > @@ -2676,6 +2670,40 @@ static int 
criu_restore_queues(struct kfd_process *p, > return ret; > } > > +static int criu_restore_events(struct file *filp, struct kfd_process *p, > + struct kfd_ioctl_criu_restorer_args *args) > +{ > + int ret = 0, i; > + uint8_t *objects, *private_data; > + struct kfd_criu_event_bucket *ev_buckets; > + > + objects = kvzalloc(args->objects_size, GFP_KERNEL); > + if (!objects) > + return -ENOMEM; > + > + ret = copy_from_user(objects, (void __user *)args->objects, args->objects_size); > + if (ret) { > + pr_err("Failed to copy event information from user\n"); > + ret = -EFAULT; > + goto exit; > + } > + > + ev_buckets = (struct kfd_criu_event_bucket *) objects; > + private_data = (void *)(ev_buckets + args->num_objects); > + > + for (i = 0; i < args->num_objects; i++) { > + ret = kfd_event_restore(filp, p, &ev_buckets[i], private_data); > + if (ret) { > + pr_err("Failed to restore event (%d)\n", ret); > + goto exit; > + } > + } > + > +exit: > + kvfree(ev_buckets); > + return ret; > +} > + > static int kfd_ioctl_criu_restorer(struct file *filep, > struct kfd_process *p, void *data) > { > @@ -2698,6 +2726,8 @@ static int kfd_ioctl_criu_restorer(struct file *filep, > ret = criu_restore_queues(p, args); > break; > case KFD_CRIU_OBJECT_TYPE_EVENT: > + ret = criu_restore_events(filep, p, args); > + break; > case KFD_CRIU_OBJECT_TYPE_DEVICE: > case KFD_CRIU_OBJECT_TYPE_SVM_RANGE: > default: > @@ -2799,9 +2829,13 @@ static int kfd_ioctl_criu_process_info(struct file *filep, > args->queues_priv_data_size = queues_extra_data_size + > (args->total_queues * sizeof(struct kfd_criu_queue_priv_data)); > > - dev_dbg(kfd_device, "Num of bos:%llu queues:%u\n", > + args->total_events = kfd_get_num_events(p); > + args->events_priv_data_size = args->total_events * sizeof(struct kfd_criu_event_priv_data); > + > + dev_dbg(kfd_device, "Num of bos:%llu queues:%u events:%u\n", > args->total_bos, > - args->total_queues); > + args->total_queues, > + args->total_events); > err_unlock: > 
mutex_unlock(&p->mutex); > return ret; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > index ba2c2ce0c55a..18362478e351 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > @@ -53,9 +53,9 @@ struct kfd_signal_page { > uint64_t *kernel_address; > uint64_t __user *user_address; > bool need_to_free_pages; > + uint64_t user_handle; /* Needed for CRIU dumped and restore */ > }; > > - > static uint64_t *page_slots(struct kfd_signal_page *page) > { > return page->kernel_address; > @@ -92,7 +92,8 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) > } > > static int allocate_event_notification_slot(struct kfd_process *p, > - struct kfd_event *ev) > + struct kfd_event *ev, > + const int *restore_id) > { > int id; > > @@ -104,14 +105,19 @@ static int allocate_event_notification_slot(struct kfd_process *p, > p->signal_mapped_size = 256*8; > } > > - /* > - * Compatibility with old user mode: Only use signal slots > - * user mode has mapped, may be less than > - * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase > - * of the event limit without breaking user mode. > - */ > - id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8, > - GFP_KERNEL); > + if (restore_id) { > + id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1, > + GFP_KERNEL); > + } else { > + /* > + * Compatibility with old user mode: Only use signal slots > + * user mode has mapped, may be less than > + * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase > + * of the event limit without breaking user mode. 
> + */ > + id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8, > + GFP_KERNEL); > + } > if (id < 0) > return id; > > @@ -178,9 +184,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id( > return ev; > } > > -static int create_signal_event(struct file *devkfd, > - struct kfd_process *p, > - struct kfd_event *ev) > +static int create_signal_event(struct file *devkfd, struct kfd_process *p, > + struct kfd_event *ev, const int *restore_id) > { > int ret; > > @@ -193,7 +198,7 @@ static int create_signal_event(struct file *devkfd, > return -ENOSPC; > } > > - ret = allocate_event_notification_slot(p, ev); > + ret = allocate_event_notification_slot(p, ev, restore_id); > if (ret) { > pr_warn("Signal event wasn't created because out of kernel memory\n"); > return ret; > @@ -209,16 +214,22 @@ static int create_signal_event(struct file *devkfd, > return 0; > } > > -static int create_other_event(struct kfd_process *p, struct kfd_event *ev) > +static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id) > { > - /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an > - * intentional integer overflow to -1 without a compiler > - * warning. idr_alloc treats a negative value as "maximum > - * signed integer". > - */ > - int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, > - (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, > - GFP_KERNEL); > + int id; > + > + if (restore_id) > + id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1, > + GFP_KERNEL); > + else > + /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an > + * intentional integer overflow to -1 without a compiler > + * warning. idr_alloc treats a negative value as "maximum > + * signed integer". 
> + */ > + id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, > + (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, > + GFP_KERNEL); > > if (id < 0) > return id; > @@ -295,8 +306,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev) > return ev->type == KFD_EVENT_TYPE_SIGNAL; > } > > -int kfd_event_page_set(struct kfd_process *p, void *kernel_address, > - uint64_t size) > +static int kfd_event_page_set(struct kfd_process *p, void *kernel_address, > + uint64_t size, uint64_t user_handle) > { > struct kfd_signal_page *page; > > @@ -315,10 +326,55 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address, > > p->signal_page = page; > p->signal_mapped_size = size; > - > + p->signal_page->user_handle = user_handle; > return 0; > } > > +int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)

This function should be static. I also think that this function and criu_dump/restore_events could be moved into kfd_events.c.

Regards,
  Felix

> +{ > + struct kfd_dev *kfd; > + struct kfd_process_device *pdd; > + void *mem, *kern_addr; > + uint64_t size; > + int err = 0; > + > + if (p->signal_page) { > + pr_err("Event page is already set\n"); > + return -EINVAL; > + } > + > + kfd = kfd_device_by_id(GET_GPU_ID(event_page_offset)); > + if (!kfd) { > + pr_err("Getting device by id failed in %s\n", __func__); > + return -EINVAL; > + } > + > + pdd = kfd_bind_process_to_device(kfd, p); > + if (IS_ERR(pdd)) > + return PTR_ERR(pdd); > + > + mem = kfd_process_device_translate_handle(pdd, > + GET_IDR_HANDLE(event_page_offset)); > + if (!mem) { > + pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset); > + return -EINVAL; > + } > + > + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, > + mem, &kern_addr, &size); > + if (err) { > + pr_err("Failed to map event page to kernel\n"); > + return err; > + } > + > + err = kfd_event_page_set(p, kern_addr, size, event_page_offset); > + if (err) { > + pr_err("Failed to set event 
page\n"); > + return err; > + } > + return err; > +} > + > int kfd_event_create(struct file *devkfd, struct kfd_process *p, > uint32_t event_type, bool auto_reset, uint32_t node_id, > uint32_t *event_id, uint32_t *event_trigger_data, > @@ -343,14 +399,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, > switch (event_type) { > case KFD_EVENT_TYPE_SIGNAL: > case KFD_EVENT_TYPE_DEBUG: > - ret = create_signal_event(devkfd, p, ev); > + ret = create_signal_event(devkfd, p, ev, NULL); > if (!ret) { > *event_page_offset = KFD_MMAP_TYPE_EVENTS; > *event_slot_index = ev->event_id; > } > break; > default: > - ret = create_other_event(p, ev); > + ret = create_other_event(p, ev, NULL); > break; > } > > @@ -366,6 +422,147 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, > return ret; > } > > +int kfd_event_restore(struct file *devkfd, struct kfd_process *p, > + struct kfd_criu_event_bucket *ev_bucket, > + uint8_t *priv_datas) > +{ > + int ret = 0; > + struct kfd_criu_event_priv_data *ev_priv; > + struct kfd_event *ev; > + > + ev_priv = (struct kfd_criu_event_priv_data *)(priv_datas + ev_bucket->priv_data_offset); > + > + if (ev_priv->user_handle) { > + ret = kfd_kmap_event_page(p, ev_priv->user_handle); > + if (ret) > + return ret; > + } > + > + ev = kzalloc(sizeof(*ev), GFP_KERNEL); > + if (!ev) > + return -ENOMEM; > + > + ev->type = ev_priv->type; > + ev->auto_reset = ev_priv->auto_reset; > + ev->signaled = ev_priv->signaled; > + > + init_waitqueue_head(&ev->wq); > + > + mutex_lock(&p->event_mutex); > + switch (ev->type) { > + case KFD_EVENT_TYPE_SIGNAL: > + case KFD_EVENT_TYPE_DEBUG: > + ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id); > + break; > + case KFD_EVENT_TYPE_MEMORY: > + memcpy(&ev->memory_exception_data, > + &ev_priv->memory_exception_data, > + sizeof(struct kfd_hsa_memory_exception_data)); > + > + ev->memory_exception_data.gpu_id = ev_bucket->gpu_id; > + ret = create_other_event(p, ev, &ev_priv->event_id); > + 
break; > + case KFD_EVENT_TYPE_HW_EXCEPTION: > + memcpy(&ev->hw_exception_data, > + &ev_priv->hw_exception_data, > + sizeof(struct kfd_hsa_hw_exception_data)); > + > + ev->hw_exception_data.gpu_id = ev_bucket->gpu_id; > + ret = create_other_event(p, ev, &ev_priv->event_id); > + break; > + } > + > + if (ret) > + kfree(ev); > + > + mutex_unlock(&p->event_mutex); > + > + return ret; > +} > + > +int kfd_event_dump(struct kfd_process *p, > + struct kfd_criu_event_bucket *ev_buckets, > + uint32_t num_events) > +{ > + struct kfd_event *ev; > + struct kfd_criu_event_priv_data *ev_privs; > + uint32_t ev_id; > + int i = 0; > + > + /* Private data for first event starts after all ev_buckets */ > + ev_privs = (struct kfd_criu_event_priv_data *)((uint8_t *)ev_buckets + > + (num_events * (sizeof(*ev_buckets)))); > + > + > + idr_for_each_entry(&p->event_idr, ev, ev_id) { > + struct kfd_criu_event_bucket *ev_bucket; > + struct kfd_criu_event_priv_data *ev_priv; > + > + if (i >= num_events) { > + pr_err("Number of events exceeds number allocated\n"); > + return -ENOMEM; > + } > + > + ev_bucket = &ev_buckets[i]; > + > + /* Currently, all events have same size of private_data, but the current ioctl's > + * and CRIU plugin supports private_data of variable sizes > + */ > + ev_priv = &ev_privs[i]; > + > + ev_bucket->priv_data_offset = i * sizeof(*ev_priv); > + ev_bucket->priv_data_size = sizeof(*ev_priv); > + > + /* We store the user_handle with the first event */ > + if (i == 0 && p->signal_page) > + ev_priv->user_handle = p->signal_page->user_handle; > + > + ev_priv->event_id = ev->event_id; > + ev_priv->auto_reset = ev->auto_reset; > + ev_priv->type = ev->type; > + ev_priv->signaled = ev->signaled; > + > + /* We store the gpu_id in the bucket section so that the userspace CRIU plugin can > + * modify it if needed. 
> + */ > + if (ev_priv->type == KFD_EVENT_TYPE_MEMORY) { > + memcpy(&ev_priv->memory_exception_data, > + &ev->memory_exception_data, > + sizeof(struct kfd_hsa_memory_exception_data)); > + > + ev_bucket->gpu_id = ev_priv->memory_exception_data.gpu_id; > + } else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION) { > + memcpy(&ev_priv->hw_exception_data, > + &ev->hw_exception_data, > + sizeof(struct kfd_hsa_hw_exception_data)); > + > + ev_bucket->gpu_id = ev_priv->hw_exception_data.gpu_id; > + } else > + ev_bucket->gpu_id = 0; > + > + pr_debug("Dumped event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n", > + i, > + ev_priv->event_id, > + ev_priv->auto_reset, > + ev_priv->type, > + ev_priv->signaled); > + i++; > + } > + return 0; > +} > + > +int kfd_get_num_events(struct kfd_process *p) > +{ > + struct kfd_event *ev; > + uint32_t id; > + u32 num_events = 0; > + > + idr_for_each_entry(&p->event_idr, ev, id) > + num_events++; > + > + return num_events++; > +} > + > /* Assumes that p is current. 
*/ > int kfd_event_destroy(struct kfd_process *p, uint32_t event_id) > { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 7ed6f831109d..bf10a5305ef7 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -1069,9 +1069,26 @@ struct kfd_criu_queue_priv_data { > }; > > struct kfd_criu_event_priv_data { > - uint64_t reserved; > + uint64_t user_handle; > + uint32_t event_id; > + uint32_t auto_reset; > + uint32_t type; > + uint32_t signaled; > + > + union { > + struct kfd_hsa_memory_exception_data memory_exception_data; > + struct kfd_hsa_hw_exception_data hw_exception_data; > + }; > }; > > +int kfd_event_restore(struct file *devkfd, struct kfd_process *p, > + struct kfd_criu_event_bucket *ev_bucket, > + uint8_t *priv_datas); > + > +int kfd_event_dump(struct kfd_process *p, > + struct kfd_criu_event_bucket *ev_buckets, > + uint32_t num_events); > + > /* CRIU - End */ > > /* Queue Context Management */ > @@ -1238,12 +1255,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, > void kfd_signal_hw_exception_event(u32 pasid); > int kfd_set_event(struct kfd_process *p, uint32_t event_id); > int kfd_reset_event(struct kfd_process *p, uint32_t event_id); > -int kfd_event_page_set(struct kfd_process *p, void *kernel_address, > - uint64_t size); > +int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset); > + > int kfd_event_create(struct file *devkfd, struct kfd_process *p, > uint32_t event_type, bool auto_reset, uint32_t node_id, > uint32_t *event_id, uint32_t *event_trigger_data, > uint64_t *event_page_offset, uint32_t *event_slot_index); > + > +int kfd_get_num_events(struct kfd_process *p); > int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); > > void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,