From: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx> KFD buffer objects do not have a GEM handle associated with them, so they cannot be used directly with libdrm to initiate a system DMA (sDMA) operation to speed up the checkpoint and restore operations. Export them as dmabuf objects instead, and use the libdrm helper (amdgpu_bo_import) to further process the sDMA command submissions. With sDMA, we see a huge improvement in checkpoint and restore operations compared to the generic PCI-based access via the host data path. Suggested-by: Felix Kuehling <felix.kuehling@xxxxxxx> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx> Signed-off-by: David Yat Sin <david.yatsin@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 90e4d4ce4398..ead4cb37377b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -35,6 +35,7 @@ #include <linux/mman.h> #include <linux/ptrace.h> #include <linux/dma-buf.h> +#include <linux/fdtable.h> #include <asm/processor.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" @@ -43,6 +44,7 @@ #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" #include "amdgpu_object.h" +#include "amdgpu_dma_buf.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1900,6 +1902,33 @@ uint64_t get_process_num_bos(struct kfd_process *p) return num_of_bos; } +static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags, + u32 *shared_fd) +{ + struct dma_buf *dmabuf; + int ret; + + dmabuf = amdgpu_gem_prime_export(gobj, flags); + if (IS_ERR(dmabuf)) { + ret = PTR_ERR(dmabuf); + pr_err("dmabuf export failed for the BO\n"); + return ret; + } + + ret = dma_buf_fd(dmabuf, flags); + if (ret < 0) { + pr_err("dmabuf create fd failed, ret:%d\n", ret); + goto out_free_dmabuf; + } + + 
*shared_fd = ret; + return 0; + +out_free_dmabuf: + dma_buf_put(dmabuf); + return ret; +} + static int criu_dump_bos(struct kfd_process *p, struct kfd_ioctl_criu_dumper_args *args) { struct kfd_criu_bo_bucket *bo_buckets; @@ -1969,6 +1998,14 @@ static int criu_dump_bos(struct kfd_process *p, struct kfd_ioctl_criu_dumper_arg goto exit; } } + if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + ret = criu_get_prime_handle(&dumper_bo->tbo.base, + bo_bucket->alloc_flags & + KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, + &bo_bucket->dmabuf_fd); + if (ret) + goto exit; + } if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id); @@ -1998,6 +2035,11 @@ static int criu_dump_bos(struct kfd_process *p, struct kfd_ioctl_criu_dumper_arg } exit: + while (ret && bo_index--) { + if (bo_buckets[bo_index].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + close_fd(bo_buckets[bo_index].dmabuf_fd); + } + kvfree(bo_buckets); return ret; } @@ -2516,6 +2558,7 @@ static int criu_restore_bos(struct kfd_process *p, struct kfd_ioctl_criu_restore struct kfd_criu_bo_priv_data *bo_priv; struct kfd_dev *dev; struct kfd_process_device *pdd; + struct kgd_mem *kgd_mem; void *mem; u64 offset; int idr_handle; @@ -2663,6 +2706,16 @@ static int criu_restore_bos(struct kfd_process *p, struct kfd_ioctl_criu_restore } pr_debug("map memory was successful for the BO\n"); + /* create the dmabuf object and export the bo */ + kgd_mem = (struct kgd_mem *)mem; + if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, + DRM_RDWR, + &bo_bucket->dmabuf_fd); + if (ret) + goto exit; + } + } /* done */ /* Flush TLBs after waiting for the page table updates to complete */ @@ -2687,6 +2740,10 @@ static int criu_restore_bos(struct kfd_process *p, struct kfd_ioctl_criu_restore ret = -EFAULT; exit: + while (ret && i--) { + if (bo_buckets[i].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + close_fd(bo_buckets[i].dmabuf_fd); + } kvfree(objects); return ret; } -- 2.17.1