KFD buffer objects do not have a GEM handle associated with them, so they cannot be used directly with libdrm to initiate a system DMA (sDMA) operation to speed up the checkpoint and restore operations. Export them as dmabuf objects and use the libdrm helper (amdgpu_bo_import) to further process the sDMA command submissions. With sDMA, we see a huge improvement in checkpoint and restore operations compared to the generic PCI-based access via the host data path. Suggested-by: Felix Kuehling <felix.kuehling@xxxxxxx> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx> Signed-off-by: David Yat Sin <david.yatsin@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 71 +++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 69edeaf3893e..ab5107a3fe36 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -35,6 +35,7 @@ #include <linux/mman.h> #include <linux/ptrace.h> #include <linux/dma-buf.h> +#include <linux/fdtable.h> #include <asm/processor.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" @@ -42,6 +43,7 @@ #include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" +#include "amdgpu_dma_buf.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1936,6 +1938,33 @@ uint32_t get_process_num_bos(struct kfd_process *p) return num_of_bos; } +static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags, + u32 *shared_fd) +{ + struct dma_buf *dmabuf; + int ret; + + dmabuf = amdgpu_gem_prime_export(gobj, flags); + if (IS_ERR(dmabuf)) { + ret = PTR_ERR(dmabuf); + pr_err("dmabuf export failed for the BO\n"); + return ret; + } + + ret = dma_buf_fd(dmabuf, flags); + if (ret < 0) { + pr_err("dmabuf create fd failed, ret:%d\n", ret); + goto out_free_dmabuf; + } + + *shared_fd = ret; + return 0; + 
+out_free_dmabuf: + dma_buf_put(dmabuf); + return ret; +} + static int criu_checkpoint_bos(struct kfd_process *p, uint32_t num_bos, uint8_t __user *user_bos, @@ -1997,6 +2026,14 @@ static int criu_checkpoint_bos(struct kfd_process *p, goto exit; } } + if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + ret = criu_get_prime_handle(&dumper_bo->tbo.base, + bo_bucket->alloc_flags & + KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, + &bo_bucket->dmabuf_fd); + if (ret) + goto exit; + } if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id); @@ -2041,6 +2078,10 @@ static int criu_checkpoint_bos(struct kfd_process *p, *priv_offset += num_bos * sizeof(*bo_privs); exit: + while (ret && bo_index--) { + if (bo_buckets[bo_index].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + close_fd(bo_buckets[bo_index].dmabuf_fd); + } kvfree(bo_buckets); kvfree(bo_privs); @@ -2141,16 +2182,28 @@ static int criu_checkpoint(struct file *filep, ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data, &priv_offset); if (ret) - goto exit_unlock; + goto close_bo_fds; ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data, &priv_offset); if (ret) - goto exit_unlock; + goto close_bo_fds; /* TODO: Dump SVM-Ranges */ } +close_bo_fds: + if (ret) { + /* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */ + uint32_t i; + struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos; + + for (i = 0; i < num_bos; i++) { + if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + close_fd(bo_buckets[i].dmabuf_fd); + } + } + exit_unlock: mutex_unlock(&p->mutex); if (ret) @@ -2345,6 +2398,7 @@ static int criu_restore_bos(struct kfd_process *p, struct kfd_criu_bo_priv_data *bo_priv; struct kfd_dev *dev; struct kfd_process_device *pdd; + struct kgd_mem *kgd_mem; void *mem; u64 offset; int idr_handle; @@ -2484,6 +2538,15 @@ static int 
criu_restore_bos(struct kfd_process *p, } pr_debug("map memory was successful for the BO\n"); + /* create the dmabuf object and export the bo */ + kgd_mem = (struct kgd_mem *)mem; + if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, + DRM_RDWR, + &bo_bucket->dmabuf_fd); + if (ret) + goto exit; + } } /* done */ if (flush_tlbs) { @@ -2511,6 +2574,10 @@ static int criu_restore_bos(struct kfd_process *p, ret = -EFAULT; exit: + while (ret && i--) { + if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + close_fd(bo_buckets[i].dmabuf_fd); + } kvfree(bo_buckets); kvfree(bo_privs); return ret; -- 2.17.1