Add GPUSVM copy SRAM / VRAM vfunc functions and connect to migration layer. Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> Me: Fix vram_addr == 0 case --- drivers/gpu/drm/xe/xe_svm.c | 153 ++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 258a94e83e57..6c690ba827e7 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -6,6 +6,7 @@ #include "drm_gpusvm.h" #include "xe_gt_tlb_invalidation.h" +#include "xe_migrate.h" #include "xe_pt.h" #include "xe_svm.h" #include "xe_vm.h" @@ -270,9 +271,161 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } +static struct xe_mem_region *page_to_mr(struct page *page) +{ + return container_of(page->pgmap, struct xe_mem_region, pagemap); +} + +static struct xe_tile *mr_to_tile(struct xe_mem_region *mr) +{ + return container_of(mr, struct xe_tile, mem.vram); +} + +static u64 xe_mem_region_page_to_dpa(struct xe_mem_region *mr, + struct page *page) +{ + u64 dpa; + struct xe_tile *tile = mr_to_tile(mr); + u64 pfn = page_to_pfn(page); + u64 offset; + + xe_tile_assert(tile, is_device_private_page(page)); + xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= mr->hpa_base); + + offset = (pfn << PAGE_SHIFT) - mr->hpa_base; + dpa = mr->dpa_base + offset; + + return dpa; +} + +enum xe_svm_copy_dir { + XE_SVM_COPY_TO_VRAM, + XE_SVM_COPY_TO_SRAM, +}; + +static int xe_svm_copy(struct drm_gpusvm *gpusvm, struct page **pages, + dma_addr_t *dma_addr, unsigned long npages, + const enum xe_svm_copy_dir dir) +{ + struct xe_vm *vm = gpusvm_to_vm(gpusvm); + struct xe_mem_region *mr = NULL; + struct xe_tile *tile; + struct dma_fence *fence = NULL; + unsigned long i; +#define VRAM_ADDR_INVALID ~0x0ull + u64 vram_addr = VRAM_ADDR_INVALID; + int err = 0, pos = 0; + bool sram = dir == XE_SVM_COPY_TO_SRAM; + + for (i = 0; i < npages; ++i) { + struct page *spage = pages[i]; + struct dma_fence *__fence; + u64 __vram_addr; + bool match = false, chunk, last; + + chunk = (i - pos) == (SZ_2M / PAGE_SIZE); + last = (i + 1) == npages; + + if (!dma_addr[i] && vram_addr == VRAM_ADDR_INVALID) + continue; + + if (!mr) { + mr = page_to_mr(spage); + tile = mr_to_tile(mr); + } + + if (dma_addr[i]) { + __vram_addr = xe_mem_region_page_to_dpa(mr, spage); + if (vram_addr == VRAM_ADDR_INVALID) { + vram_addr = __vram_addr; + pos = i; + } + + xe_assert(vm->xe, __vram_addr != VRAM_ADDR_INVALID); + xe_assert(vm->xe, vram_addr != VRAM_ADDR_INVALID); + + match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr; + } + + if (!match || chunk || last) { + int incr = (match && last) ? 1 : 0; + + if (vram_addr != VRAM_ADDR_INVALID) { + if (sram) + __fence = xe_migrate_from_vram(tile->migrate, + i - pos + incr, + vram_addr, + dma_addr + pos); + else + __fence = xe_migrate_to_vram(tile->migrate, + i - pos + incr, + dma_addr + pos, + vram_addr); + if (IS_ERR(__fence)) { + err = PTR_ERR(__fence); + goto err_out; + } + + dma_fence_put(fence); + fence = __fence; + } + + if (dma_addr[i]) { + vram_addr = __vram_addr; + pos = i; + } else { + vram_addr = VRAM_ADDR_INVALID; + } + + if (!match && last && dma_addr[i]) { + if (sram) + __fence = xe_migrate_from_vram(tile->migrate, 1, + vram_addr, + dma_addr + pos); + else + __fence = xe_migrate_to_vram(tile->migrate, 1, + dma_addr + pos, + vram_addr); + if (IS_ERR(__fence)) { + err = PTR_ERR(__fence); + goto err_out; + } + + dma_fence_put(fence); + fence = __fence; + } + } + } + +err_out: + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + + return err; +#undef VRAM_ADDR_INVALID +} + +static int xe_svm_copy_to_vram(struct drm_gpusvm *gpusvm, struct page **pages, + dma_addr_t *dma_addr, unsigned long npages) +{ + return xe_svm_copy(gpusvm, pages, dma_addr, npages, + XE_SVM_COPY_TO_VRAM); +} + +static int xe_svm_copy_to_sram(struct drm_gpusvm *gpusvm, struct page **pages, + dma_addr_t *dma_addr, unsigned long npages) +{ + return xe_svm_copy(gpusvm, pages, dma_addr, npages, + XE_SVM_COPY_TO_SRAM); +} + static const struct drm_gpusvm_ops gpusvm_ops = { .range_alloc = xe_svm_range_alloc, .range_free = xe_svm_range_free, + .copy_to_vram = xe_svm_copy_to_vram, + .copy_to_sram = xe_svm_copy_to_sram, .invalidate = xe_svm_invalidate, }; -- 2.34.1