On Tue, 2024-10-15 at 20:25 -0700, Matthew Brost wrote:
> Add GPUSVM devic memory copy vfunc functions and connect to migration

s/devic/device

> layer.
>
> v2:
>  - Allow NULL device pages in xe_svm_copy
>  - Use new drm_gpusvm_devmem_ops
>
> Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
> ---
>  drivers/gpu/drm/xe/xe_svm.c | 150 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 150 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 22e6341117bd..b33fd42d035b 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -6,6 +6,7 @@
>  #include "drm_gpusvm.h"
>
>  #include "xe_gt_tlb_invalidation.h"
> +#include "xe_migrate.h"
>  #include "xe_pt.h"
>  #include "xe_svm.h"
>  #include "xe_vm.h"
> @@ -269,6 +270,155 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
>  	up_write(&vm->lock);
>  }
>
> +static struct xe_mem_region *page_to_mr(struct page *page)
> +{
> +	return container_of(page->pgmap, struct xe_mem_region, pagemap);
> +}
> +
> +static struct xe_tile *mr_to_tile(struct xe_mem_region *mr)
> +{
> +	return container_of(mr, struct xe_tile, mem.vram);
> +}
> +
> +static u64 xe_mem_region_page_to_dpa(struct xe_mem_region *mr,
> +				     struct page *page)
> +{
> +	u64 dpa;
> +	struct xe_tile *tile = mr_to_tile(mr);
> +	u64 pfn = page_to_pfn(page);
> +	u64 offset;
> +
> +	xe_tile_assert(tile, is_device_private_page(page));
> +	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= mr->hpa_base);
> +
> +	offset = (pfn << PAGE_SHIFT) - mr->hpa_base;
> +	dpa = mr->dpa_base + offset;
> +
> +	return dpa;
> +}
> +
> +enum xe_svm_copy_dir {
> +	XE_SVM_COPY_TO_VRAM,
> +	XE_SVM_COPY_TO_SRAM,
> +};
> +
> +static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
> +		       unsigned long npages, const enum xe_svm_copy_dir dir)
> +{
> +	struct xe_mem_region *mr = NULL;
> +	struct xe_tile *tile;
> +	struct dma_fence *fence = NULL;
> +	unsigned long i;
> +#define VRAM_ADDR_INVALID	~0x0ull
> +	u64 vram_addr = VRAM_ADDR_INVALID;
> +	int err = 0, pos = 0;
> +	bool sram = dir == XE_SVM_COPY_TO_SRAM;
> +
> +	for (i = 0; i < npages; ++i) {
> +		struct page *spage = pages[i];
> +		struct dma_fence *__fence;
> +		u64 __vram_addr;
> +		bool match = false, chunk, last;
> +
> +		chunk = (i - pos) == (SZ_2M / PAGE_SIZE);
> +		last = (i + 1) == npages;
> +
> +		if (!dma_addr[i] && vram_addr == VRAM_ADDR_INVALID)
> +			continue;
> +
> +		if (!mr && spage) {
> +			mr = page_to_mr(spage);
> +			tile = mr_to_tile(mr);
> +		}
> +
> +		if (dma_addr[i] && spage) {
> +			__vram_addr = xe_mem_region_page_to_dpa(mr, spage);
> +			if (vram_addr == VRAM_ADDR_INVALID) {
> +				vram_addr = __vram_addr;
> +				pos = i;
> +			}
> +
> +			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
> +		}
> +
> +		if (!match || chunk || last) {
> +			int incr = (match && last) ? 1 : 0;
> +
> +			if (vram_addr != VRAM_ADDR_INVALID) {
> +				if (sram)
> +					__fence = xe_migrate_from_vram(tile->migrate,
> +								       i - pos + incr,
> +								       vram_addr,
> +								       dma_addr + pos);
> +				else
> +					__fence = xe_migrate_to_vram(tile->migrate,
> +								     i - pos + incr,
> +								     dma_addr + pos,
> +								     vram_addr);
> +				if (IS_ERR(__fence)) {
> +					err = PTR_ERR(__fence);
> +					goto err_out;
> +				}
> +
> +				dma_fence_put(fence);
> +				fence = __fence;
> +			}
> +
> +			if (dma_addr[i] && spage) {
> +				vram_addr = __vram_addr;
> +				pos = i;
> +			} else {
> +				vram_addr = VRAM_ADDR_INVALID;
> +			}
> +
> +			if (!match && last && dma_addr[i] && spage) {
> +				if (sram)
> +					__fence = xe_migrate_from_vram(tile->migrate, 1,
> +								       vram_addr,
> +								       dma_addr + pos);
> +				else
> +					__fence = xe_migrate_to_vram(tile->migrate, 1,
> +								     dma_addr + pos,
> +								     vram_addr);
> +				if (IS_ERR(__fence)) {
> +					err = PTR_ERR(__fence);
> +					goto err_out;
> +				}
> +
> +				dma_fence_put(fence);
> +				fence = __fence;
> +			}

I think the flow in this function is a bit hard to follow. Could it
perhaps be simplified? If not, perhaps add a comment to the function
describing what it expects from the input arguments and the possible
corner cases that complicate it?
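Something along these lines, perhaps (a rough sketch based only on my
reading of the loop, so please correct anything I've misread):

	/*
	 * xe_svm_copy() - Copy pages between VRAM and system memory.
	 * @pages: Device private (VRAM) pages. Entries may be NULL, in
	 *         which case that index is treated as a hole.
	 * @dma_addr: DMA addresses of the system memory pages. Entries
	 *            may likewise be unpopulated.
	 * @npages: Number of entries in @pages and @dma_addr.
	 * @dir: Copy direction.
	 *
	 * Walks both arrays in lockstep, coalescing runs of physically
	 * contiguous VRAM pages, capped at roughly SZ_2M, into a single
	 * migrate job. A run is flushed when contiguity breaks, when
	 * the chunk limit is hit, on a hole in either array, or at the
	 * last entry. Only the most recent fence is kept and waited on
	 * before returning.
	 */

(I'm assuming migrate jobs on a tile complete in submission order, so
that waiting on the last fence alone is sufficient; if not, that would
be worth spelling out here as well.)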
> +		}
> +	}
> +
> +err_out:
> +	if (fence) {
> +		dma_fence_wait(fence, false);
> +		dma_fence_put(fence);
> +	}
> +
> +	return err;
> +#undef VRAM_ADDR_INVALID
> +}
> +
> +static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
> +				 unsigned long npages)
> +{
> +	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
> +}
> +
> +static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
> +			      unsigned long npages)
> +{
> +	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
> +}
> +
> +__maybe_unused

Is this __maybe_unused to be removed in a follow-up patch? If so,
could you add a comment stating that?

> +static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
> +	.copy_to_devmem = xe_svm_copy_to_devmem,
> +	.copy_to_ram = xe_svm_copy_to_ram,
> +};
> +
>  static const struct drm_gpusvm_ops gpusvm_ops = {
>  	.range_alloc = xe_svm_range_alloc,
>  	.range_free = xe_svm_range_free,

Thanks,
Thomas
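P.S. On the __maybe_unused: if the plan is indeed to hook these ops up
to drm_gpusvm later in the series, even a one-liner above the
annotation would do for me, for example (hypothetical wording):

	/* __maybe_unused until hooked up to drm_gpusvm in a later patch */
	static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
		.copy_to_devmem = xe_svm_copy_to_devmem,
		.copy_to_ram = xe_svm_copy_to_ram,
	};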