On Mon, Dec 02, 2024 at 11:13:55AM +0100, Thomas Hellström wrote:
> On Tue, 2024-10-15 at 20:25 -0700, Matthew Brost wrote:
> > Add GPUSVM devic memory copy vfunc functions and connect to migration
> 
> s/devic/device

Yes.

> 
> > layer.
> > 
> > v2:
> >  - Allow NULL device pages in xe_svm_copy
> >  - Use new drm_gpusvm_devmem_ops
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
> > ---
> >  drivers/gpu/drm/xe/xe_svm.c | 150 ++++++++++++++++++++++++++++++++++++
> >  1 file changed, 150 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> > index 22e6341117bd..b33fd42d035b 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -6,6 +6,7 @@
> >  #include "drm_gpusvm.h"
> >  
> >  #include "xe_gt_tlb_invalidation.h"
> > +#include "xe_migrate.h"
> >  #include "xe_pt.h"
> >  #include "xe_svm.h"
> >  #include "xe_vm.h"
> > @@ -269,6 +270,155 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
> >  	up_write(&vm->lock);
> >  }
> >  
> > +static struct xe_mem_region *page_to_mr(struct page *page)
> > +{
> > +	return container_of(page->pgmap, struct xe_mem_region, pagemap);
> > +}
> > +
> > +static struct xe_tile *mr_to_tile(struct xe_mem_region *mr)
> > +{
> > +	return container_of(mr, struct xe_tile, mem.vram);
> > +}
> > +
> > +static u64 xe_mem_region_page_to_dpa(struct xe_mem_region *mr,
> > +				     struct page *page)
> > +{
> > +	u64 dpa;
> > +	struct xe_tile *tile = mr_to_tile(mr);
> > +	u64 pfn = page_to_pfn(page);
> > +	u64 offset;
> > +
> > +	xe_tile_assert(tile, is_device_private_page(page));
> > +	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= mr->hpa_base);
> > +
> > +	offset = (pfn << PAGE_SHIFT) - mr->hpa_base;
> > +	dpa = mr->dpa_base + offset;
> > +
> > +	return dpa;
> > +}
> > +
> > +enum xe_svm_copy_dir {
> > +	XE_SVM_COPY_TO_VRAM,
> > +	XE_SVM_COPY_TO_SRAM,
> > +};
> > +
> > +static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
> > +		       unsigned long npages, const enum xe_svm_copy_dir dir)
> > +{
> > +	struct xe_mem_region *mr = NULL;
> > +	struct xe_tile *tile;
> > +	struct dma_fence *fence = NULL;
> > +	unsigned long i;
> > +#define VRAM_ADDR_INVALID	~0x0ull
> > +	u64 vram_addr = VRAM_ADDR_INVALID;
> > +	int err = 0, pos = 0;
> > +	bool sram = dir == XE_SVM_COPY_TO_SRAM;
> > +
> > +	for (i = 0; i < npages; ++i) {
> > +		struct page *spage = pages[i];
> > +		struct dma_fence *__fence;
> > +		u64 __vram_addr;
> > +		bool match = false, chunk, last;
> > +
> > +		chunk = (i - pos) == (SZ_2M / PAGE_SIZE);
> > +		last = (i + 1) == npages;
> > +
> > +		if (!dma_addr[i] && vram_addr == VRAM_ADDR_INVALID)
> > +			continue;
> > +
> > +		if (!mr && spage) {
> > +			mr = page_to_mr(spage);
> > +			tile = mr_to_tile(mr);
> > +		}
> > +
> > +		if (dma_addr[i] && spage) {
> > +			__vram_addr = xe_mem_region_page_to_dpa(mr, spage);
> > +			if (vram_addr == VRAM_ADDR_INVALID) {
> > +				vram_addr = __vram_addr;
> > +				pos = i;
> > +			}
> > +
> > +			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
> > +		}
> > +
> > +		if (!match || chunk || last) {
> > +			int incr = (match && last) ? 1 : 0;
> > +
> > +			if (vram_addr != VRAM_ADDR_INVALID) {
> > +				if (sram)
> > +					__fence = xe_migrate_from_vram(tile->migrate,
> > +								       i - pos + incr,
> > +								       vram_addr,
> > +								       dma_addr + pos);
> > +				else
> > +					__fence = xe_migrate_to_vram(tile->migrate,
> > +								     i - pos + incr,
> > +								     dma_addr + pos,
> > +								     vram_addr);
> > +				if (IS_ERR(__fence)) {
> > +					err = PTR_ERR(__fence);
> > +					goto err_out;
> > +				}
> > +
> > +				dma_fence_put(fence);
> > +				fence = __fence;
> > +			}
> > +
> > +			if (dma_addr[i] && spage) {
> > +				vram_addr = __vram_addr;
> > +				pos = i;
> > +			} else {
> > +				vram_addr = VRAM_ADDR_INVALID;
> > +			}
> > +
> > +			if (!match && last && dma_addr[i] && spage) {
> > +				if (sram)
> > +					__fence = xe_migrate_from_vram(tile->migrate, 1,
> > +								       vram_addr,
> > +								       dma_addr + pos);
> > +				else
> > +					__fence = xe_migrate_to_vram(tile->migrate, 1,
> > +								     dma_addr + pos,
> > +								     vram_addr);
> > +				if (IS_ERR(__fence)) {
> > +					err = PTR_ERR(__fence);
> > +					goto err_out;
> > +				}
> > +
> > +				dma_fence_put(fence);
> > +				fence = __fence;
> > +			}
> 
> I think the flow in this function is a bit hard to follow. Could it
> perhaps be simplified? If not, perhaps add a comment to the function
> describing what it expects from the input arguments and the possible
> corner cases that complicate it?
> 

Maybe? It may need to be updated to do clears too in the next rev, so let
me play around with this. At a minimum I can add comments + kernel doc; a
rough sketch of what I have in mind is at the bottom of this mail.

Matt

> > +		}
> > +	}
> > +
> > +err_out:
> > +	if (fence) {
> > +		dma_fence_wait(fence, false);
> > +		dma_fence_put(fence);
> > +	}
> > +
> > +	return err;
> > +#undef VRAM_ADDR_INVALID
> > +}
> > +
> > +static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
> > +				 unsigned long npages)
> > +{
> > +	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
> > +}
> > +
> > +static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
> > +			      unsigned long npages)
> > +{
> > +	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
> > +}
> > +
> > +__maybe_unused
> 
> Is this __maybe_unused to be removed in a follow-up patch? If so, could
> you add a comment stating that?
> 
> > +static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
> > +	.copy_to_devmem = xe_svm_copy_to_devmem,
> > +	.copy_to_ram = xe_svm_copy_to_ram,
> > +};
> > +
> >  static const struct drm_gpusvm_ops gpusvm_ops = {
> >  	.range_alloc = xe_svm_range_alloc,
> >  	.range_free = xe_svm_range_free,
> 
> Thanks,
> Thomas
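
P.S. Rough sketch of the kind of kernel-doc I have in mind for
xe_svm_copy(). It just spells out my reading of the current loop (runs of
physically contiguous VRAM pages coalesced into single copy jobs, the 2M
chunk limit, NULL pages / unset DMA addresses acting as holes), so wording
and details are not final:

/**
 * xe_svm_copy() - Copy pages between SRAM and VRAM
 * @pages: Array of device private (VRAM) pages; entries may be NULL
 * @dma_addr: Array of DMA-mapped SRAM addresses; entries may be unset (0)
 * @npages: Number of entries in both arrays
 * @dir: Copy direction, XE_SVM_COPY_TO_VRAM or XE_SVM_COPY_TO_SRAM
 *
 * Walks the two arrays in lockstep and coalesces runs of physically
 * contiguous VRAM pages into single copy jobs. A run is flushed when
 * contiguity breaks, when it reaches SZ_2M, or at the last entry; a NULL
 * page or unset DMA address is a hole that also ends the current run.
 * Only the fence of the last issued copy is kept and waited on before
 * returning.
 *
 * Return: 0 on success, negative error code on failure.
 */

The VRAM_ADDR_INVALID sentinel and the pos bookkeeping would stay as-is;
the point is just to make the chunking conditions explicit in one place.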