On Fri, Oct 18, 2024 at 10:08:06AM +0100, Matthew Auld wrote:
> On 18/10/2024 00:39, Matthew Brost wrote:
> > Non-contiguous VRAM cannot be mapped in Xe, nor can non-visible VRAM
> > easily be accessed. Add ttm_bo_access, which is similar to
> > ttm_bo_vm_access, to access such memory.
> 
> Is the plan to roll this out also to object error capture and clear color
> access? Those places seem to be using ttm vmap/kmap, which only seems to
> work with contiguous VRAM, but in those cases we are mapping userspace
> objects which are potentially not contiguous, so I assume that stuff is
> also quite broken atm?
> 

I quickly sent this out without checking the error capture code, but that
does indeed seem to be broken, yet no one is complaining? Seems odd. Let me
look into this a bit more before posting a proper series. Will also update
the error capture if needed.

> > 
> > Visible VRAM access is only supported at the moment, but a follow-up
> > can add GPU access to non-visible VRAM.
> > 
> > Suggested-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> > Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo_vm.c | 20 +++++++++-----
> >  drivers/gpu/drm/xe/xe_bo.c      | 48 +++++++++++++++++++++++++++++++++
> >  include/drm/ttm/ttm_bo.h        |  2 ++
> >  3 files changed, 64 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > index 2c699ed1963a..b53cc064da44 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > @@ -405,13 +405,9 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
> >  	return len;
> >  }
> >  
> > -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
> > -		     void *buf, int len, int write)
> > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset,
> > +		  void *buf, int len, int write)
> >  {
> > -	struct ttm_buffer_object *bo = vma->vm_private_data;
> > -	unsigned long offset = (addr) - vma->vm_start +
> > -		((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node))
> > -		 << PAGE_SHIFT);
> >  	int ret;
> >  
> >  	if (len < 1 || (offset + len) > bo->base.size)
> > @@ -439,6 +435,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
> >  
> >  	return ret;
> >  }
> > +EXPORT_SYMBOL(ttm_bo_access);
> > +
> > +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
> > +		     void *buf, int len, int write)
> > +{
> > +	struct ttm_buffer_object *bo = vma->vm_private_data;
> > +	unsigned long offset = (addr) - vma->vm_start +
> > +		((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node))
> > +		 << PAGE_SHIFT);
> > +
> > +	return ttm_bo_access(bo, offset, buf, len, write);
> > +}
> >  EXPORT_SYMBOL(ttm_bo_vm_access);
> >  
> >  static const struct vm_operations_struct ttm_bo_vm_ops = {
> > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > index 5b232f2951b1..267f3b03a6d0 100644
> > --- a/drivers/gpu/drm/xe/xe_bo.c
> > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > @@ -1111,6 +1111,53 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
> >  	}
> >  }
> >  
> > +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
> > +				unsigned long offset, void *buf, int len,
> > +				int write)
> > +{
> > +	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
> > +	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> > +	struct iosys_map vmap;
> > +	struct xe_res_cursor cursor;
> > +	struct xe_mem_region *vram;
> > +	void __iomem *virtual;
> > +	int bytes_left = len;
> > +
> > +	xe_bo_assert_held(bo);
> 
> I think we need rpm somewhere? Although the bo lock might make this
> tricky.
> 

Do we? Of course we should if we interact with the hardware, but if the
hardware is powered off, a BO shouldn't be in VRAM in the first place.
Maybe do a get_if_awake and bail otherwise? (Rough sketch at the end of
this mail.)

Matt

> > +
> > +	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
> > +		return -EIO;
> > +
> > +	/* FIXME: Use GPU for non-visible VRAM */
> > +	if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
> > +		return -EINVAL;
> > +
> > +	vram = res_to_mem_region(ttm_bo->resource);
> > +	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
> > +		     bo->size - (offset & PAGE_MASK), &cursor);
> > +
> > +	do {
> > +		unsigned long page_offset = offset & ~PAGE_MASK;
> > +		int wcount = min((int)(PAGE_SIZE - page_offset), bytes_left);
> > +
> > +		virtual = (u8 __force *)vram->mapping + cursor.start;
> > +
> > +		iosys_map_set_vaddr_iomem(&vmap, (void __iomem *)virtual);
> > +		if (write)
> > +			xe_map_memcpy_to(xe, &vmap, page_offset, buf, wcount);
> > +		else
> > +			xe_map_memcpy_from(xe, buf, &vmap, page_offset, wcount);
> > +
> > +		offset += wcount;
> > +		buf += wcount;
> > +		bytes_left -= wcount;
> > +		if (bytes_left)
> > +			xe_res_next(&cursor, PAGE_SIZE);
> > +	} while (bytes_left);
> > +
> > +	return len;
> > +}
> > +
> >  const struct ttm_device_funcs xe_ttm_funcs = {
> >  	.ttm_tt_create = xe_ttm_tt_create,
> >  	.ttm_tt_populate = xe_ttm_tt_populate,
> > @@ -1120,6 +1167,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
> >  	.move = xe_bo_move,
> >  	.io_mem_reserve = xe_ttm_io_mem_reserve,
> >  	.io_mem_pfn = xe_ttm_io_mem_pfn,
> > +	.access_memory = xe_ttm_access_memory,
> >  	.release_notify = xe_ttm_bo_release_notify,
> >  	.eviction_valuable = ttm_bo_eviction_valuable,
> >  	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
> > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> > index 5804408815be..8ea11cd8df39 100644
> > --- a/include/drm/ttm/ttm_bo.h
> > +++ b/include/drm/ttm/ttm_bo.h
> > @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo);
> >  int ttm_bo_evict_first(struct ttm_device *bdev,
> >  		       struct ttm_resource_manager *man,
> >  		       struct ttm_operation_ctx *ctx);
> > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset,
> > +		  void *buf, int len, int write);
> >  vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
> >  			     struct vm_fault *vmf);
> >  vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
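
For reference, here is a rough, untested sketch of the get_if_awake idea
mentioned above. It assumes a bool-returning xe_pm_runtime_get_if_active()
is safe to call under the bo lock, and __xe_ttm_access_memory() is a
made-up name standing in for the VRAM copy loop from the patch:

	static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
					unsigned long offset, void *buf,
					int len, int write)
	{
		struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
		int ret;

		/*
		 * If the device is runtime suspended, VRAM should not hold
		 * any valid data, so bail instead of waking the hardware.
		 */
		if (!xe_pm_runtime_get_if_active(xe))
			return -EIO;

		/* Hypothetical helper: the copy loop from the patch above */
		ret = __xe_ttm_access_memory(ttm_bo, offset, buf, len, write);

		xe_pm_runtime_put(xe);
		return ret;
	}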