On Mon, Mar 03 2025 at 14:00, Jonathan Cavitt wrote: > Add support for userspace to request a list of observed failed pagefaults from > a specified VM. > > v2: > - Only allow querying of failed pagefaults (Matt Brost) > > v3: > - Remove unnecessary size parameter from helper function, as it > is a property of the arguments. (jcavitt) > - Remove unnecessary copy_from_user (Jainxun) > - Set address_precision to 1 (Jainxun) > - Report max size instead of dynamic size for memory allocation > purposes. Total memory usage is reported separately. > > Signed-off-by: Jonathan Cavitt <jonathan.cavitt@xxxxxxxxx> > Suggested-by: Matthew Brost <matthew.brost@xxxxxxxxx> > CC: Jainxun Zhang <jianxun.zhang@xxxxxxxxx> > --- > drivers/gpu/drm/xe/xe_device.c | 3 ++ > drivers/gpu/drm/xe/xe_vm.c | 77 > ++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/xe/xe_vm.h | 2 + > 3 files changed, 82 insertions(+) > > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c > index 9454b51f7ad8..43accae152ff 100644 > --- a/drivers/gpu/drm/xe/xe_device.c > +++ b/drivers/gpu/drm/xe/xe_device.c > @@ -193,6 +193,9 @@ static const struct drm_ioctl_desc xe_ioctls[] = { > DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, > xe_wait_user_fence_ioctl, > DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, > DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF_DRV(XE_VM_GET_PROPERTY, > xe_vm_get_property_ioctl, > + DRM_RENDER_ALLOW), > + > }; > > static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index > 6211b971bbbd..139fcecf56c6 100644 > --- a/drivers/gpu/drm/xe/xe_vm.c > +++ b/drivers/gpu/drm/xe/xe_vm.c > @@ -3234,6 +3234,83 @@ int xe_vm_bind_ioctl(struct drm_device *dev, > void *data, struct drm_file *file) > return err; > } > > +static u32 xe_vm_get_property_size(struct xe_vm *vm, u32 property) { > + switch (property) { > + case DRM_XE_VM_GET_PROPERTY_FAULTS: > + return MAX_PFS * sizeof(struct drm_xe_pf); > + default: > + return -EINVAL; Since this is a negative value, should we change the return value to be int instead of u32? Shuicheng > + } > +} > + > +static int fill_property_pfs(struct xe_vm *vm, > + struct drm_xe_vm_get_property *args) { > + struct drm_xe_pf __user *usr_ptr = u64_to_user_ptr(args->ptr); > + struct drm_xe_pf *fault_list; > + struct drm_xe_pf *fault; > + struct xe_vm_pf_entry *entry; > + u32 size = args->size; > + int i = 0; > + > + fault_list = kzalloc(size, GFP_KERNEL); > + if (!fault_list) > + return -ENOMEM; > + > + spin_lock(&vm->pfs.lock); > + list_for_each_entry(entry, &vm->pfs.list, list) { > + struct xe_pagefault *pf = entry->pf; > + > + fault = &fault_list[i++]; > + fault->address = pf->page_addr; > + fault->address_type = pf->address_type; > + fault->address_precision = 1; > + } > + args->value = vm->pfs.len; > + spin_unlock(&vm->pfs.lock); > + > + if (copy_to_user(usr_ptr, &fault_list, size)) > + return -EFAULT; If return here, there will be memory leak of the fault_list. Shuicheng > + kfree(fault_list); > + > + return 0; > +} > + > +int xe_vm_get_property_ioctl(struct drm_device *drm, void *data, > + struct drm_file *file) > +{ > + struct xe_device *xe = to_xe_device(drm); > + struct xe_file *xef = to_xe_file(file); > + struct drm_xe_vm_get_property *args = data; > + struct xe_vm *vm; > + u32 size; > + > + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) > + return -EINVAL; > + > + vm = xe_vm_lookup(xef, args->vm_id); > + if (XE_IOCTL_DBG(xe, !vm)) > + return -ENOENT; > + > + size = xe_vm_get_property_size(vm, args->property); > + if (size < 0) { > + return size; > + } else if (args->size != size) { > + if (args->size) > + return -EINVAL; > + args->size = size; > + return 0; > + } > + > + switch (args->property) { > + case DRM_XE_VM_GET_PROPERTY_FAULTS: > + return fill_property_pfs(vm, args); > + default: > + return -EINVAL; > + } > +} > + > /** > * xe_vm_bind_kernel_bo - bind a kernel BO to a VM > * @vm: VM to bind the BO to > diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index > 4d94ab5c8ea4..bf6604465aa3 100644 > --- a/drivers/gpu/drm/xe/xe_vm.h > +++ b/drivers/gpu/drm/xe/xe_vm.h > @@ -184,6 +184,8 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void > *data, > struct drm_file *file); > int xe_vm_bind_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > +int xe_vm_get_property_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file); > > void xe_vm_close_and_put(struct xe_vm *vm); > > -- > 2.43.0