Debugger needs access to the client's vm to read and write. For example inspecting ISA/ELF and setting up breakpoints. Add ioctl to open target vm with debugger client and vm_handle and hook up pread/pwrite possibility. Open will take timeout argument so that standard fsync can be used for explicit flushing between cpu/gpu for the target vm. Implement this for bo backed storage. userptr will be done in following patch. v2: - checkpatch (Maciej) - 32bit fixes (Andrzej) - bo_vmap (Mika) - fix vm leak if can't allocate k_buffer (Mika) - assert vm write held for vma (Matthew) v3: - fw ref, ttm_bo_access - timeout boundary check (Dominik) - dont try to copy to user on zero bytes (Mika) Cc: Matthew Brost <matthew.brost@xxxxxxxxx> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 24 ++ drivers/gpu/drm/xe/xe_eudebug.c | 442 +++++++++++++++++++++++++++ include/uapi/drm/xe_drm_eudebug.h | 19 ++ 3 files changed, 485 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5fcf06835ef0..4c620f95b466 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -551,6 +551,30 @@ #define CCS_MODE_CSLICE(cslice, ccs) \ ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH)) +#define RCU_ASYNC_FLUSH XE_REG(0x149fc) +#define RCU_ASYNC_FLUSH_IN_PROGRESS REG_BIT(31) +#define RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT 28 +#define RCU_ASYNC_FLUSH_ENGINE_ID_DECODE1 REG_BIT(26) +#define RCU_ASYNC_FLUSH_AMFS REG_BIT(8) +#define RCU_ASYNC_FLUSH_PREFETCH REG_BIT(7) +#define RCU_ASYNC_FLUSH_DATA_PORT REG_BIT(6) +#define RCU_ASYNC_FLUSH_DATA_CACHE REG_BIT(5) +#define RCU_ASYNC_FLUSH_HDC_PIPELINE REG_BIT(4) +#define RCU_ASYNC_INVALIDATE_HDC_PIPELINE REG_BIT(3) +#define RCU_ASYNC_INVALIDATE_CONSTANT_CACHE REG_BIT(2) +#define RCU_ASYNC_INVALIDATE_TEXTURE_CACHE REG_BIT(1) +#define RCU_ASYNC_INVALIDATE_INSTRUCTION_CACHE REG_BIT(0) +#define RCU_ASYNC_FLUSH_AND_INVALIDATE_ALL ( \ + RCU_ASYNC_FLUSH_AMFS | \ + RCU_ASYNC_FLUSH_PREFETCH | \ + RCU_ASYNC_FLUSH_DATA_PORT | \ + RCU_ASYNC_FLUSH_DATA_CACHE | \ + RCU_ASYNC_FLUSH_HDC_PIPELINE | \ + RCU_ASYNC_INVALIDATE_HDC_PIPELINE | \ + RCU_ASYNC_INVALIDATE_CONSTANT_CACHE | \ + RCU_ASYNC_INVALIDATE_TEXTURE_CACHE | \ + RCU_ASYNC_INVALIDATE_INSTRUCTION_CACHE) + #define RCU_DEBUG_1 XE_REG(0x14a00) #define RCU_DEBUG_1_ENGINE_STATUS REG_GENMASK(2, 0) #define RCU_DEBUG_1_RUNALONE_ACTIVE REG_BIT(2) diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c index 3cf3616e546d..9d87df75348b 100644 --- a/drivers/gpu/drm/xe/xe_eudebug.c +++ b/drivers/gpu/drm/xe/xe_eudebug.c @@ -5,9 +5,12 @@ #include <linux/anon_inodes.h> #include <linux/delay.h> +#include <linux/file.h> #include <linux/poll.h> #include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <drm/drm_drv.h> #include <drm/drm_managed.h> #include <generated/xe_wa_oob.h> @@ -16,6 +19,7 @@ #include "regs/xe_engine_regs.h" #include "xe_assert.h" +#include "xe_bo.h" #include "xe_device.h" #include "xe_eudebug.h" #include "xe_eudebug_types.h" @@ -1222,6 +1226,8 @@ static long xe_eudebug_eu_control(struct xe_eudebug *d, const u64 arg) return ret; } +static long xe_eudebug_vm_open_ioctl(struct xe_eudebug *d, unsigned long arg); + static long xe_eudebug_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1246,6 +1252,11 @@ static long xe_eudebug_ioctl(struct file *file, ret = xe_eudebug_ack_event_ioctl(d, cmd, arg); eu_dbg(d, "ioctl cmd=EVENT_ACK ret=%ld\n", ret); break; + case DRM_XE_EUDEBUG_IOCTL_VM_OPEN: + ret = xe_eudebug_vm_open_ioctl(d, arg); + eu_dbg(d, "ioctl cmd=VM_OPEN ret=%ld\n", ret); + break; + default: ret = -EINVAL; } @@ -3038,3 +3049,434 @@ void xe_eudebug_ufence_fini(struct xe_user_fence *ufence) xe_eudebug_put(ufence->eudebug.debugger); ufence->eudebug.debugger = NULL; } + +static int xe_eudebug_vma_access(struct xe_vma *vma, u64 offset_in_vma, + void *buf, u64 len, bool write) +{ + struct xe_bo *bo; + u64 bytes; + + lockdep_assert_held_write(&xe_vma_vm(vma)->lock); + + if (XE_WARN_ON(offset_in_vma >= xe_vma_size(vma))) + return -EINVAL; + + bytes = min_t(u64, len, xe_vma_size(vma) - offset_in_vma); + if (!bytes) + return 0; + + bo = xe_bo_get(xe_vma_bo(vma)); + if (bo) { + int ret; + + ret = ttm_bo_access(&bo->ttm, offset_in_vma, buf, bytes, write); + + xe_bo_put(bo); + + return ret; + } + + return -EINVAL; +} + +static int xe_eudebug_vm_access(struct xe_vm *vm, u64 offset, + void *buf, u64 len, bool write) +{ + struct xe_vma *vma; + int ret; + + down_write(&vm->lock); + + vma = xe_vm_find_overlapping_vma(vm, offset, len); + if (vma) { + /* XXX: why find overlapping returns below start? */ + if (offset < xe_vma_start(vma) || + offset >= (xe_vma_start(vma) + xe_vma_size(vma))) { + ret = -EINVAL; + goto out; + } + + /* Offset into vma */ + offset -= xe_vma_start(vma); + ret = xe_eudebug_vma_access(vma, offset, buf, len, write); + } else { + ret = -EINVAL; + } + +out: + up_write(&vm->lock); + + return ret; +} + +struct vm_file { + struct xe_eudebug *debugger; + struct xe_file *xef; + struct xe_vm *vm; + u64 flags; + u64 client_id; + u64 vm_handle; + unsigned int timeout_us; +}; + +static ssize_t __vm_read_write(struct xe_vm *vm, + void *bb, + char __user *r_buffer, + const char __user *w_buffer, + unsigned long offset, + unsigned long len, + const bool write) +{ + ssize_t ret; + + if (!len) + return 0; + + if (write) { + ret = copy_from_user(bb, w_buffer, len); + if (ret) + return -EFAULT; + + ret = xe_eudebug_vm_access(vm, offset, bb, len, true); + if (ret <= 0) + return ret; + + len = ret; + } else { + ret = xe_eudebug_vm_access(vm, offset, bb, len, false); + if (ret <= 0) + return ret; + + len = ret; + + ret = copy_to_user(r_buffer, bb, len); + if (ret) + return -EFAULT; + } + + return len; +} + +static struct xe_vm *find_vm_get(struct xe_eudebug *d, const u32 id) +{ + struct xe_vm *vm; + + mutex_lock(&d->res->lock); + vm = find_resource__unlocked(d->res, XE_EUDEBUG_RES_TYPE_VM, id); + if (vm) + xe_vm_get(vm); + + mutex_unlock(&d->res->lock); + + return vm; +} + +static ssize_t __xe_eudebug_vm_access(struct file *file, + char __user *r_buffer, + const char __user *w_buffer, + size_t count, loff_t *__pos) +{ + struct vm_file *vmf = file->private_data; + struct xe_eudebug * const d = vmf->debugger; + struct xe_device * const xe = d->xe; + const bool write = !!w_buffer; + struct xe_vm *vm; + ssize_t copied = 0; + ssize_t bytes_left = count; + ssize_t ret; + unsigned long alloc_len; + loff_t pos = *__pos; + void *k_buffer; + + if (XE_IOCTL_DBG(xe, write && r_buffer)) + return -EINVAL; + + vm = find_vm_get(d, vmf->vm_handle); + if (XE_IOCTL_DBG(xe, !vm)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, vm != vmf->vm)) { + eu_warn(d, "vm_access(%s): vm handle mismatch client_handle=%llu, vm_handle=%llu, flags=0x%llx, pos=%llu, count=%zu\n", + write ? "write" : "read", + vmf->client_id, vmf->vm_handle, vmf->flags, pos, count); + xe_vm_put(vm); + return -EINVAL; + } + + if (!count) { + xe_vm_put(vm); + return 0; + } + + alloc_len = min_t(unsigned long, ALIGN(count, PAGE_SIZE), 64 * SZ_1M); + do { + k_buffer = vmalloc(alloc_len); + if (k_buffer) + break; + + alloc_len >>= 1; + } while (alloc_len > PAGE_SIZE); + + if (XE_IOCTL_DBG(xe, !k_buffer)) { + xe_vm_put(vm); + return -ENOMEM; + } + + do { + const ssize_t len = min_t(ssize_t, bytes_left, alloc_len); + + ret = __vm_read_write(vm, k_buffer, + write ? NULL : r_buffer + copied, + write ? w_buffer + copied : NULL, + pos + copied, + len, + write); + if (ret <= 0) + break; + + bytes_left -= ret; + copied += ret; + } while (bytes_left > 0); + + vfree(k_buffer); + xe_vm_put(vm); + + if (XE_WARN_ON(copied < 0)) + copied = 0; + + *__pos += copied; + + return copied ?: ret; +} + +static ssize_t xe_eudebug_vm_read(struct file *file, + char __user *buffer, + size_t count, loff_t *pos) +{ + return __xe_eudebug_vm_access(file, buffer, NULL, count, pos); +} + +static ssize_t xe_eudebug_vm_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *pos) +{ + return __xe_eudebug_vm_access(file, NULL, buffer, count, pos); +} + +static int engine_rcu_flush(struct xe_eudebug *d, + struct xe_hw_engine *hwe, + unsigned int timeout_us) +{ + const struct xe_reg psmi_addr = RING_PSMI_CTL(hwe->mmio_base); + struct xe_gt *gt = hwe->gt; + unsigned int fw_ref; + u32 mask = RCU_ASYNC_FLUSH_AND_INVALIDATE_ALL; + u32 psmi_ctrl; + u32 id; + int ret; + + if (hwe->class == XE_ENGINE_CLASS_RENDER) + id = 0; + else if (hwe->class == XE_ENGINE_CLASS_COMPUTE) + id = hwe->instance + 1; + else + return -EINVAL; + + if (id < 8) + mask |= id << RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT; + else + mask |= (id - 8) << RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT | + RCU_ASYNC_FLUSH_ENGINE_ID_DECODE1; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), hwe->domain); + if (!fw_ref) + return -ETIMEDOUT; + + /* Prevent concurrent flushes */ + mutex_lock(&d->eu_lock); + psmi_ctrl = xe_mmio_read32(>->mmio, psmi_addr); + if (!(psmi_ctrl & IDLE_MSG_DISABLE)) + xe_mmio_write32(>->mmio, psmi_addr, _MASKED_BIT_ENABLE(IDLE_MSG_DISABLE)); + + /* XXX: Timeout is per operation but in here we flush previous */ + ret = xe_mmio_wait32(>->mmio, RCU_ASYNC_FLUSH, + RCU_ASYNC_FLUSH_IN_PROGRESS, 0, + timeout_us, NULL, false); + if (ret) + goto out; + + xe_mmio_write32(>->mmio, RCU_ASYNC_FLUSH, mask); + + ret = xe_mmio_wait32(>->mmio, RCU_ASYNC_FLUSH, + RCU_ASYNC_FLUSH_IN_PROGRESS, 0, + timeout_us, NULL, false); +out: + if (!(psmi_ctrl & IDLE_MSG_DISABLE)) + xe_mmio_write32(>->mmio, psmi_addr, _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); + + mutex_unlock(&d->eu_lock); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + return ret; +} + +static int xe_eudebug_vm_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct vm_file *vmf = file->private_data; + struct xe_eudebug *d = vmf->debugger; + struct xe_gt *gt; + int gt_id; + int ret = -EINVAL; + + eu_dbg(d, "vm_fsync: client_handle=%llu, vm_handle=%llu, flags=0x%llx, start=%llu, end=%llu datasync=%d\n", + vmf->client_id, vmf->vm_handle, vmf->flags, start, end, datasync); + + for_each_gt(gt, d->xe, gt_id) { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + /* XXX: vm open per engine? */ + for_each_hw_engine(hwe, gt, id) { + if (hwe->class != XE_ENGINE_CLASS_RENDER && + hwe->class != XE_ENGINE_CLASS_COMPUTE) + continue; + + ret = engine_rcu_flush(d, hwe, vmf->timeout_us); + if (ret) + break; + } + } + + return ret; +} + +static int xe_eudebug_vm_release(struct inode *inode, struct file *file) +{ + struct vm_file *vmf = file->private_data; + struct xe_eudebug *d = vmf->debugger; + + eu_dbg(d, "vm_release: client_handle=%llu, vm_handle=%llu, flags=0x%llx", + vmf->client_id, vmf->vm_handle, vmf->flags); + + xe_vm_put(vmf->vm); + xe_file_put(vmf->xef); + xe_eudebug_put(d); + drm_dev_put(&d->xe->drm); + + kfree(vmf); + + return 0; +} + +static const struct file_operations vm_fops = { + .owner = THIS_MODULE, + .llseek = generic_file_llseek, + .read = xe_eudebug_vm_read, + .write = xe_eudebug_vm_write, + .fsync = xe_eudebug_vm_fsync, + .mmap = NULL, + .release = xe_eudebug_vm_release, +}; + +static long +xe_eudebug_vm_open_ioctl(struct xe_eudebug *d, unsigned long arg) +{ + const u64 max_timeout_ns = DRM_XE_EUDEBUG_VM_SYNC_MAX_TIMEOUT_NSECS; + struct drm_xe_eudebug_vm_open param; + struct xe_device * const xe = d->xe; + struct vm_file *vmf = NULL; + struct xe_file *xef; + struct xe_vm *vm; + struct file *file; + long ret = 0; + int fd; + + if (XE_IOCTL_DBG(xe, _IOC_SIZE(DRM_XE_EUDEBUG_IOCTL_VM_OPEN) != sizeof(param))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !(_IOC_DIR(DRM_XE_EUDEBUG_IOCTL_VM_OPEN) & _IOC_WRITE))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, copy_from_user(¶m, (void __user *)arg, sizeof(param)))) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, param.flags)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, param.timeout_ns > max_timeout_ns)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_eudebug_detached(d))) + return -ENOTCONN; + + xef = find_client_get(d, param.client_handle); + if (xef) + vm = find_vm_get(d, param.vm_handle); + else + vm = NULL; + + if (XE_IOCTL_DBG(xe, !xef)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !vm)) { + ret = -EINVAL; + goto out_file_put; + } + + vmf = kzalloc(sizeof(*vmf), GFP_KERNEL); + if (XE_IOCTL_DBG(xe, !vmf)) { + ret = -ENOMEM; + goto out_vm_put; + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (XE_IOCTL_DBG(xe, fd < 0)) { + ret = fd; + goto out_free; + } + + kref_get(&d->ref); + vmf->debugger = d; + vmf->vm = vm; + vmf->xef = xef; + vmf->flags = param.flags; + vmf->client_id = param.client_handle; + vmf->vm_handle = param.vm_handle; + vmf->timeout_us = div64_u64(param.timeout_ns, 1000ull); + + file = anon_inode_getfile("[xe_eudebug.vm]", &vm_fops, vmf, O_RDWR); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + XE_IOCTL_DBG(xe, ret); + file = NULL; + goto out_fd_put; + } + + file->f_mode |= FMODE_PREAD | FMODE_PWRITE | + FMODE_READ | FMODE_WRITE | FMODE_LSEEK; + + fd_install(fd, file); + + eu_dbg(d, "vm_open: client_handle=%llu, handle=%llu, flags=0x%llx, fd=%d", + vmf->client_id, vmf->vm_handle, vmf->flags, fd); + + XE_WARN_ON(ret); + + drm_dev_get(&xe->drm); + + return fd; + +out_fd_put: + put_unused_fd(fd); + xe_eudebug_put(d); +out_free: + kfree(vmf); +out_vm_put: + xe_vm_put(vm); +out_file_put: + xe_file_put(xef); + + XE_WARN_ON(ret >= 0); + + return ret; +} diff --git a/include/uapi/drm/xe_drm_eudebug.h b/include/uapi/drm/xe_drm_eudebug.h index 1d5f1411c9a8..a5f13563b3b9 100644 --- a/include/uapi/drm/xe_drm_eudebug.h +++ b/include/uapi/drm/xe_drm_eudebug.h @@ -18,6 +18,7 @@ extern "C" { #define DRM_XE_EUDEBUG_IOCTL_READ_EVENT _IO('j', 0x0) #define DRM_XE_EUDEBUG_IOCTL_EU_CONTROL _IOWR('j', 0x2, struct drm_xe_eudebug_eu_control) #define DRM_XE_EUDEBUG_IOCTL_ACK_EVENT _IOW('j', 0x4, struct drm_xe_eudebug_ack_event) +#define DRM_XE_EUDEBUG_IOCTL_VM_OPEN _IOW('j', 0x1, struct drm_xe_eudebug_vm_open) /* XXX: Document events to match their internal counterparts when moved to xe_drm.h */ struct drm_xe_eudebug_event { @@ -187,6 +188,24 @@ struct drm_xe_eudebug_ack_event { __u64 seqno; }; +struct drm_xe_eudebug_vm_open { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @client_handle: id of client */ + __u64 client_handle; + + /** @vm_handle: id of vm */ + __u64 vm_handle; + + /** @flags: flags */ + __u64 flags; + +#define DRM_XE_EUDEBUG_VM_SYNC_MAX_TIMEOUT_NSECS (10ULL * NSEC_PER_SEC) + /** @timeout_ns: Timeout value in nanoseconds operations (fsync) */ + __u64 timeout_ns; +}; + #if defined(__cplusplus) } #endif -- 2.43.0