From: Marek Olšák <marek.olsak@xxxxxxx>

v2: update amdgpu-symbol-check
---
 amdgpu/amdgpu-symbol-check |  1 +
 amdgpu/amdgpu.h            | 14 ++++++++++++++
 amdgpu/amdgpu_cs.c         | 22 ++++++++++++++++++++++
 include/drm/amdgpu_drm.h   | 21 +++++++++++++++++++++
 4 files changed, 58 insertions(+)

diff --git a/amdgpu/amdgpu-symbol-check b/amdgpu/amdgpu-symbol-check
index 7ecfc98..d9f89ef 100755
--- a/amdgpu/amdgpu-symbol-check
+++ b/amdgpu/amdgpu-symbol-check
@@ -27,20 +27,21 @@ amdgpu_bo_wait_for_idle
 amdgpu_create_bo_from_user_mem
 amdgpu_cs_chunk_fence_info_to_data
 amdgpu_cs_chunk_fence_to_dep
 amdgpu_cs_create_semaphore
 amdgpu_cs_create_syncobj
 amdgpu_cs_ctx_create
 amdgpu_cs_ctx_free
 amdgpu_cs_destroy_semaphore
 amdgpu_cs_destroy_syncobj
 amdgpu_cs_export_syncobj
+amdgpu_cs_fence_to_handle
 amdgpu_cs_import_syncobj
 amdgpu_cs_query_fence_status
 amdgpu_cs_query_reset_state
 amdgpu_cs_signal_semaphore
 amdgpu_cs_submit
 amdgpu_cs_submit_raw
 amdgpu_cs_syncobj_export_sync_file
 amdgpu_cs_syncobj_import_sync_file
 amdgpu_cs_syncobj_wait
 amdgpu_cs_wait_fences
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 979acfc..23cde10 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -1426,20 +1426,34 @@ int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
  *
  * \return   0 on success\n
  *          <0 - Negative POSIX Error code
  *
 */
 int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
					uint32_t syncobj,
					int sync_file_fd);
 
 /**
+ * Export an amdgpu fence as a handle (syncobj or fd).
+ *
+ * \param   what        AMDGPU_FENCE_TO_HANDLE_GET_{SYNCOBJ, FD}
+ * \param   out_handle  returned handle
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ */
+int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
+			      struct amdgpu_cs_fence *fence,
+			      uint32_t what,
+			      uint32_t *out_handle);
+
+/**
  * Submit raw command submission to kernel
  *
  * \param   dev            - \c [in] device handle
  * \param   context        - \c [in] context handle for context id
  * \param   bo_list_handle - \c [in] request bo list handle (0 for none)
  * \param   num_chunks     - \c [in] number of CS chunks to submit
  * \param   chunks         - \c [in] array of CS chunks
  * \param   seq_no         - \c [out] output sequence number for submission.
  *
  * \return   0 on success\n
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index 2cde7bf..9577d5c 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -706,10 +706,32 @@ void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
 void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
				   struct drm_amdgpu_cs_chunk_dep *dep)
 {
	dep->ip_type = fence->ip_type;
	dep->ip_instance = fence->ip_instance;
	dep->ring = fence->ring;
	dep->ctx_id = fence->context->id;
	dep->handle = fence->fence;
 }
+
+int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
+			      struct amdgpu_cs_fence *fence,
+			      uint32_t what,
+			      uint32_t *out_handle)
+{
+	union drm_amdgpu_fence_to_handle fth = {0};
+	int r;
+
+	fth.in.fence.ctx_id = fence->context->id;
+	fth.in.fence.ip_type = fence->ip_type;
+	fth.in.fence.ip_instance = fence->ip_instance;
+	fth.in.fence.ring = fence->ring;
+	fth.in.fence.seq_no = fence->fence;
+	fth.in.what = what;
+
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_FENCE_TO_HANDLE,
+				&fth, sizeof(fth));
+	if (r == 0)
+		*out_handle = fth.out.handle;
+	return r;
+}
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index d9aa4a3..00f1b81 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -45,55 +45,59 @@ extern "C" {
 #define DRM_AMDGPU_CS			0x04
 #define DRM_AMDGPU_INFO			0x05
 #define DRM_AMDGPU_GEM_METADATA		0x06
 #define DRM_AMDGPU_GEM_WAIT_IDLE	0x07
 #define DRM_AMDGPU_GEM_VA		0x08
 #define DRM_AMDGPU_WAIT_CS		0x09
 #define DRM_AMDGPU_GEM_OP		0x10
 #define DRM_AMDGPU_GEM_USERPTR		0x11
 #define DRM_AMDGPU_WAIT_FENCES		0x12
 #define DRM_AMDGPU_VM			0x13
+#define DRM_AMDGPU_FENCE_TO_HANDLE	0x15
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
 #define DRM_IOCTL_AMDGPU_CTX		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx)
 #define DRM_IOCTL_AMDGPU_BO_LIST	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list)
 #define DRM_IOCTL_AMDGPU_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs)
 #define DRM_IOCTL_AMDGPU_INFO		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info)
 #define DRM_IOCTL_AMDGPU_GEM_METADATA	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata)
 #define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle)
 #define DRM_IOCTL_AMDGPU_GEM_VA		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va)
 #define DRM_IOCTL_AMDGPU_WAIT_CS	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
 #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
 #define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
+#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
 #define AMDGPU_GEM_DOMAIN_VRAM		0x4
 #define AMDGPU_GEM_DOMAIN_GDS		0x8
 #define AMDGPU_GEM_DOMAIN_GWS		0x10
 #define AMDGPU_GEM_DOMAIN_OA		0x20
 
 /* Flag that CPU access will be required for the case of VRAM domain */
 #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
 /* Flag that CPU access will not work, this VRAM domain is invisible */
 #define AMDGPU_GEM_CREATE_NO_CPU_ACCESS		(1 << 1)
 /* Flag that USWC attributes should be used for GTT */
 #define AMDGPU_GEM_CREATE_CPU_GTT_USWC		(1 << 2)
 /* Flag that the memory should be in VRAM and cleared */
 #define AMDGPU_GEM_CREATE_VRAM_CLEARED		(1 << 3)
 /* Flag that create shadow bo(GTT) while allocating vram bo */
 #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is always valid in this VM */
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
 
 struct drm_amdgpu_gem_create_in  {
	/** the requested memory size */
	__u64 bo_size;
	/** physical start_addr alignment in bytes for some HW requirements */
	__u64 alignment;
	/** the requested memory domains */
	__u64 domains;
	/** allocation flags */
	__u64 domain_flags;
@@ -506,20 +510,34 @@ struct drm_amdgpu_cs_chunk_dep {
 struct drm_amdgpu_cs_chunk_fence {
	__u32 handle;
	__u32 offset;
 };
 
 struct drm_amdgpu_cs_chunk_sem {
	__u32 handle;
 };
 
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ	0
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD	1
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD	2
+
+union drm_amdgpu_fence_to_handle {
+	struct {
+		struct drm_amdgpu_fence fence;
+		__u32 what;
+	} in;
+	struct {
+		__u32 handle;
+	} out;
+};
+
 struct drm_amdgpu_cs_chunk_data {
	union {
		struct drm_amdgpu_cs_chunk_ib ib_data;
		struct drm_amdgpu_cs_chunk_fence fence_data;
	};
 };
 
 /**
  * Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU
  *
@@ -757,20 +775,21 @@ struct drm_amdgpu_info_device {
	__u32 pci_rev;
	__u32 family;
	__u32 num_shader_engines;
	__u32 num_shader_arrays_per_engine;
	/* in KHz */
	__u32 gpu_counter_freq;
	__u64 max_engine_clock;
	__u64 max_memory_clock;
	/* cu information */
	__u32 cu_active_number;
+	/* NOTE: cu_ao_mask is INVALID, DON'T use it */
	__u32 cu_ao_mask;
	__u32 cu_bitmap[4][4];
	/** Render backend pipe mask. One render backend is CB+DB. */
	__u32 enabled_rb_pipes_mask;
	__u32 num_rb_pipes;
	__u32 num_hw_gfx_contexts;
	__u32 _pad;
	__u64 ids_flags;
	/** Starting virtual address for UMDs. */
	__u64 virtual_address_offset;
@@ -811,20 +830,22 @@ struct drm_amdgpu_info_device {
	__u32 num_cu_per_sh;
	/* number of tcc blocks*/
	__u32 num_tcc_blocks;
	/* gs vgt table depth*/
	__u32 gs_vgt_table_depth;
	/* gs primitive buffer depth*/
	__u32 gs_prim_buffer_depth;
	/* max gs wavefront per vgt*/
	__u32 max_gs_waves_per_vgt;
	__u32 _pad1;
+	/* always on cu bitmap */
+	__u32 cu_ao_bitmap[4][4];
 };
 
 struct drm_amdgpu_info_hw_ip {
	/** Version of h/w IP */
	__u32 hw_ip_version_major;
	__u32 hw_ip_version_minor;
	/** Capabilities */
	__u64 capabilities_flags;
	/** command buffer address start alignment*/
	__u32 ib_start_alignment;
-- 
2.7.4
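
Not part of the patch: a short usage sketch of the new entry point, for reviewers who want to see the intended call sequence. It assumes a device handle from amdgpu_device_initialize(), a context handle, and the sequence number returned by an earlier submission on GFX ring 0; the helper name export_fence_as_sync_file and those assumptions are illustrative only.

#include <stdint.h>

#include "amdgpu.h"
#include "amdgpu_drm.h"

/* Illustrative only: convert the fence of a previously submitted GFX job,
 * identified by its sequence number, into a sync_file fd that can be
 * handed to another process or another driver. */
static int export_fence_as_sync_file(amdgpu_device_handle dev,
				     amdgpu_context_handle ctx,
				     uint64_t seq_no, int *out_fd)
{
	struct amdgpu_cs_fence fence = {
		.context = ctx,
		.ip_type = AMDGPU_HW_IP_GFX,
		.ip_instance = 0,
		.ring = 0,
		.fence = seq_no,
	};
	uint32_t handle;
	int r;

	/* GET_SYNC_FILE_FD returns a sync_file fd in "handle"; the
	 * GET_SYNCOBJ and GET_SYNCOBJ_FD variants return a syncobj
	 * handle or syncobj fd instead. */
	r = amdgpu_cs_fence_to_handle(dev, &fence,
				      AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD,
				      &handle);
	if (r)
		return r;

	*out_fd = (int)handle;
	return 0;
}

With AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ the same call would yield a syncobj handle that the existing amdgpu_cs_syncobj_* helpers can operate on.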