This is the new ioctl wrapper used by the new admgpu driver. It's primarily used by xf86-video-amdgpu and mesa. v2: fix amdgpu_drm.h install v3: Integrate some of the sugestions from Emil: clean up Makefile.am, configure.ac capitalize header guards fix _FILE_OFFSET_BITS with config.h use drm_mmap/drm_munmap Remove unused ARRAY_SIZE macro use shared list implementation use shared math implementation use drmGetNodeTypeFromFd helper v4: remove unused tiling defines Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- Makefile.am | 5 + Makefile.sources | 1 + amdgpu/Makefile.am | 53 ++ amdgpu/amdgpu.h | 1276 ++++++++++++++++++++++++++++++++++++++++++++ amdgpu/amdgpu_bo.c | 626 ++++++++++++++++++++++ amdgpu/amdgpu_cs.c | 981 ++++++++++++++++++++++++++++++++++ amdgpu/amdgpu_device.c | 241 +++++++++ amdgpu/amdgpu_gpu_info.c | 275 ++++++++++ amdgpu/amdgpu_internal.h | 208 ++++++++ amdgpu/amdgpu_vamgr.c | 169 ++++++ amdgpu/libdrm_amdgpu.pc.in | 10 + amdgpu/util_hash.c | 382 +++++++++++++ amdgpu/util_hash.h | 99 ++++ amdgpu/util_hash_table.c | 257 +++++++++ amdgpu/util_hash_table.h | 65 +++ configure.ac | 19 + include/drm/amdgpu_drm.h | 580 ++++++++++++++++++++ 17 files changed, 5247 insertions(+) create mode 100644 amdgpu/Makefile.am create mode 100644 amdgpu/amdgpu.h create mode 100644 amdgpu/amdgpu_bo.c create mode 100644 amdgpu/amdgpu_cs.c create mode 100644 amdgpu/amdgpu_device.c create mode 100644 amdgpu/amdgpu_gpu_info.c create mode 100644 amdgpu/amdgpu_internal.h create mode 100644 amdgpu/amdgpu_vamgr.c create mode 100644 amdgpu/libdrm_amdgpu.pc.in create mode 100644 amdgpu/util_hash.c create mode 100644 amdgpu/util_hash.h create mode 100644 amdgpu/util_hash_table.c create mode 100644 amdgpu/util_hash_table.h create mode 100644 include/drm/amdgpu_drm.h diff --git a/Makefile.am b/Makefile.am index 42d3d7f..5defeb2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -57,6 +57,10 @@ if HAVE_RADEON RADEON_SUBDIR = radeon endif +if HAVE_AMDGPU +AMDGPU_SUBDIR = amdgpu 
+endif + if HAVE_OMAP OMAP_SUBDIR = omap endif @@ -79,6 +83,7 @@ SUBDIRS = \ $(INTEL_SUBDIR) \ $(NOUVEAU_SUBDIR) \ $(RADEON_SUBDIR) \ + $(AMDGPU_SUBDIR) \ $(OMAP_SUBDIR) \ $(EXYNOS_SUBDIR) \ $(FREEDRENO_SUBDIR) \ diff --git a/Makefile.sources b/Makefile.sources index e1d861b..09a03a7 100644 --- a/Makefile.sources +++ b/Makefile.sources @@ -28,6 +28,7 @@ LIBDRM_INCLUDE_H_FILES := \ include/drm/qxl_drm.h \ include/drm/r128_drm.h \ include/drm/radeon_drm.h \ + include/drm/amdgpu_drm.h \ include/drm/savage_drm.h \ include/drm/sis_drm.h \ include/drm/tegra_drm.h \ diff --git a/amdgpu/Makefile.am b/amdgpu/Makefile.am new file mode 100644 index 0000000..4559467 --- /dev/null +++ b/amdgpu/Makefile.am @@ -0,0 +1,53 @@ +# Copyright © 2008 Jérôme Glisse +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# +# Authors: +# Jérôme Glisse <glisse@xxxxxxxxxxxxxxx> + +AM_CFLAGS = \ + $(WARN_CFLAGS) -Wno-switch-enum \ + -I$(top_srcdir) \ + $(PTHREADSTUBS_CFLAGS) \ + -I$(top_srcdir)/include/drm + +libdrm_amdgpu_la_LTLIBRARIES = libdrm_amdgpu.la +libdrm_amdgpu_ladir = $(libdir) +libdrm_amdgpu_la_LDFLAGS = -version-number 1:0:0 -no-undefined +libdrm_amdgpu_la_LIBADD = ../libdrm.la @PTHREADSTUBS_LIBS@ + +libdrm_amdgpu_la_SOURCES = \ + amdgpu_bo.c \ + amdgpu_cs.c \ + amdgpu_device.c \ + amdgpu_gpu_info.c \ + amdgpu_internal.h \ + amdgpu_vamgr.c \ + util_hash.c \ + util_hash.h \ + util_hash_table.c \ + util_hash_table.h + +libdrm_amdgpuincludedir = ${includedir}/libdrm +libdrm_amdgpuinclude_HEADERS = \ + amdgpu.h + +pkgconfigdir = @pkgconfigdir@ +pkgconfig_DATA = libdrm_amdgpu.pc diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h new file mode 100644 index 0000000..11a86ef --- /dev/null +++ b/amdgpu/amdgpu.h @@ -0,0 +1,1276 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * +*/ + +/** + * \file amdgpu.h + * + * Declare public libdrm_amdgpu API + * + * This file define API exposed by libdrm_amdgpu library. + * User wanted to use libdrm_amdgpu functionality must include + * this file. + * + */ +#ifndef _AMDGPU_H_ +#define _AMDGPU_H_ + +#include <stdint.h> +#include <stdbool.h> + +struct drm_amdgpu_info_hw_ip; + +/*--------------------------------------------------------------------------*/ +/* --------------------------- Defines ------------------------------------ */ +/*--------------------------------------------------------------------------*/ + +/** + * Define max. number of Command Buffers (IB) which could be sent to the single + * hardware IP to accommodate CE/DE requirements + * + * \sa amdgpu_cs_ib_info +*/ +#define AMDGPU_CS_MAX_IBS_PER_SUBMIT 4 + +/** + * + */ +#define AMDGPU_TIMEOUT_INFINITE 0xffffffffffffffffull + +/** + * The special flag for GFX submission to identify that this is CE IB + * \sa amdgpu_cs_ib_info +*/ +#define AMDGPU_CS_GFX_IB_CE 0x1 + +/** + * The special flag to mark that this IB will re-used + * by client and should not be automatically return back + * to free pool by libdrm_amdgpu when submission is completed. + * + * \sa amdgpu_cs_ib_info +*/ +#define AMDGPU_CS_REUSE_IB 0x2 + +/** + * The special resource flag for IB submission. + * When VRAM is full, some resources may be moved to GTT to make place + * for other resources which want to be in VRAM. This flag affects the order + * in which resources are moved back to VRAM until there is no space there. + * The resources with the highest priority will be moved first. + * The value can be between 0 and 15, inclusive. 
+ */ +#define AMDGPU_IB_RESOURCE_PRIORITY(x) ((x) & 0xf) + + +/*--------------------------------------------------------------------------*/ +/* ----------------------------- Enums ------------------------------------ */ +/*--------------------------------------------------------------------------*/ + +/** + * Enum describing possible handle types + * + * \sa amdgpu_bo_import, amdgpu_bo_export + * +*/ +enum amdgpu_bo_handle_type { + /** GEM flink name (needs DRM authentication, used by DRI2) */ + amdgpu_bo_handle_type_gem_flink_name = 0, + + /** KMS handle which is used by all driver ioctls */ + amdgpu_bo_handle_type_kms = 1, + + /** DMA-buf fd handle */ + amdgpu_bo_handle_type_dma_buf_fd = 2 +}; + +/** + * Enum describing possible context reset states + * + * \sa amdgpu_cs_query_reset_state() + * +*/ +enum amdgpu_cs_ctx_reset_state { + /** No reset was detected */ + amdgpu_cs_reset_no_error = 0, + + /** Reset/TDR was detected and context caused */ + amdgpu_cs_reset_guilty = 1, + + /** Reset/TDR was detected caused by other context */ + amdgpu_cs_reset_innocent = 2, + + /** Reset TDR was detected by cause of it unknown */ + amdgpu_cs_reset_unknown = 3 +}; + +/** + * For performance reasons and to simplify logic libdrm_amdgpu will handle + * IBs only some pre-defined sizes. + * + * \sa amdgpu_cs_alloc_ib() + */ +enum amdgpu_cs_ib_size { + amdgpu_cs_ib_size_4K = 1, + amdgpu_cs_ib_size_16K = 2, + amdgpu_cs_ib_size_32K = 3, + amdgpu_cs_ib_size_64K = 4, + amdgpu_cs_ib_size_128K = 5 +}; + +/** The number of different IB sizes */ +#define AMDGPU_CS_IB_SIZE_NUM 6 + + +/*--------------------------------------------------------------------------*/ +/* -------------------------- Datatypes ----------------------------------- */ +/*--------------------------------------------------------------------------*/ + +/** + * Define opaque pointer to context associated with fd. 
+ * This context will be returned as the result of + * "initialize" function and should be pass as the first + * parameter to any API call + */ +typedef struct amdgpu_device *amdgpu_device_handle; + +/** + * Define GPU Context type as pointer to opaque structure + * Example of GPU Context is the "rendering" context associated + * with OpenGL context (glCreateContext) + */ +typedef struct amdgpu_context *amdgpu_context_handle; + +/** + * Define handle for amdgpu resources: buffer, GDS, etc. + */ +typedef struct amdgpu_bo *amdgpu_bo_handle; + +/** + * Define handle to be used when dealing with command + * buffers (a.k.a. ibs) + * + */ +typedef struct amdgpu_ib *amdgpu_ib_handle; + + +/*--------------------------------------------------------------------------*/ +/* -------------------------- Structures ---------------------------------- */ +/*--------------------------------------------------------------------------*/ + +/** + * Structure describing memory allocation request + * + * \sa amdgpu_bo_alloc() + * +*/ +struct amdgpu_bo_alloc_request { + /** Allocation request. It must be aligned correctly. */ + uint64_t alloc_size; + + /** + * It may be required to have some specific alignment requirements + * for physical back-up storage (e.g. for displayable surface). + * If 0 there is no special alignment requirement + */ + uint64_t phys_alignment; + + /** + * UMD should specify where to allocate memory and how it + * will be accessed by the CPU. + */ + uint32_t preferred_heap; + + /** Additional flags passed on allocation */ + uint64_t flags; +}; + +/** + * Structure describing memory allocation request + * + * \sa amdgpu_bo_alloc() +*/ +struct amdgpu_bo_alloc_result { + /** Assigned virtual MC Base Address */ + uint64_t virtual_mc_base_address; + + /** Handle of allocated memory to be used by the given process only. */ + amdgpu_bo_handle buf_handle; +}; + +/** + * Special UMD specific information associated with buffer. 
+ * + * It may be necessary to pass some buffer characteristics as part + * of buffer sharing. Such information is defined by the UMD and is + * opaque to libdrm_amdgpu as well as to the kernel driver. + * + * \sa amdgpu_bo_set_metadata(), amdgpu_bo_query_info(), + * amdgpu_bo_import(), amdgpu_bo_export() + * +*/ +struct amdgpu_bo_metadata { + /** Special flag associated with surface */ + uint64_t flags; + + /** + * ASIC-specific tiling information (also used by DCE). + * The encoding is defined by the AMDGPU_TILING_* definitions. + */ + uint64_t tiling_info; + + /** Size of metadata associated with the buffer, in bytes. */ + uint32_t size_metadata; + + /** UMD specific metadata. Opaque for kernel */ + uint32_t umd_metadata[64]; +}; + +/** + * Structure describing allocated buffer. Client may need + * to query such information as part of 'sharing' buffers mechanism + * + * \sa amdgpu_bo_set_metadata(), amdgpu_bo_query_info(), + * amdgpu_bo_import(), amdgpu_bo_export() +*/ +struct amdgpu_bo_info { + /** Allocated memory size */ + uint64_t alloc_size; + + /** + * It may be required to have some specific alignment requirements + * for physical back-up storage. + */ + uint64_t phys_alignment; + + /** + * Assigned virtual MC Base Address. + * \note This information will be returned only if this buffer was + * allocated in the same process otherwise 0 will be returned. + */ + uint64_t virtual_mc_base_address; + + /** Heap where to allocate memory. */ + uint32_t preferred_heap; + + /** Additional allocation flags. */ + uint64_t alloc_flags; + + /** Metadata associated with buffer if any. 
*/ + struct amdgpu_bo_metadata metadata; +}; + +/** + * Structure with information about "imported" buffer + * + * \sa amdgpu_bo_import() + * + */ +struct amdgpu_bo_import_result { + /** Handle of memory/buffer to use */ + amdgpu_bo_handle buf_handle; + + /** Buffer size */ + uint64_t alloc_size; + + /** Assigned virtual MC Base Address */ + uint64_t virtual_mc_base_address; +}; + + +/** + * + * Structure to describe GDS partitioning information. + * \note OA and GWS resources are associated with GDS partition + * + * \sa amdgpu_gpu_resource_query_gds_info + * +*/ +struct amdgpu_gds_resource_info { + uint32_t gds_gfx_partition_size; + uint32_t compute_partition_size; + uint32_t gds_total_size; + uint32_t gws_per_gfx_partition; + uint32_t gws_per_compute_partition; + uint32_t oa_per_gfx_partition; + uint32_t oa_per_compute_partition; +}; + + + +/** + * Structure describing result of request to allocate GDS + * + * \sa amdgpu_gpu_resource_gds_alloc + * +*/ +struct amdgpu_gds_alloc_info { + /** Handle assigned to gds allocation */ + amdgpu_bo_handle resource_handle; + + /** How much was really allocated */ + uint32_t gds_memory_size; + + /** Number of GWS resources allocated */ + uint32_t gws; + + /** Number of OA resources allocated */ + uint32_t oa; +}; + +/** + * Structure describing an allocated command buffer (a.k.a. IB) + * + * \sa amdgpu_cs_alloc_ib() + * +*/ +struct amdgpu_cs_ib_alloc_result { + /** IB allocation handle */ + amdgpu_ib_handle handle; + + /** Assigned GPU VM MC Address of command buffer */ + uint64_t mc_address; + + /** Address to be used for CPU access */ + void *cpu; +}; + +/** + * Structure describing IB + * + * \sa amdgpu_cs_request, amdgpu_cs_submit() + * +*/ +struct amdgpu_cs_ib_info { + /** Special flags */ + uint64_t flags; + + /** Handle of command buffer */ + amdgpu_ib_handle ib_handle; + + /** + * Size of Command Buffer to be submitted. + * - The size is in units of dwords (4 bytes). 
+ * - Must be less or equal to the size of allocated IB + * - Could be 0 + */ + uint32_t size; +}; + +/** + * Structure describing submission request + * + * \note We could have several IBs as packet. e.g. CE, CE, DE case for gfx + * + * \sa amdgpu_cs_submit() +*/ +struct amdgpu_cs_request { + /** Specify flags with additional information */ + uint64_t flags; + + /** Specify HW IP block type to which to send the IB. */ + unsigned ip_type; + + /** IP instance index if there are several IPs of the same type. */ + unsigned ip_instance; + + /** + * Specify ring index of the IP. We could have several rings + * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1. + */ + uint32_t ring; + + /** + * Specify number of resource handles passed. + * Size of 'handles' array + * + */ + uint32_t number_of_resources; + + /** Array of resources used by submission. */ + amdgpu_bo_handle *resources; + + /** Array of resources flags. This is optional and can be NULL. */ + uint8_t *resource_flags; + + /** Number of IBs to submit in the field ibs. */ + uint32_t number_of_ibs; + + /** + * IBs to submit. Those IBs will be submit together as single entity + */ + struct amdgpu_cs_ib_info *ibs; +}; + +/** + * Structure describing request to check submission state using fence + * + * \sa amdgpu_cs_query_fence_status() + * +*/ +struct amdgpu_cs_query_fence { + + /** In which context IB was sent to execution */ + amdgpu_context_handle context; + + /** Timeout in nanoseconds. */ + uint64_t timeout_ns; + + /** To which HW IP type the fence belongs */ + unsigned ip_type; + + /** IP instance index if there are several IPs of the same type. 
*/ + unsigned ip_instance; + + /** Ring index of the HW IP */ + uint32_t ring; + + /** Flags */ + uint64_t flags; + + /** Specify fence for which we need to check + * submission status.*/ + uint64_t fence; +}; + +/** + * Structure which provides information about GPU VM MC Address space + * alignment requirements + * + * \sa amdgpu_query_buffer_size_alignment + */ +struct amdgpu_buffer_size_alignments { + /** Size alignment requirement for allocation in + * local memory */ + uint64_t size_local; + + /** + * Size alignment requirement for allocation in remote memory + */ + uint64_t size_remote; +}; + + +/** + * Structure which provides information about heap + * + * \sa amdgpu_query_heap_info() + * + */ +struct amdgpu_heap_info { + /** Theoretical max. available memory in the given heap */ + uint64_t heap_size; + + /** + * Number of bytes allocated in the heap. This includes all processes + * and private allocations in the kernel. It changes when new buffers + * are allocated, freed, and moved. It cannot be larger than + * heap_size. + */ + uint64_t heap_usage; + + /** + * Theoretical possible max. size of buffer which + * could be allocated in the given heap + */ + uint64_t max_allocation; +}; + + + +/** + * Describe GPU h/w info needed for UMD correct initialization + * + * \sa amdgpu_query_gpu_info() +*/ +struct amdgpu_gpu_info { + /** Asic id */ + uint32_t asic_id; + /** Chip revision */ + uint32_t chip_rev; + /** Chip external revision */ + uint32_t chip_external_rev; + /** Family ID */ + uint32_t family_id; + /** Special flags */ + uint64_t ids_flags; + /** max engine clock*/ + uint64_t max_engine_clk; + /** number of shader engines */ + uint32_t num_shader_engines; + /** number of shader arrays per engine */ + uint32_t num_shader_arrays_per_engine; + /** Number of available good shader pipes */ + uint32_t avail_quad_shader_pipes; + /** Max. 
number of shader pipes.(including good and bad pipes */ + uint32_t max_quad_shader_pipes; + /** Number of parameter cache entries per shader quad pipe */ + uint32_t cache_entries_per_quad_pipe; + /** Number of available graphics context */ + uint32_t num_hw_gfx_contexts; + /** Number of render backend pipes */ + uint32_t rb_pipes; + /** Active render backend pipe number */ + uint32_t active_rb_pipes; + /** Enabled render backend pipe mask */ + uint32_t enabled_rb_pipes_mask; + /** Frequency of GPU Counter */ + uint32_t gpu_counter_freq; + /** CC_RB_BACKEND_DISABLE.BACKEND_DISABLE per SE */ + uint32_t backend_disable[4]; + /** Value of MC_ARB_RAMCFG register*/ + uint32_t mc_arb_ramcfg; + /** Value of GB_ADDR_CONFIG */ + uint32_t gb_addr_cfg; + /** Values of the GB_TILE_MODE0..31 registers */ + uint32_t gb_tile_mode[32]; + /** Values of GB_MACROTILE_MODE0..15 registers */ + uint32_t gb_macro_tile_mode[16]; + /** Value of PA_SC_RASTER_CONFIG register per SE */ + uint32_t pa_sc_raster_cfg[4]; + /** Value of PA_SC_RASTER_CONFIG_1 register per SE */ + uint32_t pa_sc_raster_cfg1[4]; + /* CU info */ + uint32_t cu_active_number; + uint32_t cu_ao_mask; + uint32_t cu_bitmap[4][4]; +}; + + +/*--------------------------------------------------------------------------*/ +/*------------------------- Functions --------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +/* + * Initialization / Cleanup + * +*/ + + +/** + * + * \param fd - \c [in] File descriptor for AMD GPU device + * received previously as the result of + * e.g. drmOpen() call. + * For legacy fd type, the DRI2/DRI3 authentication + * should be done before calling this function. + * \param major_version - \c [out] Major version of library. 
It is assumed + * that adding new functionality will cause + * increase in major version + * \param minor_version - \c [out] Minor version of library + * \param device_handle - \c [out] Pointer to opaque context which should + * be passed as the first parameter on each + * API call + * + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * + * \sa amdgpu_device_deinitialize() +*/ +int amdgpu_device_initialize(int fd, + uint32_t *major_version, + uint32_t *minor_version, + amdgpu_device_handle *device_handle); + + + +/** + * + * When access to such library does not needed any more the special + * function must be call giving opportunity to clean up any + * resources if needed. + * + * \param device_handle - \c [in] Context associated with file + * descriptor for AMD GPU device + * received previously as the + * result e.g. of drmOpen() call. + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_device_initialize() + * +*/ +int amdgpu_device_deinitialize(amdgpu_device_handle device_handle); + + +/* + * Memory Management + * +*/ + +/** + * Allocate memory to be used by UMD for GPU related operations + * + * \param dev - \c [in] Device handle. + * See #amdgpu_device_initialize() + * \param alloc_buffer - \c [in] Pointer to the structure describing an + * allocation request + * \param info - \c [out] Pointer to structure which return + * information about allocated memory + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_bo_free() +*/ +int amdgpu_bo_alloc(amdgpu_device_handle dev, + struct amdgpu_bo_alloc_request *alloc_buffer, + struct amdgpu_bo_alloc_result *info); + +/** + * Associate opaque data with buffer to be queried by another UMD + * + * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() + * \param buf_handle - \c [in] Buffer handle + * \param info - \c [in] Metadata to associate with buffer + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code +*/ +int amdgpu_bo_set_metadata(amdgpu_bo_handle buf_handle, + struct amdgpu_bo_metadata *info); + +/** + * Query buffer information including metadata previously associated with + * buffer. + * + * \param dev - \c [in] Device handle. + * See #amdgpu_device_initialize() + * \param buf_handle - \c [in] Buffer handle + * \param info - \c [out] Structure describing buffer + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_bo_set_metadata(), amdgpu_bo_alloc() +*/ +int amdgpu_bo_query_info(amdgpu_bo_handle buf_handle, + struct amdgpu_bo_info *info); + +/** + * Allow others to get access to buffer + * + * \param dev - \c [in] Device handle. + * See #amdgpu_device_initialize() + * \param buf_handle - \c [in] Buffer handle + * \param type - \c [in] Type of handle requested + * \param shared_handle - \c [out] Special "shared" handle + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_bo_import() + * +*/ +int amdgpu_bo_export(amdgpu_bo_handle buf_handle, + enum amdgpu_bo_handle_type type, + uint32_t *shared_handle); + +/** + * Request access to "shared" buffer + * + * \param dev - \c [in] Device handle. + * See #amdgpu_device_initialize() + * \param type - \c [in] Type of handle requested + * \param shared_handle - \c [in] Shared handle received as result of "export" + * operation + * \param output - \c [out] Pointer to structure with information + * about imported buffer + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \note Buffer must be "imported" only using new "fd" (different from + * one used by "exporter"). 
+ * + * \sa amdgpu_bo_export() + * +*/ +int amdgpu_bo_import(amdgpu_device_handle dev, + enum amdgpu_bo_handle_type type, + uint32_t shared_handle, + struct amdgpu_bo_import_result *output); + +/** + * Free previously allocated memory + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param buf_handle - \c [in] Buffer handle to free + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \note In the case of memory shared between different applications all + * resources will be "physically" freed only when all such applications + * have terminated + * \note It is the UMD's responsibility to 'free' a buffer only when there is no + * more GPU access + * + * \sa amdgpu_bo_set_metadata(), amdgpu_bo_alloc() + * +*/ +int amdgpu_bo_free(amdgpu_bo_handle buf_handle); + +/** + * Request CPU access to GPU accessible memory + * + * \param buf_handle - \c [in] Buffer handle + * \param cpu - \c [out] CPU address to be used for access + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_bo_cpu_unmap() + * +*/ +int amdgpu_bo_cpu_map(amdgpu_bo_handle buf_handle, void **cpu); + +/** + * Release CPU access to GPU memory + * + * \param buf_handle - \c [in] Buffer handle + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_bo_cpu_map() + * +*/ +int amdgpu_bo_cpu_unmap(amdgpu_bo_handle buf_handle); + + +/** + * Wait until a buffer is not used by the device. + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param buf_handle - \c [in] Buffer handle. + * \param timeout_ns - Timeout in nanoseconds. + * \param buffer_busy - 0 if buffer is idle, all GPU access was completed + * and no GPU access is scheduled. 
+ * 1 GPU access is in fly or scheduled + * + * \return 0 - on success + * <0 - AMD specific error code + */ +int amdgpu_bo_wait_for_idle(amdgpu_bo_handle buf_handle, + uint64_t timeout_ns, + bool *buffer_busy); + + +/* + * Special GPU Resources + * +*/ + + + +/** + * Query information about GDS + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param gds_info - \c [out] Pointer to structure to get GDS information + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_gpu_resource_query_gds_info(amdgpu_device_handle dev, + struct amdgpu_gds_resource_info * + gds_info); + + +/** + * Allocate GDS partitions + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param gds_size - \c [in] Size of gds allocation. Must be aligned + * accordingly. + * \param alloc_info - \c [out] Pointer to structure to receive information + * about allocation + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * +*/ +int amdgpu_gpu_resource_gds_alloc(amdgpu_device_handle dev, + uint32_t gds_size, + struct amdgpu_gds_alloc_info *alloc_info); + + + + +/** + * Release GDS resource. When GDS and associated resources not needed any + * more UMD should free them + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param handle - \c [in] Handle assigned to GDS allocation + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_gpu_resource_gds_free(amdgpu_bo_handle handle); + + + +/* + * GPU Execution context + * +*/ + +/** + * Create GPU execution Context + * + * For the purpose of GPU Scheduler and GPU Robustness extensions it is + * necessary to have information/identify rendering/compute contexts. + * It also may be needed to associate some specific requirements with such + * contexts. 
Kernel driver will guarantee that submission from the same + * context will always be executed in order (first come, first serve). + * + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param context - \c [out] GPU Context handle + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_cs_ctx_free() + * +*/ +int amdgpu_cs_ctx_create(amdgpu_device_handle dev, + amdgpu_context_handle *context); + +/** + * + * Destroy GPU execution context when not needed any more + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param context - \c [in] GPU Context handle + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_cs_ctx_create() + * +*/ +int amdgpu_cs_ctx_free(amdgpu_device_handle dev, + amdgpu_context_handle context); + +/** + * Query reset state for the specific GPU Context + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param context - \c [in] GPU Context handle + * \param state - \c [out] Reset state status + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_cs_ctx_create() + * +*/ +int amdgpu_cs_query_reset_state(amdgpu_device_handle dev, + amdgpu_context_handle context, + enum amdgpu_cs_ctx_reset_state *state); + + +/* + * Command Buffers Management + * +*/ + + +/** + * Allocate memory to be filled with PM4 packets and be served as the first + * entry point of execution (a.k.a. Indirect Buffer) + * + * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() + * \param context - \c [in] GPU Context which will use IB + * \param ib_size - \c [in] Size of allocation + * \param output - \c [out] Pointer to structure to get information about + * allocated IB + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \sa amdgpu_cs_free_ib() + * +*/ +int amdgpu_cs_alloc_ib(amdgpu_device_handle dev, + amdgpu_context_handle context, + enum amdgpu_cs_ib_size ib_size, + struct amdgpu_cs_ib_alloc_result *output); + +/** + * If UMD has allocated IBs which it doesn't need any more, then those IBs must + * be explicitly freed + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param context - \c [in] GPU Context containing IB + * \param handle - \c [in] IB handle + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \note Libdrm_amdgpu will guarantee that it will correctly detect when it + * is safe to return IB to free pool + * + * \sa amdgpu_cs_alloc_ib() + * +*/ +int amdgpu_cs_free_ib(amdgpu_device_handle dev, + amdgpu_context_handle context, + amdgpu_ib_handle handle); + +/** + * Send request to submit command buffers to hardware. + * + * Kernel driver could use GPU Scheduler to decide when to physically + * send this request to the hardware. Accordingly this request could be put + * in queue and sent for execution later. The only guarantee is that requests + * from the same GPU context to the same ip:ip_instance:ring will be executed in + * order. + * + * + * \param dev - \c [in] Device handle. + * See #amdgpu_device_initialize() + * \param context - \c [in] GPU Context + * \param flags - \c [in] Global submission flags + * \param ibs_request - \c [in] Pointer to submission requests. 
+ * We could submit to the several + * engines/rings simultaneously as + * 'atomic' operation + * \param number_of_requests - \c [in] Number of submission requests + * \param fences - \c [out] Pointer to array of data to get + * fences to identify submission + * requests. Timestamps are valid + * in this GPU context and could be used + * to identify/detect completion of + * submission request + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \note It is assumed that by default IB will be returned to free pool + * automatically by libdrm_amdgpu when the submission is completed. + * It is possible for UMD to make decision to re-use the same IB in + * this case it should be explicitly freed.\n + * Accordingly, by default, after submission UMD should not touch passed + * IBs. If UMD needs to re-use IB then the special flag AMDGPU_CS_REUSE_IB + * must be passed. + * + * \note It is required to pass correct resource list with buffer handles + * which will be accessible by command buffers from submission. + * This will allow kernel driver to correctly implement "paging". + * Failure to do so will have unpredictable results. + * + * \sa amdgpu_cs_alloc_ib(), amdgpu_cs_free_ib(), + * amdgpu_cs_query_fence_status() + * +*/ +int amdgpu_cs_submit(amdgpu_device_handle dev, + amdgpu_context_handle context, + uint64_t flags, + struct amdgpu_cs_request *ibs_request, + uint32_t number_of_requests, + uint64_t *fences); + +/** + * Query status of Command Buffer Submission + * + * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() + * \param fence - \c [in] Structure describing fence to query + * \param expired - \c [out] If fence expired or not.\n + * 0 – if fence is not expired\n + * !0 - otherwise + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * + * \note If UMD wants only to check operation status and returned immediately + * then timeout value as 0 must be passed. In this case success will be + * returned in the case if submission was completed or timeout error + * code. + * + * \sa amdgpu_cs_submit() +*/ +int amdgpu_cs_query_fence_status(amdgpu_device_handle dev, + struct amdgpu_cs_query_fence *fence, + uint32_t *expired); + + +/* + * Query / Info API + * +*/ + + +/** + * Query allocation size alignments + * + * UMD should query information about GPU VM MC size alignments requirements + * to be able correctly choose required allocation size and implement + * internal optimization if needed. + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param info - \c [out] Pointer to structure to get size alignment + * requirements + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev, + struct amdgpu_buffer_size_alignments + *info); + + + +/** + * Query firmware versions + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param fw_type - \c [in] AMDGPU_INFO_FW_* + * \param ip_instance - \c [in] Index of the IP block of the same type. + * \param index - \c [in] Index of the engine. 
(for SDMA and MEC) + * \param version - \c [out] Pointer to the "version" return value + * \param feature - \c [out] Pointer to the "feature" return value + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type, + unsigned ip_instance, unsigned index, + uint32_t *version, uint32_t *feature); + + + +/** + * Query the number of HW IP instances of a certain type. + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param type - \c [in] Hardware IP block type = AMDGPU_HW_IP_* + * \param count - \c [out] Pointer to the variable that receives the count + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code +*/ +int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type, + uint32_t *count); + + + +/** + * Query engine information + * + * This query allows UMD to query information about different engines and their + * capabilities. + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param type - \c [in] Hardware IP block type = AMDGPU_HW_IP_* + * \param ip_instance - \c [in] Index of the IP block of the same type. + * \param info - \c [out] Pointer to structure to get information + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code +*/ +int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type, + unsigned ip_instance, + struct drm_amdgpu_info_hw_ip *info); + + + + +/** + * Query heap information + * + * This query allows UMD to query potentially available memory resources and + * adjust their logic if necessary. + * + * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() + * \param heap - \c [in] Heap type + * \param flags - \c [in] Additional flags for the heap query + * \param info - \c [out] Pointer to structure to get needed information + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_query_heap_info(amdgpu_device_handle dev, + uint32_t heap, + uint32_t flags, + struct amdgpu_heap_info *info); + + + +/** + * Get the CRTC ID from the mode object ID + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param id - \c [in] Mode object ID + * \param result - \c [out] Pointer to the CRTC ID + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id, + int32_t *result); + + + +/** + * Query GPU H/w Info + * + * Query hardware specific information + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param info - \c [out] Pointer to structure to get needed information + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_query_gpu_info(amdgpu_device_handle dev, + struct amdgpu_gpu_info *info); + + + +/** + * Query hardware or driver information. + * + * The return size is query-specific and depends on the "info_id" parameter. + * No more than "size" bytes are returned. + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param info_id - \c [in] AMDGPU_INFO_* + * \param size - \c [in] Size of the returned value. + * \param value - \c [out] Pointer to the return value. + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX error code + * +*/ +int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id, + unsigned size, void *value); + + + +/** + * Read a set of consecutive memory-mapped registers. + * Not all registers are allowed to be read by userspace. 
+ * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param dword_offset - \c [in] Register offset in dwords + * \param count - \c [in] The number of registers to read starting + * from the offset + * \param instance - \c [in] GRBM_GFX_INDEX selector. It may have other + * uses. Set it to 0xffffffff if unsure. + * \param flags - \c [in] Flags with additional information. + * \param values - \c [out] The pointer to return values. + * + * \return 0 on success\n + * >0 - AMD specific error code\n + * <0 - Negative POSIX error code + * +*/ +int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset, + unsigned count, uint32_t instance, uint32_t flags, + uint32_t *values); + + + +/** + * Request GPU access to user allocated memory e.g. via "malloc" + * + * \param dev - [in] Device handle. See #amdgpu_device_initialize() + * \param cpu - [in] CPU address of user allocated memory which we + * want to map to GPU address space (make GPU accessible) + * (This address must be correctly aligned). + * \param size - [in] Size of allocation (must be correctly aligned) + * \param info - [out] Handle of allocation to be passed as resource + * on submission and be used in other operations (e.g. for VA submission). + * (struct amdgpu_bo_alloc_result is temporarily used as the parameter type to return the MC address.) + * + * + * \return 0 on success + * >0 - AMD specific error code + * <0 - Negative POSIX Error code + * + * + * \note + * This call doesn't guarantee that such memory will be persistently + * "locked" / made non-pageable. The purpose of this call is to provide + * opportunity for the GPU to get access to this resource during submission. + * + * The maximum amount of memory which could be mapped in this call depends + * on whether overcommit is disabled or not. If overcommit is disabled then the max. 
+ * amount of memory to be pinned will be limited by left "free" size in total + * amount of memory which could be locked simultaneously ("GART" size). + * + * Supported (theoretical) max. size of mapping is restricted only by + * "GART" size. + * + * It is responsibility of caller to correctly specify access rights + * on VA assignment. +*/ +int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev, + void *cpu, + uint64_t size, + struct amdgpu_bo_alloc_result *info); + + +#endif /* #ifdef _AMDGPU_H_ */ diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c new file mode 100644 index 0000000..39641ce --- /dev/null +++ b/amdgpu/amdgpu_bo.c @@ -0,0 +1,626 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/time.h> + +#include "libdrm.h" +#include "xf86drm.h" +#include "amdgpu_drm.h" +#include "amdgpu_internal.h" +#include "util_hash_table.h" + +static void amdgpu_close_kms_handle(amdgpu_device_handle dev, + uint32_t handle) +{ + struct drm_gem_close args = {}; + + args.handle = handle; + drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args); +} + +void amdgpu_bo_free_internal(amdgpu_bo_handle bo) +{ + /* Remove the buffer from the hash tables. */ + pthread_mutex_lock(&bo->dev->bo_table_mutex); + util_hash_table_remove(bo->dev->bo_handles, + (void*)(uintptr_t)bo->handle); + if (bo->flink_name) { + util_hash_table_remove(bo->dev->bo_flink_names, + (void*)(uintptr_t)bo->flink_name); + } + pthread_mutex_unlock(&bo->dev->bo_table_mutex); + + /* Release CPU access. 
*/
+	if (bo->cpu_map_count > 0) {
+		bo->cpu_map_count = 1;
+		amdgpu_bo_cpu_unmap(bo);
+	}
+
+	amdgpu_close_kms_handle(bo->dev, bo->handle);
+	pthread_mutex_destroy(&bo->cpu_access_mutex);
+	amdgpu_vamgr_free_va(&bo->dev->vamgr, bo->virtual_mc_base_address, bo->alloc_size);
+	free(bo);
+}
+
+/**
+ * Allocate a buffer object and map it into the GPU virtual address space.
+ *
+ * Creates the GEM object with DRM_AMDGPU_GEM_CREATE, picks a virtual address
+ * through amdgpu_vamgr_find_va(), maps the object there with
+ * DRM_AMDGPU_GEM_VA and records the new bo in dev->bo_vas.
+ *
+ * \param   dev          - \c [in]  Device handle
+ * \param   alloc_buffer - \c [in]  Size, alignment, heap and flags of the
+ *                                  requested allocation
+ * \param   info         - \c [out] Receives the bo handle and its virtual
+ *                                  MC base address; written only on success
+ *
+ * \return   0 on success\n
+ *          -EINVAL if neither GTT nor VRAM is requested as heap, or if the
+ *                  kernel reports AMDGPU_VA_RESULT_ERROR for the VA mapping\n
+ *          -ENOMEM if the bo structure cannot be allocated\n
+ *          otherwise the error returned by the failing ioctl
+*/
+int amdgpu_bo_alloc(amdgpu_device_handle dev,
+		    struct amdgpu_bo_alloc_request *alloc_buffer,
+		    struct amdgpu_bo_alloc_result *info)
+{
+	struct amdgpu_bo *bo;
+	union drm_amdgpu_gem_create args;
+	unsigned heap = alloc_buffer->preferred_heap;
+	int r = 0;
+
+	/* It's an error if the heap is not specified */
+	if (!(heap & (AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM)))
+		return -EINVAL;
+
+	bo = calloc(1, sizeof(struct amdgpu_bo));
+	if (!bo)
+		return -ENOMEM;
+
+	atomic_set(&bo->refcount, 1);
+	bo->dev = dev;
+	bo->alloc_size = alloc_buffer->alloc_size;
+
+	memset(&args, 0, sizeof(args));
+	args.in.bo_size = alloc_buffer->alloc_size;
+	args.in.alignment = alloc_buffer->phys_alignment;
+
+	/* Set the placement. */
+	args.in.domains = heap & AMDGPU_GEM_DOMAIN_MASK;
+	args.in.domain_flags = alloc_buffer->flags & AMDGPU_GEM_CREATE_CPU_GTT_MASK;
+
+	/* Allocate the buffer with the preferred heap. */
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_CREATE,
+				&args, sizeof(args));
+	if (r) {
+		free(bo);
+		return r;
+	}
+
+	bo->handle = args.out.handle;
+
+	pthread_mutex_init(&bo->cpu_access_mutex, NULL);
+
+	/* map the buffer to the GPU virtual address space */
+	{
+		union drm_amdgpu_gem_va va;
+
+		memset(&va, 0, sizeof(va));
+
+		bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, alloc_buffer->alloc_size, alloc_buffer->phys_alignment);
+
+		va.in.handle = bo->handle;
+		va.in.operation = AMDGPU_VA_OP_MAP;
+		va.in.flags =	AMDGPU_VM_PAGE_READABLE |
+				AMDGPU_VM_PAGE_WRITEABLE |
+				AMDGPU_VM_PAGE_EXECUTABLE;
+		va.in.va_address = bo->virtual_mc_base_address;
+
+		r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
+		/* The ioctl can succeed while the mapping itself failed; never
+		 * report success for a bo that is about to be freed. */
+		if (!r && va.out.result == AMDGPU_VA_RESULT_ERROR)
+			r = -EINVAL;
+		if (r) {
+			amdgpu_bo_free_internal(bo);
+			return r;
+		}
+		pthread_mutex_lock(&dev->bo_table_mutex);
+
+		util_hash_table_set(dev->bo_vas,
+				    (void*)(uintptr_t)bo->virtual_mc_base_address, bo);
+		pthread_mutex_unlock(&dev->bo_table_mutex);
+	}
+
+	info->buf_handle = bo;
+	info->virtual_mc_base_address = bo->virtual_mc_base_address;
+	return 0;
+}
+
+int amdgpu_bo_set_metadata(amdgpu_bo_handle bo,
+			   struct amdgpu_bo_metadata *info)
+{
+	struct drm_amdgpu_gem_metadata args = {};
+
+	args.handle = bo->handle;
+	args.op = AMDGPU_GEM_METADATA_OP_SET_METADATA;
+	args.data.flags = info->flags;
+	args.data.tiling_info = info->tiling_info;
+
+	if (info->size_metadata > sizeof(args.data.data))
+		return -EINVAL;
+
+	if (info->size_metadata) {
+		args.data.data_size_bytes = info->size_metadata;
+		memcpy(args.data.data, info->umd_metadata, info->size_metadata);
+	}
+
+	return drmCommandWriteRead(bo->dev->fd,
+				   DRM_AMDGPU_GEM_METADATA,
+				   &args, sizeof(args));
+}
+
+int amdgpu_bo_query_info(amdgpu_bo_handle bo,
+			 struct amdgpu_bo_info *info)
+{
+	struct drm_amdgpu_gem_metadata metadata = {};
+	struct drm_amdgpu_gem_create_in bo_info = {};
+	struct drm_amdgpu_gem_op gem_op = {};
+	int r;
+
+	/* Query
metadata. */ + metadata.handle = bo->handle; + metadata.op = AMDGPU_GEM_METADATA_OP_GET_METADATA; + + r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_METADATA, + &metadata, sizeof(metadata)); + if (r) + return r; + + if (metadata.data.data_size_bytes > + sizeof(info->metadata.umd_metadata)) + return -EINVAL; + + /* Query buffer info. */ + gem_op.handle = bo->handle; + gem_op.op = AMDGPU_GEM_OP_GET_GEM_CREATE_INFO; + gem_op.value = (intptr_t)&bo_info; + + r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_OP, + &gem_op, sizeof(gem_op)); + if (r) + return r; + + memset(info, 0, sizeof(*info)); + info->alloc_size = bo_info.bo_size; + info->phys_alignment = bo_info.alignment; + info->virtual_mc_base_address = bo->virtual_mc_base_address; + info->preferred_heap = bo_info.domains; + info->alloc_flags = bo_info.domain_flags; + info->metadata.flags = metadata.data.flags; + info->metadata.tiling_info = metadata.data.tiling_info; + + info->metadata.size_metadata = metadata.data.data_size_bytes; + if (metadata.data.data_size_bytes > 0) + memcpy(info->metadata.umd_metadata, metadata.data.data, + metadata.data.data_size_bytes); + + return 0; +} + +static void amdgpu_add_handle_to_table(amdgpu_bo_handle bo) +{ + pthread_mutex_lock(&bo->dev->bo_table_mutex); + util_hash_table_set(bo->dev->bo_handles, + (void*)(uintptr_t)bo->handle, bo); + pthread_mutex_unlock(&bo->dev->bo_table_mutex); +} + +static int amdgpu_bo_export_flink(amdgpu_bo_handle bo) +{ + struct drm_gem_flink flink; + int fd, dma_fd; + uint32_t handle; + int r; + + fd = bo->dev->fd; + handle = bo->handle; + if (bo->flink_name) + return 0; + + + if (bo->dev->flink_fd != bo->dev->fd) { + r = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, + &dma_fd); + if (!r) { + r = drmPrimeFDToHandle(bo->dev->flink_fd, dma_fd, &handle); + close(dma_fd); + } + if (r) + return r; + fd = bo->dev->flink_fd; + } + memset(&flink, 0, sizeof(flink)); + flink.handle = handle; + + r = drmIoctl(fd, DRM_IOCTL_GEM_FLINK, &flink); + 
if (r) + return r; + + bo->flink_name = flink.name; + + if (bo->dev->flink_fd != bo->dev->fd) { + struct drm_gem_close args = {}; + args.handle = handle; + drmIoctl(bo->dev->flink_fd, DRM_IOCTL_GEM_CLOSE, &args); + } + + pthread_mutex_lock(&bo->dev->bo_table_mutex); + util_hash_table_set(bo->dev->bo_flink_names, + (void*)(uintptr_t)bo->flink_name, + bo); + pthread_mutex_unlock(&bo->dev->bo_table_mutex); + + return 0; +} + +int amdgpu_bo_export(amdgpu_bo_handle bo, + enum amdgpu_bo_handle_type type, + uint32_t *shared_handle) +{ + int r; + + switch (type) { + case amdgpu_bo_handle_type_gem_flink_name: + r = amdgpu_bo_export_flink(bo); + if (r) + return r; + + *shared_handle = bo->flink_name; + return 0; + + case amdgpu_bo_handle_type_kms: + r = amdgpu_bo_export_flink(bo); + if (r) + return r; + + amdgpu_add_handle_to_table(bo); + *shared_handle = bo->handle; + return 0; + + case amdgpu_bo_handle_type_dma_buf_fd: + amdgpu_add_handle_to_table(bo); + return drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, + (int*)shared_handle); + } + return -EINVAL; +} + +int amdgpu_bo_import(amdgpu_device_handle dev, + enum amdgpu_bo_handle_type type, + uint32_t shared_handle, + struct amdgpu_bo_import_result *output) +{ + struct drm_gem_open open_arg = {}; + union drm_amdgpu_gem_va va; + struct amdgpu_bo *bo = NULL; + int r; + int dma_fd; + uint64_t dma_buf_size = 0; + + /* Convert a DMA buf handle to a KMS handle now. */ + if (type == amdgpu_bo_handle_type_dma_buf_fd) { + uint32_t handle; + off_t size; + + /* Get a KMS handle. */ + r = drmPrimeFDToHandle(dev->fd, shared_handle, &handle); + if (r) { + return r; + } + + /* Query the buffer size. 
*/ + size = lseek(shared_handle, 0, SEEK_END); + if (size == (off_t)-1) { + amdgpu_close_kms_handle(dev, handle); + return -errno; + } + lseek(shared_handle, 0, SEEK_SET); + + dma_buf_size = size; + shared_handle = handle; + } + + /* We must maintain a list of pairs <handle, bo>, so that we always + * return the same amdgpu_bo instance for the same handle. */ + pthread_mutex_lock(&dev->bo_table_mutex); + + /* If we have already created a buffer with this handle, find it. */ + switch (type) { + case amdgpu_bo_handle_type_gem_flink_name: + bo = util_hash_table_get(dev->bo_flink_names, + (void*)(uintptr_t)shared_handle); + break; + + case amdgpu_bo_handle_type_dma_buf_fd: + bo = util_hash_table_get(dev->bo_handles, + (void*)(uintptr_t)shared_handle); + break; + + case amdgpu_bo_handle_type_kms: + /* Importing a KMS handle in not allowed. */ + pthread_mutex_unlock(&dev->bo_table_mutex); + return -EPERM; + + default: + pthread_mutex_unlock(&dev->bo_table_mutex); + return -EINVAL; + } + + if (bo) { + pthread_mutex_unlock(&dev->bo_table_mutex); + + /* The buffer already exists, just bump the refcount. */ + atomic_inc(&bo->refcount); + + output->buf_handle = bo; + output->alloc_size = bo->alloc_size; + output->virtual_mc_base_address = + bo->virtual_mc_base_address; + return 0; + } + + bo = calloc(1, sizeof(struct amdgpu_bo)); + if (!bo) { + pthread_mutex_unlock(&dev->bo_table_mutex); + if (type == amdgpu_bo_handle_type_dma_buf_fd) { + amdgpu_close_kms_handle(dev, shared_handle); + } + return -ENOMEM; + } + + /* Open the handle. 
*/ + switch (type) { + case amdgpu_bo_handle_type_gem_flink_name: + open_arg.name = shared_handle; + r = drmIoctl(dev->flink_fd, DRM_IOCTL_GEM_OPEN, &open_arg); + if (r) { + free(bo); + pthread_mutex_unlock(&dev->bo_table_mutex); + return r; + } + + bo->handle = open_arg.handle; + if (dev->flink_fd != dev->fd) { + r = drmPrimeHandleToFD(dev->flink_fd, bo->handle, DRM_CLOEXEC, &dma_fd); + if (r) { + free(bo); + pthread_mutex_unlock(&dev->bo_table_mutex); + return r; + } + r = drmPrimeFDToHandle(dev->fd, dma_fd, &bo->handle ); + + close(dma_fd); + + if (r) { + free(bo); + pthread_mutex_unlock(&dev->bo_table_mutex); + return r; + } + } + bo->flink_name = shared_handle; + bo->alloc_size = open_arg.size; + util_hash_table_set(dev->bo_flink_names, + (void*)(uintptr_t)bo->flink_name, bo); + break; + + case amdgpu_bo_handle_type_dma_buf_fd: + bo->handle = shared_handle; + bo->alloc_size = dma_buf_size; + break; + + case amdgpu_bo_handle_type_kms: + assert(0); /* unreachable */ + } + + /* Initialize it. 
*/ + atomic_set(&bo->refcount, 1); + bo->dev = dev; + pthread_mutex_init(&bo->cpu_access_mutex, NULL); + + bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, bo->alloc_size, 1 << 20); + + memset(&va, 0, sizeof(va)); + va.in.handle = bo->handle; + va.in.operation = AMDGPU_VA_OP_MAP; + va.in.va_address = bo->virtual_mc_base_address; + va.in.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE; + + r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va)); + if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) { + pthread_mutex_unlock(&dev->bo_table_mutex); + amdgpu_vamgr_free_va(&dev->vamgr, bo->virtual_mc_base_address, bo->alloc_size); + amdgpu_bo_reference(&bo, NULL); + return r; + } + + util_hash_table_set(dev->bo_vas, + (void*)(uintptr_t)bo->virtual_mc_base_address, bo); + util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo); + pthread_mutex_unlock(&dev->bo_table_mutex); + + output->buf_handle = bo; + output->alloc_size = bo->alloc_size; + output->virtual_mc_base_address = bo->virtual_mc_base_address; + return 0; +} + +int amdgpu_bo_free(amdgpu_bo_handle buf_handle) +{ + /* Just drop the reference. */ + amdgpu_bo_reference(&buf_handle, NULL); + return 0; +} + +int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu) +{ + union drm_amdgpu_gem_mmap args; + void *ptr; + int r; + + pthread_mutex_lock(&bo->cpu_access_mutex); + + if (bo->cpu_ptr) { + /* already mapped */ + assert(bo->cpu_map_count > 0); + bo->cpu_map_count++; + *cpu = bo->cpu_ptr; + pthread_mutex_unlock(&bo->cpu_access_mutex); + return 0; + } + + assert(bo->cpu_map_count == 0); + + memset(&args, 0, sizeof(args)); + + /* Query the buffer address (args.addr_ptr). + * The kernel driver ignores the offset and size parameters. 
*/ + args.in.handle = bo->handle; + + r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_MMAP, &args, + sizeof(args)); + if (r) { + pthread_mutex_unlock(&bo->cpu_access_mutex); + return r; + } + + /* Map the buffer. */ + ptr = drm_mmap(NULL, bo->alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->dev->fd, args.out.addr_ptr); + if (ptr == MAP_FAILED) { + pthread_mutex_unlock(&bo->cpu_access_mutex); + return -errno; + } + + bo->cpu_ptr = ptr; + bo->cpu_map_count = 1; + pthread_mutex_unlock(&bo->cpu_access_mutex); + + *cpu = ptr; + return 0; +} + +int amdgpu_bo_cpu_unmap(amdgpu_bo_handle bo) +{ + int r; + + pthread_mutex_lock(&bo->cpu_access_mutex); + assert(bo->cpu_map_count >= 0); + + if (bo->cpu_map_count == 0) { + /* not mapped */ + pthread_mutex_unlock(&bo->cpu_access_mutex); + return -EBADMSG; + } + + bo->cpu_map_count--; + if (bo->cpu_map_count > 0) { + /* mapped multiple times */ + pthread_mutex_unlock(&bo->cpu_access_mutex); + return 0; + } + + r = drm_munmap(bo->cpu_ptr, bo->alloc_size) == 0 ? 
0 : -errno;
+	bo->cpu_ptr = NULL;
+	pthread_mutex_unlock(&bo->cpu_access_mutex);
+	return r;
+}
+
+int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev,
+				struct amdgpu_buffer_size_alignments *info)
+{
+	info->size_local = dev->dev_info.pte_fragment_size;
+	info->size_remote = dev->dev_info.gart_page_size;
+	return 0;
+}
+
+int amdgpu_bo_wait_for_idle(amdgpu_bo_handle bo,
+			    uint64_t timeout_ns,
+			    bool *busy)
+{
+	union drm_amdgpu_gem_wait_idle args;
+	int r;
+
+	memset(&args, 0, sizeof(args));
+	args.in.handle = bo->handle;
+	args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
+
+	r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE,
+				&args, sizeof(args));
+
+	if (r == 0) {
+		*busy = args.out.status;
+		return 0;
+	} else {
+		fprintf(stderr, "amdgpu: GEM_WAIT_IDLE failed with %i\n", r);
+		return r;
+	}
+}
+
+/**
+ * Wrap user allocated memory in a buffer object and map it for the GPU.
+ *
+ * Registers the range with DRM_AMDGPU_GEM_USERPTR, picks a virtual address
+ * through amdgpu_vamgr_find_va() (4 KiB alignment), maps the object there
+ * with DRM_AMDGPU_GEM_VA and records the new bo in dev->bo_vas.
+ *
+ * \param   dev  - \c [in]  Device handle
+ * \param   cpu  - \c [in]  CPU address of the user memory
+ * \param   size - \c [in]  Size of the user memory range
+ * \param   info - \c [out] Receives the bo handle and its virtual MC base
+ *                          address; written only on success
+ *
+ * \return   0 on success\n
+ *          -ENOMEM if the bo structure cannot be allocated\n
+ *          -EINVAL if the kernel reports AMDGPU_VA_RESULT_ERROR for the
+ *                  VA mapping\n
+ *          otherwise the error returned by the failing ioctl
+*/
+int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
+				    void *cpu,
+				    uint64_t size,
+				    struct amdgpu_bo_alloc_result *info)
+{
+	int r;
+	struct amdgpu_bo *bo;
+	struct drm_amdgpu_gem_userptr args;
+	union drm_amdgpu_gem_va va;
+
+	memset(&args, 0, sizeof(args));
+	/* Go through uintptr_t so the cast is also clean on 32-bit builds. */
+	args.addr = (uint64_t)(uintptr_t)cpu;
+	args.flags = AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_REGISTER;
+	args.size = size;
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_USERPTR,
+				&args, sizeof(args));
+	if (r)
+		return r;
+
+	bo = calloc(1, sizeof(struct amdgpu_bo));
+	if (!bo) {
+		/* Do not leak the GEM handle the kernel just handed out. */
+		amdgpu_close_kms_handle(dev, args.handle);
+		return -ENOMEM;
+	}
+
+	atomic_set(&bo->refcount, 1);
+	bo->dev = dev;
+	bo->alloc_size = size;
+	bo->handle = args.handle;
+	/* amdgpu_bo_free_internal() destroys this mutex and
+	 * amdgpu_bo_cpu_map() locks it, so it must be initialized here. */
+	pthread_mutex_init(&bo->cpu_access_mutex, NULL);
+	bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, size, 4 * 1024);
+
+	memset(&va, 0, sizeof(va));
+	va.in.handle = bo->handle;
+	va.in.operation = AMDGPU_VA_OP_MAP;
+	va.in.flags =	AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+			AMDGPU_VM_PAGE_EXECUTABLE;
+	va.in.va_address = bo->virtual_mc_base_address;
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
+	/* The ioctl can succeed while the mapping itself failed; never
+	 * report success for a bo that is about to be freed. */
+	if (!r && va.out.result == AMDGPU_VA_RESULT_ERROR)
+		r = -EINVAL;
+	if (r) {
+		amdgpu_bo_free_internal(bo);
+		return r;
+	}
+	/* Same locking as amdgpu_bo_alloc() when touching the VA table. */
+	pthread_mutex_lock(&dev->bo_table_mutex);
+	util_hash_table_set(dev->bo_vas,
+			    (void*)(uintptr_t)bo->virtual_mc_base_address, bo);
+	pthread_mutex_unlock(&dev->bo_table_mutex);
+	info->buf_handle = bo;
+	info->virtual_mc_base_address = bo->virtual_mc_base_address;
+	return 0;
+}
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
new file mode 100644
index 0000000..614904d
--- /dev/null
+++ b/amdgpu/amdgpu_cs.c
@@ -0,0 +1,981 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/ioctl.h>
+
+#include "xf86drm.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+
+/**
+ * Create an IB buffer.
+ * + * \param dev - \c [in] Device handle + * \param context - \c [in] GPU Context + * \param ib_size - \c [in] Size of allocation + * \param ib - \c [out] return the pointer to the created IB buffer + * + * \return 0 on success otherwise POSIX Error code +*/ +static int amdgpu_cs_create_ib(amdgpu_device_handle dev, + amdgpu_context_handle context, + enum amdgpu_cs_ib_size ib_size, + amdgpu_ib_handle *ib) +{ + struct amdgpu_bo_alloc_request alloc_buffer; + struct amdgpu_bo_alloc_result info; + int r; + void *cpu; + struct amdgpu_ib *new_ib; + + memset(&alloc_buffer, 0, sizeof(alloc_buffer)); + + switch (ib_size) { + case amdgpu_cs_ib_size_4K: + alloc_buffer.alloc_size = 4 * 1024; + break; + case amdgpu_cs_ib_size_16K: + alloc_buffer.alloc_size = 16 * 1024; + break; + case amdgpu_cs_ib_size_32K: + alloc_buffer.alloc_size = 32 * 1024; + break; + case amdgpu_cs_ib_size_64K: + alloc_buffer.alloc_size = 64 * 1024; + break; + case amdgpu_cs_ib_size_128K: + alloc_buffer.alloc_size = 128 * 1024; + break; + default: + return -EINVAL; + } + + alloc_buffer.phys_alignment = 4 * 1024; + + alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; + + r = amdgpu_bo_alloc(dev, + &alloc_buffer, + &info); + if (r) + return r; + + r = amdgpu_bo_cpu_map(info.buf_handle, &cpu); + if (r) { + amdgpu_bo_free(info.buf_handle); + return r; + } + + new_ib = malloc(sizeof(struct amdgpu_ib)); + if (NULL == new_ib) { + amdgpu_bo_cpu_unmap(info.buf_handle); + amdgpu_bo_free(info.buf_handle); + return -ENOMEM; + } + + new_ib->buf_handle = info.buf_handle; + new_ib->cpu = cpu; + new_ib->virtual_mc_base_address = info.virtual_mc_base_address; + new_ib->ib_size = ib_size; + *ib = new_ib; + return 0; +} + +/** + * Destroy an IB buffer. 
+ * + * \param dev - \c [in] Device handle + * \param ib - \c [in] the IB buffer + * + * \return 0 on success otherwise POSIX Error code +*/ +static int amdgpu_cs_destroy_ib(amdgpu_device_handle dev, + amdgpu_ib_handle ib) +{ + int r; + r = amdgpu_bo_cpu_unmap(ib->buf_handle); + if (r) + return r; + + r = amdgpu_bo_free(ib->buf_handle); + if (r) + return r; + + free(ib); + return 0; +} + +/** + * Initialize IB pools to empty. + * + * \param context - \c [in] GPU Context + * + * \return 0 on success otherwise POSIX Error code +*/ +static int amdgpu_cs_init_ib_pool(amdgpu_context_handle context) +{ + int i; + int r; + + r = pthread_mutex_init(&context->pool_mutex, NULL); + if (r) + return r; + + for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++) + LIST_INITHEAD(&context->ib_pools[i]); + + return 0; +} + +/** + * Allocate an IB buffer from IB pools. + * + * \param dev - \c [in] Device handle + * \param context - \c [in] GPU Context + * \param ib_size - \c [in] Size of allocation + * \param ib - \c [out] return the pointer to the allocated IB buffer + * + * \return 0 on success otherwise POSIX Error code +*/ +static int amdgpu_cs_alloc_from_ib_pool(amdgpu_device_handle dev, + amdgpu_context_handle context, + enum amdgpu_cs_ib_size ib_size, + amdgpu_ib_handle *ib) +{ + int r; + struct list_head *head; + head = &context->ib_pools[ib_size]; + + r = -ENOMEM; + pthread_mutex_lock(&context->pool_mutex); + if (!LIST_IS_EMPTY(head)) { + *ib = LIST_ENTRY(struct amdgpu_ib, head->next, list_node); + LIST_DEL(&(*ib)->list_node); + r = 0; + } + pthread_mutex_unlock(&context->pool_mutex); + + return r; +} + +/** + * Free an IB buffer to IB pools. 
+ * + * \param context - \c [in] GPU Context + * \param ib - \c [in] the IB buffer + * + * \return N/A +*/ +static void amdgpu_cs_free_to_ib_pool(amdgpu_context_handle context, + amdgpu_ib_handle ib) +{ + struct list_head *head; + head = &context->ib_pools[ib->ib_size]; + pthread_mutex_lock(&context->pool_mutex); + LIST_ADD(&ib->list_node, head); + pthread_mutex_unlock(&context->pool_mutex); + return; +} + +/** + * Destroy all IB buffers in pools + * + * \param dev - \c [in] Device handle + * \param context - \c [in] GPU Context + * + * \return 0 on success otherwise POSIX Error code +*/ +static int amdgpu_cs_destroy_ib_pool(amdgpu_device_handle dev, + amdgpu_context_handle context) +{ + int i; + int r; + struct list_head *head; + struct amdgpu_ib *next; + struct amdgpu_ib *storage; + + r = 0; + pthread_mutex_lock(&context->pool_mutex); + for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++) { + head = &context->ib_pools[i]; + LIST_FOR_EACH_ENTRY_SAFE(next, storage, head, list_node) { + r = amdgpu_cs_destroy_ib(dev, next); + if (r) + break; + } + } + pthread_mutex_unlock(&context->pool_mutex); + pthread_mutex_destroy(&context->pool_mutex); + return r; +} + +/** + * Initialize pending IB lists + * + * \param context - \c [in] GPU Context + * + * \return 0 on success otherwise POSIX Error code +*/ +static int amdgpu_cs_init_pendings(amdgpu_context_handle context) +{ + unsigned ip, inst; + uint32_t ring; + int r; + + r = pthread_mutex_init(&context->pendings_mutex, NULL); + if (r) + return r; + + for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) + for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) + for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) + LIST_INITHEAD(&context->pendings[ip][inst][ring]); + + LIST_INITHEAD(&context->freed); + return 0; +} + +/** + * Free pending IBs + * + * \param dev - \c [in] Device handle + * \param context - \c [in] GPU Context + * + * \return 0 on success otherwise POSIX Error code +*/ +static int 
amdgpu_cs_destroy_pendings(amdgpu_device_handle dev, + amdgpu_context_handle context) +{ + int ip, inst; + uint32_t ring; + int r; + struct amdgpu_ib *next; + struct amdgpu_ib *s; + struct list_head *head; + + r = 0; + pthread_mutex_lock(&context->pendings_mutex); + for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) + for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) + for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) { + head = &context->pendings[ip][inst][ring]; + LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) { + r = amdgpu_cs_destroy_ib(dev, next); + if (r) + break; + } + } + + head = &context->freed; + LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) { + r = amdgpu_cs_destroy_ib(dev, next); + if (r) + break; + } + + pthread_mutex_unlock(&context->pendings_mutex); + pthread_mutex_destroy(&context->pendings_mutex); + return r; +} + +/** + * Add IB to pending IB lists without holding sequence_mutex. + * + * \param context - \c [in] GPU Context + * \param ib - \c [in] ib to added to pending lists + * \param ip - \c [in] hw ip block + * \param ip_instance - \c [in] instance of the hw ip block + * \param ring - \c [in] Ring of hw ip + * + * \return N/A +*/ +static void amdgpu_cs_add_pending(amdgpu_context_handle context, + amdgpu_ib_handle ib, + unsigned ip, unsigned ip_instance, + uint32_t ring) +{ + struct list_head *head; + pthread_mutex_lock(&context->pendings_mutex); + head = &context->pendings[ip][ip_instance][ring]; + LIST_ADDTAIL(&ib->list_node, head); + pthread_mutex_unlock(&context->pendings_mutex); + return; +} + +/** + * Garbage collector on a pending IB list without holding pendings_mutex. + * This function by itself is not multithread safe. 
+ * + * \param context - \c [in] GPU Context + * \param ip - \c [in] hw ip block + * \param ip_instance - \c [in] instance of the hw ip block + * \param ring - \c [in] Ring of hw ip + * \param expired_fence - \c [in] fence expired + * + * \return N/A + * \note Hold pendings_mutex before calling this function. +*/ +static void amdgpu_cs_pending_gc_not_safe(amdgpu_context_handle context, + unsigned ip, unsigned ip_instance, + uint32_t ring, + uint64_t expired_fence) +{ + struct list_head *head; + struct amdgpu_ib *next; + struct amdgpu_ib *s; + int r; + + head = &context->pendings[ip][ip_instance][ring]; + LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) + if (next->cs_handle <= expired_fence) { + LIST_DEL(&next->list_node); + amdgpu_cs_free_to_ib_pool(context, next); + } else { + /* The pending list is a sorted list. + There is no need to continue. */ + break; + } + + /* walk the freed list as well */ + head = &context->freed; + LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) { + bool busy; + + r = amdgpu_bo_wait_for_idle(next->buf_handle, 0, &busy); + if (r || busy) + break; + + LIST_DEL(&next->list_node); + amdgpu_cs_free_to_ib_pool(context, next); + } + + return; +} + +/** + * Garbage collector on a pending IB list + * + * \param context - \c [in] GPU Context + * \param ip - \c [in] hw ip block + * \param ip_instance - \c [in] instance of the hw ip block + * \param ring - \c [in] Ring of hw ip + * \param expired_fence - \c [in] fence expired + * + * \return N/A +*/ +static void amdgpu_cs_pending_gc(amdgpu_context_handle context, + unsigned ip, unsigned ip_instance, + uint32_t ring, + uint64_t expired_fence) +{ + pthread_mutex_lock(&context->pendings_mutex); + amdgpu_cs_pending_gc_not_safe(context, ip, ip_instance, ring, + expired_fence); + pthread_mutex_unlock(&context->pendings_mutex); + return; +} + +/** + * Garbage collector on all pending IB lists + * + * \param context - \c [in] GPU Context + * + * \return N/A +*/ +static void 
amdgpu_cs_all_pending_gc(amdgpu_context_handle context) +{ + unsigned ip, inst; + uint32_t ring; + uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS]; + + pthread_mutex_lock(&context->sequence_mutex); + for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) + for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) + for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) + expired_fences[ip][inst][ring] = + context->expired_fences[ip][inst][ring]; + pthread_mutex_unlock(&context->sequence_mutex); + + pthread_mutex_lock(&context->pendings_mutex); + for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) + for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) + for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) + amdgpu_cs_pending_gc_not_safe(context, ip, inst, ring, + expired_fences[ip][inst][ring]); + pthread_mutex_unlock(&context->pendings_mutex); +} + +/** + * Allocate an IB buffer + * If there is no free IB buffer in pools, create one. + * + * \param dev - \c [in] Device handle + * \param context - \c [in] GPU Context + * \param ib_size - \c [in] Size of allocation + * \param ib - \c [out] return the pointer to the allocated IB buffer + * + * \return 0 on success otherwise POSIX Error code +*/ +static int amdgpu_cs_alloc_ib_local(amdgpu_device_handle dev, + amdgpu_context_handle context, + enum amdgpu_cs_ib_size ib_size, + amdgpu_ib_handle *ib) +{ + int r; + + r = amdgpu_cs_alloc_from_ib_pool(dev, context, ib_size, ib); + if (!r) + return r; + + amdgpu_cs_all_pending_gc(context); + + /* Retry to allocate from free IB pools after garbage collector. */ + r = amdgpu_cs_alloc_from_ib_pool(dev, context, ib_size, ib); + if (!r) + return r; + + /* There is no suitable IB in free pools. Create one. 
*/ + r = amdgpu_cs_create_ib(dev, context, ib_size, ib); + return r; +} + +int amdgpu_cs_alloc_ib(amdgpu_device_handle dev, + amdgpu_context_handle context, + enum amdgpu_cs_ib_size ib_size, + struct amdgpu_cs_ib_alloc_result *output) +{ + int r; + amdgpu_ib_handle ib; + + if (NULL == dev) + return -EINVAL; + if (NULL == context) + return -EINVAL; + if (NULL == output) + return -EINVAL; + if (ib_size >= AMDGPU_CS_IB_SIZE_NUM) + return -EINVAL; + + r = amdgpu_cs_alloc_ib_local(dev, context, ib_size, &ib); + if (!r) { + output->handle = ib; + output->cpu = ib->cpu; + output->mc_address = ib->virtual_mc_base_address; + } + + return r; +} + +int amdgpu_cs_free_ib(amdgpu_device_handle dev, + amdgpu_context_handle context, + amdgpu_ib_handle handle) +{ + if (NULL == dev) + return -EINVAL; + if (NULL == context) + return -EINVAL; + if (NULL == handle) + return -EINVAL; + + pthread_mutex_lock(&context->pendings_mutex); + LIST_ADD(&handle->list_node, &context->freed); + pthread_mutex_unlock(&context->pendings_mutex); + return 0; +} + +/** + * Create command submission context + * + * \param dev - \c [in] amdgpu device handle + * \param context - \c [out] amdgpu context handle + * + * \return 0 on success otherwise POSIX Error code +*/ +int amdgpu_cs_ctx_create(amdgpu_device_handle dev, + amdgpu_context_handle *context) +{ + struct amdgpu_context *gpu_context; + union drm_amdgpu_ctx args; + int r; + + if (NULL == dev) + return -EINVAL; + if (NULL == context) + return -EINVAL; + + gpu_context = calloc(1, sizeof(struct amdgpu_context)); + if (NULL == gpu_context) + return -ENOMEM; + + r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL); + if (r) + goto error_mutex; + + r = amdgpu_cs_init_ib_pool(gpu_context); + if (r) + goto error_pool; + + r = amdgpu_cs_init_pendings(gpu_context); + if (r) + goto error_pendings; + + r = amdgpu_cs_alloc_ib_local(dev, gpu_context, amdgpu_cs_ib_size_4K, + &gpu_context->fence_ib); + if (r) + goto error_fence_ib; + + + memset(&args, 0, 
sizeof(args)); + args.in.op = AMDGPU_CTX_OP_ALLOC_CTX; + r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args)); + if (r) + goto error_kernel; + + gpu_context->id = args.out.alloc.ctx_id; + *context = (amdgpu_context_handle)gpu_context; + + return 0; + +error_kernel: + amdgpu_cs_free_ib(dev, gpu_context, gpu_context->fence_ib); + +error_fence_ib: + amdgpu_cs_destroy_pendings(dev, gpu_context); + +error_pendings: + amdgpu_cs_destroy_ib_pool(dev, gpu_context); + +error_pool: + pthread_mutex_destroy(&gpu_context->sequence_mutex); + +error_mutex: + free(gpu_context); + return r; +} + +/** + * Release command submission context + * + * \param dev - \c [in] amdgpu device handle + * \param context - \c [in] amdgpu context handle + * + * \return 0 on success otherwise POSIX Error code +*/ +int amdgpu_cs_ctx_free(amdgpu_device_handle dev, + amdgpu_context_handle context) +{ + int r; + union drm_amdgpu_ctx args; + + if (NULL == dev) + return -EINVAL; + if (NULL == context) + return -EINVAL; + + r = amdgpu_cs_free_ib(dev, context, context->fence_ib); + if (r) + return r; + + r = amdgpu_cs_destroy_pendings(dev, context); + if (r) + return r; + + r = amdgpu_cs_destroy_ib_pool(dev, context); + if (r) + return r; + + pthread_mutex_destroy(&context->sequence_mutex); + + /* now deal with kernel side */ + memset(&args, 0, sizeof(args)); + args.in.op = AMDGPU_CTX_OP_FREE_CTX; + args.in.ctx_id = context->id; + r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args)); + + free(context); + + return r; +} + +static int amdgpu_cs_create_bo_list(amdgpu_device_handle dev, + amdgpu_context_handle context, + struct amdgpu_cs_request *request, + amdgpu_ib_handle fence_ib, + uint32_t *handle) +{ + struct drm_amdgpu_bo_list_entry *list; + union drm_amdgpu_bo_list args; + unsigned num_resources; + unsigned i; + int r; + + num_resources = request->number_of_resources; + if (fence_ib) + ++num_resources; + + list = alloca(sizeof(struct drm_amdgpu_bo_list_entry) * 
num_resources); + + memset(&args, 0, sizeof(args)); + args.in.operation = AMDGPU_BO_LIST_OP_CREATE; + args.in.bo_number = num_resources; + args.in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry); + args.in.bo_info_ptr = (uint64_t)(uintptr_t)list; + + for (i = 0; i < request->number_of_resources; i++) { + list[i].bo_handle = request->resources[i]->handle; + if (request->resource_flags) + list[i].bo_priority = request->resource_flags[i]; + else + list[i].bo_priority = 0; + } + + if (fence_ib) + list[i].bo_handle = fence_ib->buf_handle->handle; + + r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_BO_LIST, + &args, sizeof(args)); + if (r) + return r; + + *handle = args.out.list_handle; + return 0; +} + +static int amdgpu_cs_free_bo_list(amdgpu_device_handle dev, uint32_t handle) +{ + union drm_amdgpu_bo_list args; + int r; + + memset(&args, 0, sizeof(args)); + args.in.operation = AMDGPU_BO_LIST_OP_DESTROY; + args.in.list_handle = handle; + + r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_BO_LIST, + &args, sizeof(args)); + + return r; +} + +static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring) +{ + return ip * AMDGPU_CS_MAX_RINGS + ring; +} + +/** + * Submit command to kernel DRM + * \param dev - \c [in] Device handle + * \param context - \c [in] GPU Context + * \param ibs_request - \c [in] Pointer to submission requests + * \param fence - \c [out] return fence for this submission + * + * \return 0 on success otherwise POSIX Error code + * \sa amdgpu_cs_submit() +*/ +static int amdgpu_cs_submit_one(amdgpu_device_handle dev, + amdgpu_context_handle context, + struct amdgpu_cs_request *ibs_request, + uint64_t *fence) +{ + int r; + uint32_t i, size; + union drm_amdgpu_cs cs; + uint64_t *chunk_array; + struct drm_amdgpu_cs_chunk *chunks; + struct drm_amdgpu_cs_chunk_data *chunk_data; + + if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM) + return -EINVAL; + if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS) + return -EINVAL; + if (ibs_request->number_of_ibs > 
AMDGPU_CS_MAX_IBS_PER_SUBMIT) + return -EINVAL; + + size = (ibs_request->number_of_ibs + 1) * ((sizeof(uint64_t) + + sizeof(struct drm_amdgpu_cs_chunk) + + sizeof(struct drm_amdgpu_cs_chunk_data)) + + ibs_request->number_of_resources + 1) * + sizeof(struct drm_amdgpu_bo_list_entry); + chunk_array = malloc(size); + if (NULL == chunk_array) + return -ENOMEM; + memset(chunk_array, 0, size); + + chunks = (struct drm_amdgpu_cs_chunk *)(chunk_array + ibs_request->number_of_ibs + 1); + chunk_data = (struct drm_amdgpu_cs_chunk_data *)(chunks + ibs_request->number_of_ibs + 1); + + memset(&cs, 0, sizeof(cs)); + cs.in.chunks = (uint64_t)(uintptr_t)chunk_array; + cs.in.ctx_id = context->id; + cs.in.num_chunks = ibs_request->number_of_ibs; + /* IB chunks */ + for (i = 0; i < ibs_request->number_of_ibs; i++) { + struct amdgpu_cs_ib_info *ib; + chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i]; + chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB; + chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; + chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; + + ib = &ibs_request->ibs[i]; + + chunk_data[i].ib_data.handle = ib->ib_handle->buf_handle->handle; + chunk_data[i].ib_data.va_start = ib->ib_handle->virtual_mc_base_address; + chunk_data[i].ib_data.ib_bytes = ib->size * 4; + chunk_data[i].ib_data.ip_type = ibs_request->ip_type; + chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance; + chunk_data[i].ib_data.ring = ibs_request->ring; + + if (ib->flags & AMDGPU_CS_GFX_IB_CE) + chunk_data[i].ib_data.flags = AMDGPU_IB_FLAG_CE; + } + + r = amdgpu_cs_create_bo_list(dev, context, ibs_request, NULL, + &cs.in.bo_list_handle); + if (r) + goto error_unlock; + + pthread_mutex_lock(&context->sequence_mutex); + + if (ibs_request->ip_type != AMDGPU_HW_IP_UVD && + ibs_request->ip_type != AMDGPU_HW_IP_VCE) { + i = cs.in.num_chunks++; + + /* fence chunk */ + chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i]; + chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE; + chunks[i].length_dw = 
sizeof(struct drm_amdgpu_cs_chunk_fence) / 4; + chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; + + /* fence bo handle */ + chunk_data[i].fence_data.handle = context->fence_ib->buf_handle->handle; + /* offset */ + chunk_data[i].fence_data.offset = amdgpu_cs_fence_index( + ibs_request->ip_type, ibs_request->ring); + chunk_data[i].fence_data.offset *= sizeof(uint64_t); + } + + r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS, + &cs, sizeof(cs)); + if (r) + goto error_unlock; + + + /* Hold sequence_mutex while adding record to the pending list. + So the pending list is a sorted list according to fence value. */ + + for (i = 0; i < ibs_request->number_of_ibs; i++) { + struct amdgpu_cs_ib_info *ib; + + ib = &ibs_request->ibs[i]; + if (ib->flags & AMDGPU_CS_REUSE_IB) + continue; + + ib->ib_handle->cs_handle = cs.out.handle; + + amdgpu_cs_add_pending(context, ib->ib_handle, ibs_request->ip_type, + ibs_request->ip_instance, + ibs_request->ring); + } + + *fence = cs.out.handle; + + pthread_mutex_unlock(&context->sequence_mutex); + + r = amdgpu_cs_free_bo_list(dev, cs.in.bo_list_handle); + if (r) + goto error_free; + + free(chunk_array); + return 0; + +error_unlock: + pthread_mutex_unlock(&context->sequence_mutex); + +error_free: + free(chunk_array); + return r; +} + +int amdgpu_cs_submit(amdgpu_device_handle dev, + amdgpu_context_handle context, + uint64_t flags, + struct amdgpu_cs_request *ibs_request, + uint32_t number_of_requests, + uint64_t *fences) +{ + int r; + uint32_t i; + + if (NULL == dev) + return -EINVAL; + if (NULL == context) + return -EINVAL; + if (NULL == ibs_request) + return -EINVAL; + if (NULL == fences) + return -EINVAL; + + r = 0; + for (i = 0; i < number_of_requests; i++) { + r = amdgpu_cs_submit_one(dev, context, ibs_request, fences); + if (r) + break; + fences++; + ibs_request++; + } + + return r; +} + +/** + * Calculate absolute timeout. + * + * \param timeout - \c [in] timeout in nanoseconds. 
+ * + * \return absolute timeout in nanoseconds +*/ +uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout) +{ + int r; + + if (timeout != AMDGPU_TIMEOUT_INFINITE) { + struct timespec current; + r = clock_gettime(CLOCK_MONOTONIC, &current); + if (r) + return r; + + timeout += ((uint64_t)current.tv_sec) * 1000000000ull; + timeout += current.tv_nsec; + } + return timeout; +} + +static int amdgpu_ioctl_wait_cs(amdgpu_device_handle dev, + unsigned ip, + unsigned ip_instance, + uint32_t ring, + uint64_t handle, + uint64_t timeout_ns, + bool *busy) +{ + union drm_amdgpu_wait_cs args; + int r; + + memset(&args, 0, sizeof(args)); + args.in.handle = handle; + args.in.ip_type = ip; + args.in.ip_instance = ip_instance; + args.in.ring = ring; + args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns); + + /* Handle errors manually here because of timeout */ + r = ioctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args); + if (r == -1 && (errno == EINTR || errno == EAGAIN)) { + *busy = true; + return 0; + } else if (r) + return -errno; + + *busy = args.out.status; + return 0; +} + +int amdgpu_cs_query_fence_status(amdgpu_device_handle dev, + struct amdgpu_cs_query_fence *fence, + uint32_t *expired) +{ + amdgpu_context_handle context; + uint64_t *signaled_fence; + uint64_t *expired_fence; + unsigned ip_type, ip_instance; + uint32_t ring; + bool busy = true; + int r; + + if (NULL == dev) + return -EINVAL; + if (NULL == fence) + return -EINVAL; + if (NULL == expired) + return -EINVAL; + if (NULL == fence->context) + return -EINVAL; + if (fence->ip_type >= AMDGPU_HW_IP_NUM) + return -EINVAL; + if (fence->ring >= AMDGPU_CS_MAX_RINGS) + return -EINVAL; + + context = fence->context; + ip_type = fence->ip_type; + ip_instance = fence->ip_instance; + ring = fence->ring; + signaled_fence = context->fence_ib->cpu; + signaled_fence += amdgpu_cs_fence_index(ip_type, ring); + expired_fence = &context->expired_fences[ip_type][ip_instance][ring]; + *expired = false; + 
+ pthread_mutex_lock(&context->sequence_mutex); + if (fence->fence <= *expired_fence) { + /* This fence value is expired already. */ + pthread_mutex_unlock(&context->sequence_mutex); + *expired = true; + return 0; + } + + if (fence->fence <= *signaled_fence) { + /* This fence value is signaled already. */ + *expired_fence = *signaled_fence; + pthread_mutex_unlock(&context->sequence_mutex); + amdgpu_cs_pending_gc(context, ip_type, ip_instance, ring, + fence->fence); + *expired = true; + return 0; + } + + pthread_mutex_unlock(&context->sequence_mutex); + + r = amdgpu_ioctl_wait_cs(dev, ip_type, ip_instance, ring, + fence->fence, fence->timeout_ns, &busy); + if (!r && !busy) { + *expired = true; + pthread_mutex_lock(&context->sequence_mutex); + /* The thread doesn't hold sequence_mutex. Other thread could + update *expired_fence already. Check whether there is a + more recently expired fence. */ + if (fence->fence > *expired_fence) { + *expired_fence = fence->fence; + pthread_mutex_unlock(&context->sequence_mutex); + amdgpu_cs_pending_gc(context, ip_type, ip_instance, + ring, fence->fence); + } else { + pthread_mutex_unlock(&context->sequence_mutex); + } + } + + return r; +} + diff --git a/amdgpu/amdgpu_device.c b/amdgpu/amdgpu_device.c new file mode 100644 index 0000000..c610fd3 --- /dev/null +++ b/amdgpu/amdgpu_device.c @@ -0,0 +1,241 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * +*/ + +/** + * \file amdgpu_device.c + * + * Implementation of functions for AMD GPU device + * + * + */ + +#include <sys/stat.h> +#include <errno.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include "xf86drm.h" +#include "amdgpu_drm.h" +#include "amdgpu_internal.h" +#include "util_hash_table.h" + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) +#define UINT_TO_PTR(x) ((void *)((intptr_t)(x))) +#define RENDERNODE_MINOR_MASK 0xff7f + +pthread_mutex_t fd_mutex = PTHREAD_MUTEX_INITIALIZER; +static struct util_hash_table *fd_tab; + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +static unsigned fd_hash(void *key) +{ + int fd = PTR_TO_UINT(key); + struct stat stat; + fstat(fd, &stat); + + if (!S_ISCHR(stat.st_mode)) + return stat.st_dev ^ stat.st_ino; + else + return stat.st_dev ^ (stat.st_rdev & RENDERNODE_MINOR_MASK); +} + +static int fd_compare(void *key1, void *key2) +{ + int fd1 = PTR_TO_UINT(key1); + int fd2 = PTR_TO_UINT(key2); + struct stat stat1, stat2; + fstat(fd1, &stat1); + fstat(fd2, &stat2); + + if (!S_ISCHR(stat1.st_mode) || !S_ISCHR(stat2.st_mode)) + return stat1.st_dev != stat2.st_dev || + stat1.st_ino != stat2.st_ino; + else + return major(stat1.st_rdev) != major(stat2.st_rdev) || + (minor(stat1.st_rdev) & RENDERNODE_MINOR_MASK) != + (minor(stat2.st_rdev) & RENDERNODE_MINOR_MASK); +} + +/** +* Get the authenticated form fd, +* +* \param fd - \c [in] File descriptor for AMD GPU device +* \param auth - \c [out] Pointer to output the fd is authenticated or not +* A render node fd, output auth = 0 +* A legacy fd, get the authenticated for compatibility root +* +* \return 0 on success\n +* >0 - AMD specific error code\n +* <0 - Negative POSIX Error code +*/ +static int amdgpu_get_auth(int fd, int *auth) +{ + int r = 0; + drm_client_t client; + + if (drmGetNodeTypeFromFd(fd) == 
DRM_NODE_RENDER) + *auth = 0; + else { + client.idx = 0; + r = drmIoctl(fd, DRM_IOCTL_GET_CLIENT, &client); + if (!r) + *auth = client.auth; + } + return r; +} + +int amdgpu_device_initialize(int fd, + uint32_t *major_version, + uint32_t *minor_version, + amdgpu_device_handle *device_handle) +{ + struct amdgpu_device *dev; + drmVersionPtr version; + int r; + int flag_auth = 0; + int flag_authexist=0; + uint32_t accel_working; + + *device_handle = NULL; + + pthread_mutex_lock(&fd_mutex); + if (!fd_tab) + fd_tab = util_hash_table_create(fd_hash, fd_compare); + r = amdgpu_get_auth(fd, &flag_auth); + if (r) { + pthread_mutex_unlock(&fd_mutex); + return r; + } + dev = util_hash_table_get(fd_tab, UINT_TO_PTR(fd)); + if (dev) { + r = amdgpu_get_auth(dev->fd, &flag_authexist); + if (r) { + pthread_mutex_unlock(&fd_mutex); + return r; + } + if ((flag_auth) && (!flag_authexist)) { + dev->flink_fd = fd; + } + *major_version = dev->major_version; + *minor_version = dev->minor_version; + amdgpu_device_reference(device_handle, dev); + pthread_mutex_unlock(&fd_mutex); + return 0; + } + + dev = calloc(1, sizeof(struct amdgpu_device)); + if (!dev) { + pthread_mutex_unlock(&fd_mutex); + return -ENOMEM; + } + + atomic_set(&dev->refcount, 1); + + version = drmGetVersion(fd); + if (version->version_major != 3) { + fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " + "only compatible with 3.x.x.\n", + __func__, + version->version_major, + version->version_minor, + version->version_patchlevel); + drmFreeVersion(version); + r = -EBADF; + goto cleanup; + } + + dev->fd = fd; + dev->flink_fd = fd; + dev->major_version = version->version_major; + dev->minor_version = version->version_minor; + drmFreeVersion(version); + + dev->bo_flink_names = util_hash_table_create(handle_hash, + handle_compare); + dev->bo_handles = util_hash_table_create(handle_hash, handle_compare); + dev->bo_vas = util_hash_table_create(handle_hash, handle_compare); + pthread_mutex_init(&dev->bo_table_mutex, 
NULL); + + /* Check if acceleration is working. */ + r = amdgpu_query_info(dev, AMDGPU_INFO_ACCEL_WORKING, 4, &accel_working); + if (r) + goto cleanup; + if (!accel_working) { + r = -EBADF; + goto cleanup; + } + + r = amdgpu_query_gpu_info_init(dev); + if (r) + goto cleanup; + + amdgpu_vamgr_init(dev); + + *major_version = dev->major_version; + *minor_version = dev->minor_version; + *device_handle = dev; + util_hash_table_set(fd_tab, UINT_TO_PTR(fd), dev); + pthread_mutex_unlock(&fd_mutex); + + return 0; + +cleanup: + free(dev); + pthread_mutex_unlock(&fd_mutex); + return r; +} + +void amdgpu_device_free_internal(amdgpu_device_handle dev) +{ + util_hash_table_destroy(dev->bo_flink_names); + util_hash_table_destroy(dev->bo_handles); + util_hash_table_destroy(dev->bo_vas); + pthread_mutex_destroy(&dev->bo_table_mutex); + pthread_mutex_destroy(&(dev->vamgr.bo_va_mutex)); + util_hash_table_remove(fd_tab, UINT_TO_PTR(dev->fd)); + free(dev); +} + +int amdgpu_device_deinitialize(amdgpu_device_handle dev) +{ + amdgpu_device_reference(&dev, NULL); + return 0; +} + +void amdgpu_device_reference(struct amdgpu_device **dst, + struct amdgpu_device *src) +{ + if (update_references(&(*dst)->refcount, &src->refcount)) + amdgpu_device_free_internal(*dst); + *dst = src; +} diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c new file mode 100644 index 0000000..0b77731 --- /dev/null +++ b/amdgpu/amdgpu_gpu_info.c @@ -0,0 +1,275 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <errno.h> +#include <string.h> + +#include "amdgpu.h" +#include "amdgpu_drm.h" +#include "amdgpu_internal.h" +#include "xf86drm.h" + +int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id, + unsigned size, void *value) +{ + struct drm_amdgpu_info request; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)value; + request.return_size = size; + request.query = info_id; + + return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, + sizeof(struct drm_amdgpu_info)); +} + +int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id, + int32_t *result) +{ + struct drm_amdgpu_info request; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)result; + request.return_size = sizeof(*result); + request.query = AMDGPU_INFO_CRTC_FROM_ID; + request.mode_crtc.id = id; + + return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, + sizeof(struct drm_amdgpu_info)); +} + +int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset, + unsigned count, uint32_t instance, uint32_t flags, + uint32_t *values) +{ + struct drm_amdgpu_info request; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)values; + request.return_size = count * sizeof(uint32_t); + request.query = AMDGPU_INFO_READ_MMR_REG; + request.read_mmr_reg.dword_offset = dword_offset; + request.read_mmr_reg.count = count; + request.read_mmr_reg.instance = instance; + request.read_mmr_reg.flags = flags; + + return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, + sizeof(struct drm_amdgpu_info)); +} + +int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type, + uint32_t *count) +{ + struct drm_amdgpu_info request; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)count; + request.return_size = sizeof(*count); + request.query = AMDGPU_INFO_HW_IP_COUNT; + request.query_hw_ip.type = type; + + return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, 
&request, + sizeof(struct drm_amdgpu_info)); +} + +int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type, + unsigned ip_instance, + struct drm_amdgpu_info_hw_ip *info) +{ + struct drm_amdgpu_info request; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)info; + request.return_size = sizeof(*info); + request.query = AMDGPU_INFO_HW_IP_INFO; + request.query_hw_ip.type = type; + request.query_hw_ip.ip_instance = ip_instance; + + return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, + sizeof(struct drm_amdgpu_info)); +} + +int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type, + unsigned ip_instance, unsigned index, + uint32_t *version, uint32_t *feature) +{ + struct drm_amdgpu_info request; + struct drm_amdgpu_info_firmware firmware; + int r; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)&firmware; + request.return_size = sizeof(firmware); + request.query = AMDGPU_INFO_FW_VERSION; + request.query_fw.fw_type = fw_type; + request.query_fw.ip_instance = ip_instance; + request.query_fw.index = index; + + r = drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, + sizeof(struct drm_amdgpu_info)); + if (r) + return r; + + *version = firmware.ver; + *feature = firmware.feature; + return 0; +} + +int amdgpu_query_gpu_info_init(amdgpu_device_handle dev) +{ + int r, i; + + r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(dev->dev_info), + &dev->dev_info); + if (r) + return r; + + dev->info.asic_id = dev->dev_info.device_id; + dev->info.chip_rev = dev->dev_info.chip_rev; + dev->info.chip_external_rev = dev->dev_info.external_rev; + dev->info.family_id = dev->dev_info.family; + dev->info.max_engine_clk = dev->dev_info.max_engine_clock; + dev->info.gpu_counter_freq = dev->dev_info.gpu_counter_freq; + dev->info.enabled_rb_pipes_mask = dev->dev_info.enabled_rb_pipes_mask; + dev->info.rb_pipes = dev->dev_info.num_rb_pipes; + dev->info.ids_flags = dev->dev_info.ids_flags; + 
dev->info.num_hw_gfx_contexts = dev->dev_info.num_hw_gfx_contexts; + dev->info.num_shader_engines = dev->dev_info.num_shader_engines; + dev->info.num_shader_arrays_per_engine = + dev->dev_info.num_shader_arrays_per_engine; + + for (i = 0; i < (int)dev->info.num_shader_engines; i++) { + unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) | + (AMDGPU_INFO_MMR_SH_INDEX_MASK << + AMDGPU_INFO_MMR_SH_INDEX_SHIFT); + + r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0, + &dev->info.backend_disable[i]); + if (r) + return r; + /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */ + dev->info.backend_disable[i] = + (dev->info.backend_disable[i] >> 16) & 0xff; + + r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0, + &dev->info.pa_sc_raster_cfg[i]); + if (r) + return r; + + r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0, + &dev->info.pa_sc_raster_cfg1[i]); + if (r) + return r; + } + + r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0, + dev->info.gb_tile_mode); + if (r) + return r; + + r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0, + dev->info.gb_macro_tile_mode); + if (r) + return r; + + r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0, + &dev->info.gb_addr_cfg); + if (r) + return r; + + r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0, + &dev->info.mc_arb_ramcfg); + if (r) + return r; + + dev->info.cu_active_number = dev->dev_info.cu_active_number; + dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask; + memcpy(&dev->info.cu_bitmap[0][0], &dev->dev_info.cu_bitmap[0][0], sizeof(dev->info.cu_bitmap)); + + /* TODO: info->max_quad_shader_pipes is not set */ + /* TODO: info->avail_quad_shader_pipes is not set */ + /* TODO: info->cache_entries_per_quad_pipe is not set */ + /* TODO: info->active_rb_pipes is not set */ + return 0; +} + +int amdgpu_query_gpu_info(amdgpu_device_handle dev, + struct amdgpu_gpu_info *info) +{ + /* Get ASIC info*/ + *info = dev->info; + + return 0; +} + +int 
amdgpu_query_heap_info(amdgpu_device_handle dev, + uint32_t heap, + uint32_t flags, + struct amdgpu_heap_info *info) +{ + struct drm_amdgpu_info_vram_gtt vram_gtt_info; + int r; + + r = amdgpu_query_info(dev, AMDGPU_INFO_VRAM_GTT, + sizeof(vram_gtt_info), &vram_gtt_info); + if (r) + return r; + + /* Get heap information */ + switch (heap) { + case AMDGPU_GEM_DOMAIN_VRAM: + /* query visible only vram heap */ + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + info->heap_size = vram_gtt_info.vram_cpu_accessible_size; + else /* query total vram heap */ + info->heap_size = vram_gtt_info.vram_size; + + info->max_allocation = vram_gtt_info.vram_cpu_accessible_size; + + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + r = amdgpu_query_info(dev, AMDGPU_INFO_VIS_VRAM_USAGE, + sizeof(info->heap_usage), + &info->heap_usage); + else + r = amdgpu_query_info(dev, AMDGPU_INFO_VRAM_USAGE, + sizeof(info->heap_usage), + &info->heap_usage); + if (r) + return r; + break; + case AMDGPU_GEM_DOMAIN_GTT: + info->heap_size = vram_gtt_info.gtt_size; + info->max_allocation = vram_gtt_info.vram_cpu_accessible_size; + + r = amdgpu_query_info(dev, AMDGPU_INFO_GTT_USAGE, + sizeof(info->heap_usage), + &info->heap_usage); + if (r) + return r; + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h new file mode 100644 index 0000000..8346f16 --- /dev/null +++ b/amdgpu/amdgpu_internal.h @@ -0,0 +1,208 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _AMDGPU_INTERNAL_H_ +#define _AMDGPU_INTERNAL_H_ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <pthread.h> +#include "xf86atomic.h" +#include "amdgpu.h" +#include "util_double_list.h" + +#define AMDGPU_CS_MAX_RINGS 8 + +struct amdgpu_bo_va_hole { + struct list_head list; + uint64_t offset; + uint64_t size; +}; + +struct amdgpu_bo_va_mgr { + /* the start virtual address */ + uint64_t va_offset; + struct list_head va_holes; + pthread_mutex_t bo_va_mutex; + uint32_t va_alignment; +}; + +struct amdgpu_device { + atomic_t refcount; + int fd; + int flink_fd; + unsigned major_version; + unsigned minor_version; + + /** List of buffer handles. Protected by bo_table_mutex. */ + struct util_hash_table *bo_handles; + /** List of buffer GEM flink names. Protected by bo_table_mutex. 
*/ + struct util_hash_table *bo_flink_names; + /** List of buffer virtual memory ranges. Protected by bo_table_mutex. */ + struct util_hash_table *bo_vas; + /** This protects all hash tables. */ + pthread_mutex_t bo_table_mutex; + struct amdgpu_bo_va_mgr vamgr; + struct drm_amdgpu_info_device dev_info; + struct amdgpu_gpu_info info; +}; + +struct amdgpu_bo { + atomic_t refcount; + struct amdgpu_device *dev; + + uint64_t alloc_size; + uint64_t virtual_mc_base_address; + + uint32_t handle; + uint32_t flink_name; + + pthread_mutex_t cpu_access_mutex; + void *cpu_ptr; + int cpu_map_count; +}; + +/* + * There are three mutexes. + * To avoid deadlock, only hold the mutexes in this order: + * sequence_mutex -> pendings_mutex -> pool_mutex. +*/ +struct amdgpu_context { + /** Mutex for accessing fences and to maintain command submissions + and pending lists in good sequence. */ + pthread_mutex_t sequence_mutex; + /** Buffer for user fences */ + struct amdgpu_ib *fence_ib; + /** The newest expired fence for the ring of the ip blocks. */ + uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS]; + /** Mutex for accessing pendings list. */ + pthread_mutex_t pendings_mutex; + /** Pending IBs. */ + struct list_head pendings[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS]; + /** Freed IBs not yet in pool */ + struct list_head freed; + /** Mutex for accessing free ib pool. */ + pthread_mutex_t pool_mutex; + /** Internal free IB pools. */ + struct list_head ib_pools[AMDGPU_CS_IB_SIZE_NUM]; + /* context id*/ + uint32_t id; +}; + +struct amdgpu_ib { + struct list_head list_node; + amdgpu_bo_handle buf_handle; + void *cpu; + uint64_t virtual_mc_base_address; + enum amdgpu_cs_ib_size ib_size; + uint64_t cs_handle; +}; + +/** + * Functions. 
+ */ + +void amdgpu_device_free_internal(amdgpu_device_handle dev); + +void amdgpu_bo_free_internal(amdgpu_bo_handle bo); + +void amdgpu_vamgr_init(struct amdgpu_device *dev); + +uint64_t amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, + uint64_t size, uint64_t alignment); + +void amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va, + uint64_t size); + +int amdgpu_query_gpu_info_init(amdgpu_device_handle dev); + +uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout); + +/** + * Inline functions. + */ + +/** + * Increment src and decrement dst as if we were updating references + * for an assignment between 2 pointers of some objects. + * + * \return true if dst is 0 + */ +static inline bool update_references(atomic_t *dst, atomic_t *src) +{ + if (dst != src) { + /* bump src first */ + if (src) { + assert(atomic_read(src) > 0); + atomic_inc(src); + } + if (dst) { + assert(atomic_read(dst) > 0); + return atomic_dec_and_test(dst); + } + } + return false; +} + +/** + * Assignment between two amdgpu_bo pointers with reference counting. + * + * Usage: + * struct amdgpu_bo *dst = ... , *src = ...; + * + * dst = src; + * // No reference counting. Only use this when you need to move + * // a reference from one pointer to another. + * + * amdgpu_bo_reference(&dst, src); + * // Reference counters are updated. dst is decremented and src is + * // incremented. dst is freed if its reference counter is 0. + */ +static inline void amdgpu_bo_reference(struct amdgpu_bo **dst, + struct amdgpu_bo *src) +{ + if (update_references(&(*dst)->refcount, &src->refcount)) + amdgpu_bo_free_internal(*dst); + *dst = src; +} + +/** + * Assignment between two amdgpu_device pointers with reference counting. + * + * Usage: + * struct amdgpu_device *dst = ... , *src = ...; + * + * dst = src; + * // No reference counting. Only use this when you need to move + * // a reference from one pointer to another. + * + * amdgpu_device_reference(&dst, src); + * // Reference counters are updated. 
dst is decremented and src is + * // incremented. dst is freed if its reference counter is 0. + */ +void amdgpu_device_reference(struct amdgpu_device **dst, + struct amdgpu_device *src); +#endif diff --git a/amdgpu/amdgpu_vamgr.c b/amdgpu/amdgpu_vamgr.c new file mode 100644 index 0000000..2335912 --- /dev/null +++ b/amdgpu/amdgpu_vamgr.c @@ -0,0 +1,169 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * +*/ + +#include <stdlib.h> +#include <string.h> +#include "amdgpu.h" +#include "amdgpu_drm.h" +#include "amdgpu_internal.h" +#include "util_math.h" + +void amdgpu_vamgr_init(struct amdgpu_device *dev) +{ + struct amdgpu_bo_va_mgr *vamgr = &dev->vamgr; + + vamgr->va_offset = dev->dev_info.virtual_address_offset; + vamgr->va_alignment = dev->dev_info.virtual_address_alignment; + + list_inithead(&vamgr->va_holes); + pthread_mutex_init(&vamgr->bo_va_mutex, NULL); +} + +uint64_t amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, + uint64_t size, uint64_t alignment) +{ + struct amdgpu_bo_va_hole *hole, *n; + uint64_t offset = 0, waste = 0; + + alignment = MAX2(alignment, mgr->va_alignment); + size = ALIGN(size, mgr->va_alignment); + + pthread_mutex_lock(&mgr->bo_va_mutex); + /* TODO: using more appropriate way to track the holes */ + /* first look for a hole */ + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { + offset = hole->offset; + waste = offset % alignment; + waste = waste ? alignment - waste : 0; + offset += waste; + if (offset >= (hole->offset + hole->size)) { + continue; + } + if (!waste && hole->size == size) { + offset = hole->offset; + list_del(&hole->list); + free(hole); + pthread_mutex_unlock(&mgr->bo_va_mutex); + return offset; + } + if ((hole->size - waste) > size) { + if (waste) { + n = calloc(1, + sizeof(struct amdgpu_bo_va_hole)); + n->size = waste; + n->offset = hole->offset; + list_add(&n->list, &hole->list); + } + hole->size -= (size + waste); + hole->offset += size + waste; + pthread_mutex_unlock(&mgr->bo_va_mutex); + return offset; + } + if ((hole->size - waste) == size) { + hole->size = waste; + pthread_mutex_unlock(&mgr->bo_va_mutex); + return offset; + } + } + + offset = mgr->va_offset; + waste = offset % alignment; + waste = waste ? 
alignment - waste : 0; + if (waste) { + n = calloc(1, sizeof(struct amdgpu_bo_va_hole)); + n->size = waste; + n->offset = offset; + list_add(&n->list, &mgr->va_holes); + } + offset += waste; + mgr->va_offset += size + waste; + pthread_mutex_unlock(&mgr->bo_va_mutex); + return offset; +} + +void amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va, + uint64_t size) +{ + struct amdgpu_bo_va_hole *hole; + + size = ALIGN(size, mgr->va_alignment); + + pthread_mutex_lock(&mgr->bo_va_mutex); + if ((va + size) == mgr->va_offset) { + mgr->va_offset = va; + /* Delete uppermost hole if it reaches the new top */ + if (!LIST_IS_EMPTY(&mgr->va_holes)) { + hole = container_of(mgr->va_holes.next, hole, list); + if ((hole->offset + hole->size) == va) { + mgr->va_offset = hole->offset; + list_del(&hole->list); + free(hole); + } + } + } else { + struct amdgpu_bo_va_hole *next; + + hole = container_of(&mgr->va_holes, hole, list); + LIST_FOR_EACH_ENTRY(next, &mgr->va_holes, list) { + if (next->offset < va) + break; + hole = next; + } + + if (&hole->list != &mgr->va_holes) { + /* Grow upper hole if it's adjacent */ + if (hole->offset == (va + size)) { + hole->offset = va; + hole->size += size; + /* Merge lower hole if it's adjacent */ + if (next != hole + && &next->list != &mgr->va_holes + && (next->offset + next->size) == va) { + next->size += hole->size; + list_del(&hole->list); + free(hole); + } + goto out; + } + } + + /* Grow lower hole if it's adjacent */ + if (next != hole && &next->list != &mgr->va_holes && + (next->offset + next->size) == va) { + next->size += size; + goto out; + } + + /* FIXME on allocation failure we just lose virtual address space + * maybe print a warning + */ + next = calloc(1, sizeof(struct amdgpu_bo_va_hole)); + if (next) { + next->size = size; + next->offset = va; + list_add(&next->list, &hole->list); + } + } +out: + pthread_mutex_unlock(&mgr->bo_va_mutex); +} diff --git a/amdgpu/libdrm_amdgpu.pc.in b/amdgpu/libdrm_amdgpu.pc.in new file mode 
100644 index 0000000..417865e --- /dev/null +++ b/amdgpu/libdrm_amdgpu.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libdrm_amdgpu +Description: Userspace interface to kernel DRM services for amdgpu +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -ldrm_amdgpu +Cflags: -I${includedir} -I${includedir}/libdrm diff --git a/amdgpu/util_hash.c b/amdgpu/util_hash.c new file mode 100644 index 0000000..b1e12c4 --- /dev/null +++ b/amdgpu/util_hash.c @@ -0,0 +1,382 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin <zackr@xxxxxxxxxx> + */ + +#include "util_hash.h" + +#include <stdlib.h> +#include <assert.h> + +#define MAX(a, b) ((a > b) ? (a) : (b)) + +static const int MinNumBits = 4; + +static const unsigned char prime_deltas[] = { + 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3, + 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0 +}; + +static int primeForNumBits(int numBits) +{ + return (1 << numBits) + prime_deltas[numBits]; +} + +/* Returns the smallest integer n such that + primeForNumBits(n) >= hint. +*/ +static int countBits(int hint) +{ + int numBits = 0; + int bits = hint; + + while (bits > 1) { + bits >>= 1; + numBits++; + } + + if (numBits >= (int)sizeof(prime_deltas)) { + numBits = sizeof(prime_deltas) - 1; + } else if (primeForNumBits(numBits) < hint) { + ++numBits; + } + return numBits; +} + +struct util_node { + struct util_node *next; + unsigned key; + void *value; +}; + +struct util_hash_data { + struct util_node *fakeNext; + struct util_node **buckets; + int size; + int nodeSize; + short userNumBits; + short numBits; + int numBuckets; +}; + +struct util_hash { + union { + struct util_hash_data *d; + struct util_node *e; + } data; +}; + +static void *util_data_allocate_node(struct util_hash_data *hash) +{ + return malloc(hash->nodeSize); +} + +static void util_free_node(struct util_node *node) +{ + free(node); +} + +static struct util_node * +util_hash_create_node(struct util_hash *hash, + unsigned akey, void *avalue, + struct util_node **anextNode) +{ + struct util_node *node = util_data_allocate_node(hash->data.d); + + if (!node) + return NULL; + + node->key = akey; + node->value = avalue; + + node->next = (struct util_node*)(*anextNode); + *anextNode = node; + ++hash->data.d->size; + return node; +} + +static void util_data_rehash(struct util_hash_data *hash, int hint) +{ + if (hint < 0) { + hint = countBits(-hint); + if (hint < 
MinNumBits) + hint = MinNumBits; + hash->userNumBits = (short)hint; + while (primeForNumBits(hint) < (hash->size >> 1)) + ++hint; + } else if (hint < MinNumBits) { + hint = MinNumBits; + } + + if (hash->numBits != hint) { + struct util_node *e = (struct util_node *)(hash); + struct util_node **oldBuckets = hash->buckets; + int oldNumBuckets = hash->numBuckets; + int i = 0; + + hash->numBits = (short)hint; + hash->numBuckets = primeForNumBits(hint); + hash->buckets = malloc(sizeof(struct util_node*) * hash->numBuckets); + for (i = 0; i < hash->numBuckets; ++i) + hash->buckets[i] = e; + + for (i = 0; i < oldNumBuckets; ++i) { + struct util_node *firstNode = oldBuckets[i]; + while (firstNode != e) { + unsigned h = firstNode->key; + struct util_node *lastNode = firstNode; + struct util_node *afterLastNode; + struct util_node **beforeFirstNode; + + while (lastNode->next != e && lastNode->next->key == h) + lastNode = lastNode->next; + + afterLastNode = lastNode->next; + beforeFirstNode = &hash->buckets[h % hash->numBuckets]; + while (*beforeFirstNode != e) + beforeFirstNode = &(*beforeFirstNode)->next; + lastNode->next = *beforeFirstNode; + *beforeFirstNode = firstNode; + firstNode = afterLastNode; + } + } + free(oldBuckets); + } +} + +static void util_data_might_grow(struct util_hash_data *hash) +{ + if (hash->size >= hash->numBuckets) + util_data_rehash(hash, hash->numBits + 1); +} + +static void util_data_has_shrunk(struct util_hash_data *hash) +{ + if (hash->size <= (hash->numBuckets >> 3) && + hash->numBits > hash->userNumBits) { + int max = MAX(hash->numBits-2, hash->userNumBits); + util_data_rehash(hash, max); + } +} + +static struct util_node *util_data_first_node(struct util_hash_data *hash) +{ + struct util_node *e = (struct util_node *)(hash); + struct util_node **bucket = hash->buckets; + int n = hash->numBuckets; + while (n--) { + if (*bucket != e) + return *bucket; + ++bucket; + } + return e; +} + +static struct util_node **util_hash_find_node(struct 
util_hash *hash, unsigned akey) +{ + struct util_node **node; + + if (hash->data.d->numBuckets) { + node = (struct util_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]); + assert(*node == hash->data.e || (*node)->next); + while (*node != hash->data.e && (*node)->key != akey) + node = &(*node)->next; + } else { + node = (struct util_node **)((const struct util_node * const *)(&hash->data.e)); + } + return node; +} + +struct util_hash_iter util_hash_insert(struct util_hash *hash, + unsigned key, void *data) +{ + util_data_might_grow(hash->data.d); + + { + struct util_node **nextNode = util_hash_find_node(hash, key); + struct util_node *node = util_hash_create_node(hash, key, data, nextNode); + if (!node) { + struct util_hash_iter null_iter = {hash, 0}; + return null_iter; + } + + { + struct util_hash_iter iter = {hash, node}; + return iter; + } + } +} + +struct util_hash * util_hash_create(void) +{ + struct util_hash *hash = malloc(sizeof(struct util_hash)); + if (!hash) + return NULL; + + hash->data.d = malloc(sizeof(struct util_hash_data)); + if (!hash->data.d) { + free(hash); + return NULL; + } + + hash->data.d->fakeNext = 0; + hash->data.d->buckets = 0; + hash->data.d->size = 0; + hash->data.d->nodeSize = sizeof(struct util_node); + hash->data.d->userNumBits = (short)MinNumBits; + hash->data.d->numBits = 0; + hash->data.d->numBuckets = 0; + + return hash; +} + +void util_hash_delete(struct util_hash *hash) +{ + struct util_node *e_for_x = (struct util_node *)(hash->data.d); + struct util_node **bucket = (struct util_node **)(hash->data.d->buckets); + int n = hash->data.d->numBuckets; + while (n--) { + struct util_node *cur = *bucket++; + while (cur != e_for_x) { + struct util_node *next = cur->next; + util_free_node(cur); + cur = next; + } + } + free(hash->data.d->buckets); + free(hash->data.d); + free(hash); +} + +struct util_hash_iter util_hash_find(struct util_hash *hash, + unsigned key) +{ + struct util_node **nextNode = 
util_hash_find_node(hash, key); + struct util_hash_iter iter = {hash, *nextNode}; + return iter; +} + +unsigned util_hash_iter_key(struct util_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->key; +} + +void * util_hash_iter_data(struct util_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->value; +} + +static struct util_node *util_hash_data_next(struct util_node *node) +{ + union { + struct util_node *next; + struct util_node *e; + struct util_hash_data *d; + } a; + int start; + struct util_node **bucket; + int n; + + a.next = node->next; + if (!a.next) { + /* iterating beyond the last element */ + return 0; + } + if (a.next->next) + return a.next; + + start = (node->key % a.d->numBuckets) + 1; + bucket = a.d->buckets + start; + n = a.d->numBuckets - start; + while (n--) { + if (*bucket != a.e) + return *bucket; + ++bucket; + } + return a.e; +} + +struct util_hash_iter util_hash_iter_next(struct util_hash_iter iter) +{ + struct util_hash_iter next = {iter.hash, util_hash_data_next(iter.node)}; + return next; +} + +int util_hash_iter_is_null(struct util_hash_iter iter) +{ + if (!iter.node || iter.node == iter.hash->data.e) + return 1; + return 0; +} + +void * util_hash_take(struct util_hash *hash, + unsigned akey) +{ + struct util_node **node = util_hash_find_node(hash, akey); + if (*node != hash->data.e) { + void *t = (*node)->value; + struct util_node *next = (*node)->next; + util_free_node(*node); + *node = next; + --hash->data.d->size; + util_data_has_shrunk(hash->data.d); + return t; + } + return 0; +} + +struct util_hash_iter util_hash_first_node(struct util_hash *hash) +{ + struct util_hash_iter iter = {hash, util_data_first_node(hash->data.d)}; + return iter; +} + +struct util_hash_iter util_hash_erase(struct util_hash *hash, struct util_hash_iter iter) +{ + struct util_hash_iter ret = iter; + struct util_node *node = iter.node; + struct util_node 
**node_ptr; + + if (node == hash->data.e) + return iter; + + ret = util_hash_iter_next(ret); + node_ptr = (struct util_node**)(&hash->data.d->buckets[node->key % hash->data.d->numBuckets]); + while (*node_ptr != node) + node_ptr = &(*node_ptr)->next; + *node_ptr = node->next; + util_free_node(node); + --hash->data.d->size; + return ret; +} diff --git a/amdgpu/util_hash.h b/amdgpu/util_hash.h new file mode 100644 index 0000000..8e0f9a2 --- /dev/null +++ b/amdgpu/util_hash.h @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Hash implementation. + * + * This file provides a hash implementation that is capable of dealing + * with collisions. 
It stores colliding entries in a linked list. All + * functions operating on the hash return an iterator. The iterator + * itself points to the collision list. If there wasn't any collision + * the list will have just one entry, otherwise client code should + * iterate over the entries to find the exact entry among ones that + * had the same key (e.g. memcmp could be used on the data to check + * that) + * + * @author Zack Rusin <zackr@xxxxxxxxxx> + */ + +#ifndef UTIL_HASH_H +#define UTIL_HASH_H + +#include <stdbool.h> + +struct util_hash; +struct util_node; + +struct util_hash_iter { + struct util_hash *hash; + struct util_node *node; +}; + + +struct util_hash *util_hash_create(void); +void util_hash_delete(struct util_hash *hash); + + +/** + * Adds data with the given key to the hash. If entry with the given + * key is already in the hash, the new entry is inserted before it + * in the collision list. + * Function returns iterator pointing to the inserted item in the hash. + */ +struct util_hash_iter util_hash_insert(struct util_hash *hash, unsigned key, + void *data); + +/** + * Removes the item pointed to by the current iterator from the hash. + * Note that the data itself is not erased and if it was a malloc'ed pointer + * it will have to be freed after calling this function by the caller. + * Function returns iterator pointing to the item after the removed one in + * the hash. + */ +struct util_hash_iter util_hash_erase(struct util_hash *hash, + struct util_hash_iter iter); + +void *util_hash_take(struct util_hash *hash, unsigned key); + + +struct util_hash_iter util_hash_first_node(struct util_hash *hash); + +/** + * Return an iterator pointing to the first entry in the collision list. 
+ */ +struct util_hash_iter util_hash_find(struct util_hash *hash, unsigned key); + + +int util_hash_iter_is_null(struct util_hash_iter iter); +unsigned util_hash_iter_key(struct util_hash_iter iter); +void *util_hash_iter_data(struct util_hash_iter iter); + + +struct util_hash_iter util_hash_iter_next(struct util_hash_iter iter); + +#endif diff --git a/amdgpu/util_hash_table.c b/amdgpu/util_hash_table.c new file mode 100644 index 0000000..cb7213c --- /dev/null +++ b/amdgpu/util_hash_table.c @@ -0,0 +1,257 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * General purpose hash table implementation. 
+ * + * Just uses the util_hash for now, but it might be better switch to a linear + * probing hash table implementation at some point -- as it is said they have + * better lookup and cache performance and it appears to be possible to write + * a lock-free implementation of such hash tables . + * + * @author José Fonseca <jfonseca@xxxxxxxxxx> + */ + + + +#include "util_hash_table.h" +#include "util_hash.h" + +#include <stdlib.h> +#include <assert.h> + +struct util_hash_table +{ + struct util_hash *head; + + /** Hash function */ + unsigned (*make_hash)(void *key); + + /** Compare two keys */ + int (*compare)(void *key1, void *key2); +}; + +struct util_hash_table_item +{ + void *key; + void *value; +}; + + +static struct util_hash_table_item * +util_hash_table_item(struct util_hash_iter iter) +{ + return (struct util_hash_table_item *)util_hash_iter_data(iter); +} + +struct util_hash_table *util_hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)) +{ + struct util_hash_table *ht; + + ht = malloc(sizeof(struct util_hash_table)); + if(!ht) + return NULL; + + ht->head = util_hash_create(); + if(!ht->head) { + free(ht); + return NULL; + } + + ht->make_hash = hash; + ht->compare = compare; + + return ht; +} + +static struct util_hash_iter +util_hash_table_find_iter(struct util_hash_table *ht, + void *key, unsigned key_hash) +{ + struct util_hash_iter iter; + struct util_hash_table_item *item; + + iter = util_hash_find(ht->head, key_hash); + while (!util_hash_iter_is_null(iter)) { + item = (struct util_hash_table_item *)util_hash_iter_data(iter); + if (!ht->compare(item->key, key)) + break; + iter = util_hash_iter_next(iter); + } + + return iter; +} + +static struct util_hash_table_item * +util_hash_table_find_item(struct util_hash_table *ht, + void *key, unsigned key_hash) +{ + struct util_hash_iter iter; + struct util_hash_table_item *item; + + iter = util_hash_find(ht->head, key_hash); + while (!util_hash_iter_is_null(iter)) { + item 
= (struct util_hash_table_item *)util_hash_iter_data(iter); + if (!ht->compare(item->key, key)) + return item; + iter = util_hash_iter_next(iter); + } + + return NULL; +} + +void util_hash_table_set(struct util_hash_table *ht, void *key, void *value) +{ + unsigned key_hash; + struct util_hash_table_item *item; + struct util_hash_iter iter; + + assert(ht); + if (!ht) + return; + + key_hash = ht->make_hash(key); + + item = util_hash_table_find_item(ht, key, key_hash); + if(item) { + /* TODO: key/value destruction? */ + item->value = value; + return; + } + + item = malloc(sizeof(struct util_hash_table_item)); + if(!item) + return; + + item->key = key; + item->value = value; + + iter = util_hash_insert(ht->head, key_hash, item); + if(util_hash_iter_is_null(iter)) { + free(item); + return; + } +} + +void *util_hash_table_get(struct util_hash_table *ht, void *key) +{ + unsigned key_hash; + struct util_hash_table_item *item; + + assert(ht); + if (!ht) + return NULL; + + key_hash = ht->make_hash(key); + + item = util_hash_table_find_item(ht, key, key_hash); + if(!item) + return NULL; + + return item->value; +} + +void util_hash_table_remove(struct util_hash_table *ht, void *key) +{ + unsigned key_hash; + struct util_hash_iter iter; + struct util_hash_table_item *item; + + assert(ht); + if (!ht) + return; + + key_hash = ht->make_hash(key); + + iter = util_hash_table_find_iter(ht, key, key_hash); + if(util_hash_iter_is_null(iter)) + return; + + item = util_hash_table_item(iter); + assert(item); + free(item); + + util_hash_erase(ht->head, iter); +} + +void util_hash_table_clear(struct util_hash_table *ht) +{ + struct util_hash_iter iter; + struct util_hash_table_item *item; + + assert(ht); + if (!ht) + return; + + iter = util_hash_first_node(ht->head); + while (!util_hash_iter_is_null(iter)) { + item = (struct util_hash_table_item *)util_hash_take(ht->head, util_hash_iter_key(iter)); + free(item); + iter = util_hash_first_node(ht->head); + } +} + +void 
util_hash_table_foreach(struct util_hash_table *ht, + void (*callback)(void *key, void *value, void *data), + void *data) +{ + struct util_hash_iter iter; + struct util_hash_table_item *item; + + assert(ht); + if (!ht) + return; + + iter = util_hash_first_node(ht->head); + while (!util_hash_iter_is_null(iter)) { + item = (struct util_hash_table_item *)util_hash_iter_data(iter); + callback(item->key, item->value, data); + iter = util_hash_iter_next(iter); + } +} + +void util_hash_table_destroy(struct util_hash_table *ht) +{ + struct util_hash_iter iter; + struct util_hash_table_item *item; + + assert(ht); + if (!ht) + return; + + iter = util_hash_first_node(ht->head); + while (!util_hash_iter_is_null(iter)) { + item = (struct util_hash_table_item *)util_hash_iter_data(iter); + free(item); + iter = util_hash_iter_next(iter); + } + + util_hash_delete(ht->head); + free(ht); +} diff --git a/amdgpu/util_hash_table.h b/amdgpu/util_hash_table.h new file mode 100644 index 0000000..04fe704 --- /dev/null +++ b/amdgpu/util_hash_table.h @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * General purpose hash table. + * + * @author José Fonseca <jfonseca@xxxxxxxxxx> + */ + +#ifndef U_HASH_TABLE_H_ +#define U_HASH_TABLE_H_ + +/** + * Generic purpose hash table. + */ +struct util_hash_table; + +/** + * Create an hash table. + * + * @param hash hash function + * @param compare should return 0 for two equal keys. + */ +struct util_hash_table *util_hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)); + +void util_hash_table_set(struct util_hash_table *ht, void *key, void *value); + +void *util_hash_table_get(struct util_hash_table *ht, void *key); + +void util_hash_table_remove(struct util_hash_table *ht, void *key); + +void util_hash_table_clear(struct util_hash_table *ht); + +void util_hash_table_foreach(struct util_hash_table *ht, + void (*callback)(void *key, void *value, void *data), + void *data); + +void util_hash_table_destroy(struct util_hash_table *ht); + +#endif /* U_HASH_TABLE_H_ */ diff --git a/configure.ac b/configure.ac index 155d577..a8997c6 100644 --- a/configure.ac +++ b/configure.ac @@ -74,6 +74,11 @@ AC_ARG_ENABLE(radeon, [Enable support for radeon's KMS API (default: auto)]), [RADEON=$enableval], [RADEON=auto]) +AC_ARG_ENABLE(amdgpu, + AS_HELP_STRING([--disable-amdgpu], + [Enable support for amdgpu's KMS API (default: auto)]), + [AMDGPU=$enableval], [AMDGPU=auto]) + AC_ARG_ENABLE(nouveau, AS_HELP_STRING([--disable-nouveau], 
[Enable support for nouveau's KMS API (default: auto)]), @@ -236,6 +241,9 @@ if test "x$drm_cv_atomic_primitives" = "xnone"; then LIBDRM_ATOMICS_NOT_FOUND_MSG($RADEON, radeon, Radeon, radeon) RADEON=no + LIBDRM_ATOMICS_NOT_FOUND_MSG($AMDGPU, amdgpu, AMD, amdgpu) + AMDGPU=no + LIBDRM_ATOMICS_NOT_FOUND_MSG($NOUVEAU, nouveau, NVIDIA, nouveau) NOUVEAU=no @@ -257,6 +265,9 @@ else if test "x$RADEON" = xauto; then RADEON=yes fi + if test "x$AMDGPU" = xauto; then + AMDGPU=yes + fi if test "x$NOUVEAU" = xauto; then NOUVEAU=yes fi @@ -336,6 +347,11 @@ if test "x$RADEON" = xyes; then AC_DEFINE(HAVE_RADEON, 1, [Have radeon support]) fi +AM_CONDITIONAL(HAVE_AMDGPU, [test "x$AMDGPU" = xyes]) +if test "x$AMDGPU" = xyes; then + AC_DEFINE(HAVE_AMDGPU, 1, [Have amdgpu support]) +fi + AM_CONDITIONAL(HAVE_TEGRA, [test "x$TEGRA" = xyes]) if test "x$TEGRA" = xyes; then AC_DEFINE(HAVE_TEGRA, 1, [Have Tegra support]) @@ -432,6 +448,8 @@ AC_CONFIG_FILES([ intel/libdrm_intel.pc radeon/Makefile radeon/libdrm_radeon.pc + amdgpu/Makefile + amdgpu/libdrm_amdgpu.pc nouveau/Makefile nouveau/libdrm_nouveau.pc omap/Makefile @@ -463,6 +481,7 @@ echo " libkms $LIBKMS" echo " Intel API $INTEL" echo " vmwgfx API $VMWGFX" echo " Radeon API $RADEON" +echo " AMDGPU API $AMDGPU" echo " Nouveau API $NOUVEAU" echo " OMAP API $OMAP" echo " EXYNOS API $EXYNOS" diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h new file mode 100644 index 0000000..477cfd8 --- /dev/null +++ b/include/drm/amdgpu_drm.h @@ -0,0 +1,580 @@ +/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- + * + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2014 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. 
Martin <martin@xxxxxxxxxxx> + * Gareth Hughes <gareth@xxxxxxxxxxx> + * Keith Whitwell <keith@xxxxxxxxxxxxxxxxxxxx> + */ + +#ifndef __AMDGPU_DRM_H__ +#define __AMDGPU_DRM_H__ + +#include <drm.h> + +#define DRM_AMDGPU_GEM_CREATE 0x00 +#define DRM_AMDGPU_GEM_MMAP 0x01 +#define DRM_AMDGPU_CTX 0x02 +#define DRM_AMDGPU_BO_LIST 0x03 +#define DRM_AMDGPU_CS 0x04 +#define DRM_AMDGPU_INFO 0x05 +#define DRM_AMDGPU_GEM_METADATA 0x06 +#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 +#define DRM_AMDGPU_GEM_VA 0x08 +#define DRM_AMDGPU_WAIT_CS 0x09 +#define DRM_AMDGPU_GEM_OP 0x10 +#define DRM_AMDGPU_GEM_USERPTR 0x11 + +#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) +#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) +#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) +#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list) +#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) +#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) +#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata) +#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle) +#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, union drm_amdgpu_gem_va) +#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) +#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) +#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) + +#define AMDGPU_GEM_DOMAIN_CPU 0x1 +#define AMDGPU_GEM_DOMAIN_GTT 0x2 +#define 
AMDGPU_GEM_DOMAIN_VRAM 0x4 +#define AMDGPU_GEM_DOMAIN_GDS 0x8 +#define AMDGPU_GEM_DOMAIN_GWS 0x10 +#define AMDGPU_GEM_DOMAIN_OA 0x20 + +#define AMDGPU_GEM_DOMAIN_MASK 0x3F + +/* Flag that CPU access will be required for the case of VRAM domain */ +#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) +/* Flag that CPU access will not work, this VRAM domain is invisible */ +#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) +/* Flag that un-cached attributes should be used for GTT */ +#define AMDGPU_GEM_CREATE_CPU_GTT_UC (1 << 2) +/* Flag that USWC attributes should be used for GTT */ +#define AMDGPU_GEM_CREATE_CPU_GTT_WC (1 << 3) + +/* Flag mask for GTT domain_flags */ +#define AMDGPU_GEM_CREATE_CPU_GTT_MASK \ + (AMDGPU_GEM_CREATE_CPU_GTT_WC | \ + AMDGPU_GEM_CREATE_CPU_GTT_UC | \ + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \ + AMDGPU_GEM_CREATE_NO_CPU_ACCESS) + +struct drm_amdgpu_gem_create_in { + /** the requested memory size */ + uint64_t bo_size; + /** physical start_addr alignment in bytes for some HW requirements */ + uint64_t alignment; + /** the requested memory domains */ + uint64_t domains; + /** allocation flags */ + uint64_t domain_flags; +}; + +struct drm_amdgpu_gem_create_out { + /** returned GEM object handle */ + uint32_t handle; +}; + +union drm_amdgpu_gem_create { + struct drm_amdgpu_gem_create_in in; + struct drm_amdgpu_gem_create_out out; +}; + +/** Opcode to create new residency list. 
*/ +#define AMDGPU_BO_LIST_OP_CREATE 0 +/** Opcode to destroy previously created residency list */ +#define AMDGPU_BO_LIST_OP_DESTROY 1 +/** Opcode to update resource information in the list */ +#define AMDGPU_BO_LIST_OP_UPDATE 2 + +struct drm_amdgpu_bo_list_in { + /** Type of operation */ + uint32_t operation; + /** Handle of list or 0 if we want to create one */ + uint32_t list_handle; + /** Number of BOs in list */ + uint32_t bo_number; + /** Size of each element describing BO */ + uint32_t bo_info_size; + /** Pointer to array describing BOs */ + uint64_t bo_info_ptr; +}; + +struct drm_amdgpu_bo_list_entry { + /** Handle of BO */ + uint32_t bo_handle; + /** New (if specified) BO priority to be used during migration */ + uint32_t bo_priority; +}; + +struct drm_amdgpu_bo_list_out { + /** Handle of resource list */ + uint32_t list_handle; +}; + +union drm_amdgpu_bo_list { + struct drm_amdgpu_bo_list_in in; + struct drm_amdgpu_bo_list_out out; +}; + +/* context related */ +#define AMDGPU_CTX_OP_ALLOC_CTX 1 +#define AMDGPU_CTX_OP_FREE_CTX 2 +#define AMDGPU_CTX_OP_QUERY_STATE 3 + +#define AMDGPU_CTX_OP_STATE_RUNNING 1 + +struct drm_amdgpu_ctx_in { + uint32_t op; + uint32_t flags; + uint32_t ctx_id; + uint32_t pad; +}; + +union drm_amdgpu_ctx_out { + struct { + uint32_t ctx_id; + } alloc; + + struct { + uint64_t flags; + uint64_t hangs; + } state; +}; + +union drm_amdgpu_ctx { + struct drm_amdgpu_ctx_in in; + union drm_amdgpu_ctx_out out; +}; + +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. 
+ */ +#define AMDGPU_GEM_USERPTR_READONLY (1 << 0) +#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1) +#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2) +#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) + +struct drm_amdgpu_gem_userptr { + uint64_t addr; + uint64_t size; + uint32_t flags; + uint32_t handle; +}; + +#define AMDGPU_TILING_MACRO 0x1 +#define AMDGPU_TILING_MICRO 0x2 +#define AMDGPU_TILING_SWAP_16BIT 0x4 +#define AMDGPU_TILING_R600_NO_SCANOUT AMDGPU_TILING_SWAP_16BIT +#define AMDGPU_TILING_SWAP_32BIT 0x8 +/* this object requires a surface when mapped - i.e. front buffer */ +#define AMDGPU_TILING_SURFACE 0x10 +#define AMDGPU_TILING_MICRO_SQUARE 0x20 +#define AMDGPU_TILING_EG_BANKW_SHIFT 8 +#define AMDGPU_TILING_EG_BANKW_MASK 0xf +#define AMDGPU_TILING_EG_BANKH_SHIFT 12 +#define AMDGPU_TILING_EG_BANKH_MASK 0xf +#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_SHIFT 16 +#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_MASK 0xf +#define AMDGPU_TILING_EG_TILE_SPLIT_SHIFT 24 +#define AMDGPU_TILING_EG_TILE_SPLIT_MASK 0xf +#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_SHIFT 28 +#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_MASK 0xf + +#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 +#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 + +/** The same structure is shared for input/output */ +struct drm_amdgpu_gem_metadata { + uint32_t handle; /* GEM Object handle */ + uint32_t op; /** Do we want get or set metadata */ + struct { + uint64_t flags; + uint64_t tiling_info; /* family specific tiling info */ + uint32_t data_size_bytes; + uint32_t data[64]; + } data; +}; + +struct drm_amdgpu_gem_mmap_in { + uint32_t handle; /** the GEM object handle */ +}; + +struct drm_amdgpu_gem_mmap_out { + uint64_t addr_ptr; /** mmap offset from the vma offset manager */ +}; + +union drm_amdgpu_gem_mmap { + struct drm_amdgpu_gem_mmap_in in; + struct drm_amdgpu_gem_mmap_out out; +}; + +struct drm_amdgpu_gem_wait_idle_in { + uint32_t handle; /* GEM object handle */ + uint32_t flags; + uint64_t timeout; /* Timeout to wait. 
If 0 then return immediately with the status */ +}; + +struct drm_amdgpu_gem_wait_idle_out { + uint32_t status; /* BO status: 0 - BO is idle, 1 - BO is busy */ + uint32_t domain; /* Returned current memory domain */ +}; + +union drm_amdgpu_gem_wait_idle { + struct drm_amdgpu_gem_wait_idle_in in; + struct drm_amdgpu_gem_wait_idle_out out; +}; + +struct drm_amdgpu_wait_cs_in { + uint64_t handle; + uint64_t timeout; + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; +}; + +struct drm_amdgpu_wait_cs_out { + uint64_t status; +}; + +union drm_amdgpu_wait_cs { + struct drm_amdgpu_wait_cs_in in; + struct drm_amdgpu_wait_cs_out out; +}; + +/* Sets or returns a value associated with a buffer. */ +struct drm_amdgpu_gem_op { + uint32_t handle; /* buffer */ + uint32_t op; /* AMDGPU_GEM_OP_* */ + uint64_t value; /* input or return value */ +}; + +#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 +#define AMDGPU_GEM_OP_SET_INITIAL_DOMAIN 1 + +#define AMDGPU_VA_OP_MAP 1 +#define AMDGPU_VA_OP_UNMAP 2 + +#define AMDGPU_VA_RESULT_OK 0 +#define AMDGPU_VA_RESULT_ERROR 1 +#define AMDGPU_VA_RESULT_VA_INVALID_ALIGNMENT 2 + +/* Mapping flags */ +/* readable mapping */ +#define AMDGPU_VM_PAGE_READABLE (1 << 1) +/* writable mapping */ +#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) +/* executable mapping, new for VI */ +#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) + +struct drm_amdgpu_gem_va_in { + /* GEM object handle */ + uint32_t handle; + uint32_t pad; + /* map or unmap */ + uint32_t operation; + /* specify mapping flags */ + uint32_t flags; + /* va address to assign. Must be correctly aligned. */ + uint64_t va_address; + /* Specify offset inside of BO to assign. Must be correctly aligned. */ + uint64_t offset_in_bo; + /* Specify mapping size. If 0 and offset is 0 then map the whole BO. */ + /* Must be correctly aligned. 
*/ + uint64_t map_size; +}; + +struct drm_amdgpu_gem_va_out { + uint32_t result; +}; + +union drm_amdgpu_gem_va { + struct drm_amdgpu_gem_va_in in; + struct drm_amdgpu_gem_va_out out; +}; + +#define AMDGPU_HW_IP_GFX 0 +#define AMDGPU_HW_IP_COMPUTE 1 +#define AMDGPU_HW_IP_DMA 2 +#define AMDGPU_HW_IP_UVD 3 +#define AMDGPU_HW_IP_VCE 4 +#define AMDGPU_HW_IP_NUM 5 + +#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 + +#define AMDGPU_CHUNK_ID_IB 0x01 +#define AMDGPU_CHUNK_ID_FENCE 0x02 +struct drm_amdgpu_cs_chunk { + uint32_t chunk_id; + uint32_t length_dw; + uint64_t chunk_data; +}; + +struct drm_amdgpu_cs_in { + /** Rendering context id */ + uint32_t ctx_id; + /** Handle of resource list associated with CS */ + uint32_t bo_list_handle; + uint32_t num_chunks; + uint32_t pad; + /* this points to uint64_t * which point to cs chunks */ + uint64_t chunks; +}; + +struct drm_amdgpu_cs_out { + uint64_t handle; +}; + +union drm_amdgpu_cs { + struct drm_amdgpu_cs_in in; + struct drm_amdgpu_cs_out out; +}; + +/* Specify flags to be used for IB */ + +/* This IB should be submitted to CE */ +#define AMDGPU_IB_FLAG_CE (1<<0) + +/* GDS is used by this IB */ +#define AMDGPU_IB_FLAG_GDS (1<<1) + +struct drm_amdgpu_cs_chunk_ib { + /** + * Handle of GEM object to be used as IB or 0 if it is already in + * residency list. + */ + uint32_t handle; + uint32_t flags; /* IB Flags */ + uint64_t va_start; /* Virtual address to begin IB execution */ + uint32_t ib_bytes; /* Size of submission */ + uint32_t ip_type; /* HW IP to submit to */ + uint32_t ip_instance; /* HW IP index of the same type to submit to */ + uint32_t ring; /* Ring index to submit to */ +}; + +struct drm_amdgpu_cs_chunk_fence { + uint32_t handle; + uint32_t offset; +}; + +struct drm_amdgpu_cs_chunk_data { + union { + struct drm_amdgpu_cs_chunk_ib ib_data; + struct drm_amdgpu_cs_chunk_fence fence_data; + }; +}; + +/** + * Query h/w info: Flag that this is integrated (a.k.a. 
fusion) GPU + * + */ +#define AMDGPU_IDS_FLAGS_FUSION 0x1 + +/* indicate if acceleration can be working */ +#define AMDGPU_INFO_ACCEL_WORKING 0x00 +/* get the crtc_id from the mode object id? */ +#define AMDGPU_INFO_CRTC_FROM_ID 0x01 +/* query hw IP info */ +#define AMDGPU_INFO_HW_IP_INFO 0x02 +/* query hw IP instance count for the specified type */ +#define AMDGPU_INFO_HW_IP_COUNT 0x03 +/* timestamp for GL_ARB_timer_query */ +#define AMDGPU_INFO_TIMESTAMP 0x05 +/* Query the firmware version */ +#define AMDGPU_INFO_FW_VERSION 0x0e + /* Subquery id: Query VCE firmware version */ + #define AMDGPU_INFO_FW_VCE 0x1 + /* Subquery id: Query UVD firmware version */ + #define AMDGPU_INFO_FW_UVD 0x2 + /* Subquery id: Query GMC firmware version */ + #define AMDGPU_INFO_FW_GMC 0x03 + /* Subquery id: Query GFX ME firmware version */ + #define AMDGPU_INFO_FW_GFX_ME 0x04 + /* Subquery id: Query GFX PFP firmware version */ + #define AMDGPU_INFO_FW_GFX_PFP 0x05 + /* Subquery id: Query GFX CE firmware version */ + #define AMDGPU_INFO_FW_GFX_CE 0x06 + /* Subquery id: Query GFX RLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC 0x07 + /* Subquery id: Query GFX MEC firmware version */ + #define AMDGPU_INFO_FW_GFX_MEC 0x08 + /* Subquery id: Query SMC firmware version */ + #define AMDGPU_INFO_FW_SMC 0x0a + /* Subquery id: Query SDMA firmware version */ + #define AMDGPU_INFO_FW_SDMA 0x0b +/* number of bytes moved for TTM migration */ +#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f +/* the used VRAM size */ +#define AMDGPU_INFO_VRAM_USAGE 0x10 +/* the used GTT size */ +#define AMDGPU_INFO_GTT_USAGE 0x11 +/* Information about GDS, etc. resource configuration */ +#define AMDGPU_INFO_GDS_CONFIG 0x13 +/* Query information about VRAM and GTT domains */ +#define AMDGPU_INFO_VRAM_GTT 0x14 +/* Query information about register in MMR address space*/ +#define AMDGPU_INFO_READ_MMR_REG 0x15 +/* Query information about device: rev id, family, etc. 
*/ +#define AMDGPU_INFO_DEV_INFO 0x16 +/* visible vram usage */ +#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 + +#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 +#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff +#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 +#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff + +/* Input structure for the INFO ioctl */ +struct drm_amdgpu_info { + /* Where the return value will be stored */ + uint64_t return_pointer; + /* The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write. */ + uint32_t return_size; + /* The query request id. */ + uint32_t query; + + union { + struct { + uint32_t id; + } mode_crtc; + + struct { + /** AMDGPU_HW_IP_* */ + uint32_t type; + /** + * Index of the IP if there are more IPs of the same type. + * Ignored by AMDGPU_INFO_HW_IP_COUNT. + */ + uint32_t ip_instance; + } query_hw_ip; + + struct { + uint32_t dword_offset; + uint32_t count; /* number of registers to read */ + uint32_t instance; + uint32_t flags; + } read_mmr_reg; + + struct { + /** AMDGPU_INFO_FW_* */ + uint32_t fw_type; + /** Index of the IP if there are more IPs of the same type. */ + uint32_t ip_instance; + /** + * Index of the engine. Whether this is used depends + * on the firmware type. (e.g. 
MEC, SDMA) + */ + uint32_t index; + } query_fw; + }; +}; + +struct drm_amdgpu_info_gds { + /** GDS GFX partition size */ + uint32_t gds_gfx_partition_size; + /** GDS compute partition size */ + uint32_t compute_partition_size; + /** total GDS memory size */ + uint32_t gds_total_size; + /** GWS size per GFX partition */ + uint32_t gws_per_gfx_partition; + /** GWS size per compute partition */ + uint32_t gws_per_compute_partition; + /** OA size per GFX partition */ + uint32_t oa_per_gfx_partition; + /** OA size per compute partition */ + uint32_t oa_per_compute_partition; +}; + +struct drm_amdgpu_info_vram_gtt { + uint64_t vram_size; + uint64_t vram_cpu_accessible_size; + uint64_t gtt_size; +}; + +struct drm_amdgpu_info_firmware { + uint32_t ver; + uint32_t feature; +}; + +struct drm_amdgpu_info_device { + /** PCI Device ID */ + uint32_t device_id; + /** Internal chip revision (A0, A1, etc.) */ + uint32_t chip_rev; + uint32_t external_rev; + /** Revision id in PCI Config space */ + uint32_t pci_rev; + uint32_t family; + uint32_t num_shader_engines; + uint32_t num_shader_arrays_per_engine; + uint32_t gpu_counter_freq; /* in KHz */ + uint64_t max_engine_clock; /* in KHz */ + /* cu information */ + uint32_t cu_active_number; + uint32_t cu_ao_mask; + uint32_t cu_bitmap[4][4]; + /** Render backend pipe mask. One render backend is CB+DB. */ + uint32_t enabled_rb_pipes_mask; + uint32_t num_rb_pipes; + uint32_t num_hw_gfx_contexts; + uint32_t _pad; + uint64_t ids_flags; + /** Starting virtual address for UMDs. */ + uint64_t virtual_address_offset; + /** Required alignment of virtual addresses. */ + uint32_t virtual_address_alignment; + /** Page table entry - fragment size */ + uint32_t pte_fragment_size; + uint32_t gart_page_size; +}; + +struct drm_amdgpu_info_hw_ip { + /** Version of h/w IP */ + uint32_t hw_ip_version_major; + uint32_t hw_ip_version_minor; + /** Capabilities */ + uint64_t capabilities_flags; + /** Bitmask of available rings. Bit 0 means ring 0, etc. 
*/ + uint32_t available_rings; +}; + +/* + * Supported GPU families + */ +#define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ +#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ +#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ +#define AMDGPU_FAMILY_CZ 135 /* Carrizo */ + +#endif -- 1.8.3.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel