This reverts commit 0e1c378fce66db833e08770d888dda1c5ec7936a.

The revert drops the amdgpu_iova debugfs file descriptor and restores the
previous system-memory path: DMA addresses taken from page table entries
are translated to physical addresses using a map built from the
ttm_dma_map/ttm_dma_unmap trace events, and system RAM is accessed through
/dev/fmem or /dev/mem.
---
 src/lib/CMakeLists.txt |   1 +
 src/lib/close_asic.c   |   2 +-
 src/lib/discover.c     |   3 -
 src/lib/free_maps.c    |  44 ++++++++++
 src/lib/read_vram.c    | 218 ++++++++++++++++++++++++++++++++++++++++++++-----
 src/umr.h              |  14 +++-
 6 files changed, 254 insertions(+), 28 deletions(-)
 create mode 100644 src/lib/free_maps.c

diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index a21fdf8eea2d..78d827ac1bf1 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(umrcore STATIC
   discover.c
   dump_ib.c
   find_reg.c
+  free_maps.c
   mmio.c
   query_drm.c
   read_sensor.c
diff --git a/src/lib/close_asic.c b/src/lib/close_asic.c
index a140409e617b..d532a11fa671 100644
--- a/src/lib/close_asic.c
+++ b/src/lib/close_asic.c
@@ -29,6 +29,7 @@ void umr_free_asic(struct umr_asic *asic)
 {
 	int x;
 
+	umr_free_maps(asic);
 	if (asic->pci.mem != NULL) {
 		// free PCI mapping
 		pci_device_unmap_range(asic->pci.pdevice, asic->pci.mem, asic->pci.pdevice->regions[asic->pci.region].size);
@@ -56,7 +57,6 @@ void umr_close_asic(struct umr_asic *asic)
 		cond_close(asic->fd.vram);
 		cond_close(asic->fd.gpr);
 		cond_close(asic->fd.drm);
-		cond_close(asic->fd.iova);
 		umr_free_asic(asic);
 	}
 }
diff --git a/src/lib/discover.c b/src/lib/discover.c
index ff7950e4e6ba..dcc212fc39e4 100644
--- a/src/lib/discover.c
+++ b/src/lib/discover.c
@@ -222,8 +222,6 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
 		asic->fd.vram = open(fname, O_RDWR);
 		snprintf(fname, sizeof(fname)-1, "/sys/kernel/debug/dri/%d/amdgpu_gpr", asic->instance);
 		asic->fd.gpr = open(fname, O_RDWR);
-		snprintf(fname, sizeof(fname)-1, "/sys/kernel/debug/dri/%d/amdgpu_iova", asic->instance);
-		asic->fd.iova = open(fname, O_RDWR);
 		asic->fd.drm = -1; // default to closed
 		// if appending to the fd list remember to update close_asic() and discover_by_did()...
 	} else {
@@ -237,7 +235,6 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
 		asic->fd.vram = -1;
 		asic->fd.gpr = -1;
 		asic->fd.drm = -1;
-		asic->fd.iova = -1;
 	}
 
 	if (options->use_pci) {
diff --git a/src/lib/free_maps.c b/src/lib/free_maps.c
new file mode 100644
index 000000000000..e1d27cb177f7
--- /dev/null
+++ b/src/lib/free_maps.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <tom.stdenis at amd.com>
+ *
+ */
+#include "umr.h"
+
+static void recurse_free(struct umr_map *map)
+{
+	if (map->left)
+		recurse_free(map->left);
+	if (map->right)
+		recurse_free(map->right);
+	free(map);
+}
+
+void umr_free_maps(struct umr_asic *asic)
+{
+	if (!asic->maps)
+		return;
+
+	recurse_free(asic->maps->maps);
+	free(asic->maps);
+	asic->maps = NULL;
+}
diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 3a327fb8e681..6e8f1f931895 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -25,6 +25,163 @@
 #include "umrapp.h"
 #include <inttypes.h>
 
+// find a mapping or create node for it
+static struct umr_map *find_map(struct umr_dma_maps *maps, uint64_t dma_addr, int create)
+{
+	struct umr_map *n = maps->maps, **nn;
+	uint64_t key;
+
+	// addresses aren't terribly random
+	// so if we use an identity function on the search
+	// key we'll end up with a really unbalanced tree
+	// so mix up address a bit to randomize keys
+	key = dma_addr ^ (dma_addr >> 11);
+
+	if (!n) {
+		maps->maps = calloc(1, sizeof(maps->maps[0]));
+		maps->maps->key = key;
+		return maps->maps;
+	}
+
+	while (n->dma_addr != dma_addr) {
+		if (key > n->key)
+			nn = &n->left;
+		else
+			nn = &n->right;
+
+		if (*nn) {
+			n = *nn;
+		} else {
+			if (!create) return NULL;
+
+			// add the new node
+			*nn = calloc(1, sizeof(maps->maps[0]));
+			(*nn)->key = key;
+			return *nn;
+		}
+	}
+
+	return n;
+}
+
+// insert/replace mapping in array
+static int insert_map(struct umr_dma_maps *maps,
+	uint64_t dma_addr, uint64_t phys_addr, int valid)
+{
+	struct umr_map *map = find_map(maps, dma_addr, valid);
+
+	// don't add a new node if we're marking it invalid
+	if (map) {
+		if (valid) {
+			map->dma_addr = dma_addr;
+			map->phys_addr = phys_addr;
+			map->valid = valid;
+		} else {
+			struct umr_map *tmap = NULL;
+
+			// if marking invalid see if we can prune
+			// the tree a little if the node we're marking
+			// as invalid only has one child
+			if (map->left == NULL && map->right) {
+				tmap = map->right;
+				*map = *(map->right);
+			} else if (map->right == NULL && map->left) {
+				tmap = map->left;
+				*map = *(map->left);
+			}
+			if (tmap)
+				free(tmap);
+		}
+	}
+
+	return 0;
+}
+
+static int check_trace = 0;
+
+// try to convert a DMA address to physical via trace
+static uint64_t dma_to_phys(struct umr_asic *asic, uint64_t dma_addr)
+{
+	struct umr_map *map = find_map(asic->maps, dma_addr, 0);
+
+	if (map == NULL)
+		return dma_addr;
+
+	if (map->valid)
+		return map->phys_addr;
+	else
+		return map->dma_addr;
+}
+
+static int parse_trace(struct umr_asic *asic)
+{
+	FILE *f;
+	uint64_t d, p;
+	char *s, buf[512];
+	int err = -1, valid;
+	struct umr_dma_maps *maps = asic->maps;
+
+	if (!check_trace) {
+		check_trace = 1;
+		f = fopen("/sys/kernel/debug/tracing/events/ttm/ttm_dma_map/enable", "r");
+		if (!f) {
+			fprintf(stderr, "[WARNING]: kernel does not support TTM mapping trace, please update kernel\n");
+		} else {
+			fgets(buf, sizeof(buf)-1, f);
+			if (sscanf(buf, "%"SCNu64, &d) == 1) {
+				if (d != 1) {
+					fprintf(stderr,
+						"[WARNING]: ttm_dma_map trace is not enabled, VM decoding may fail!\n"
+						"[WARNING]: Enable with: 'echo 1 > /sys/kernel/debug/tracing/events/ttm/ttm_dma_map/enable'\n"
+						"[WARNING]: Enable with: 'echo 1 > /sys/kernel/debug/tracing/events/ttm/ttm_dma_unmap/enable'\n");
+				}
+			} else {
+				fprintf(stderr, "[ERROR]: could not read ttm_dma_map enable file\n");
+			}
+			fclose(f);
+		}
+	}
+
+	// try to open ~/trace first
+	snprintf(buf, sizeof(buf)-1, "%s/trace", getenv("HOME"));
+	f = fopen(buf, "r");
+	if (!f)
+		f = fopen("/sys/kernel/debug/tracing/trace", "r");
+	if (!f)
+		goto error;
+
+	while (fgets(buf, sizeof(buf)-1, f)) {
+		valid = -1;
+
+		s = strstr(buf, "ttm_dma_map");
+		if (s) {
+			s += strlen("ttm_dma_map");
+			valid = 1;
+		} else {
+			s = strstr(buf, "ttm_dma_unmap");
+			if (s) {
+				s += strlen("ttm_dma_unmap");
+				valid = 0;
+			}
+		}
+
+		if (valid != -1) {
+			s = strstr(s, asic->options.pci.name);
+			if (s) {
+				s += strlen(asic->options.pci.name) + 2;
+				if (sscanf(s, "0x%"SCNx64" => 0x%"SCNx64, &d, &p) == 2) {
+					if (insert_map(maps, d, p, valid))
+						goto error;
+				}
+			}
+		}
+	}
+	err = 0;
+error:
+	fclose(f);
+	return err;
+}
+
 static void access_vram_via_mmio(struct umr_asic *asic, uint64_t address, uint32_t size, void *dst, int write_en)
 {
 	uint32_t MM_INDEX, MM_INDEX_HI, MM_DATA;
@@ -60,27 +217,33 @@ static void access_vram_via_mmio(struct umr_asic *asic, uint64_t address, uint32
 #define DEBUG(...)
 #endif
 
-static int umr_access_sram(struct umr_asic *asic, uint64_t address, uint32_t size, void *dst, int write_en)
+static int umr_access_sram(uint64_t address, uint32_t size, void *dst, int write_en)
 {
-	DEBUG("Reading physical sys addr: 0x%llx\n", (unsigned long long)address);
+	int fd;
 
-	if (asic->fd.iova < 0) {
-		fprintf(stderr, "[ERROR]: amdgpu_iova not open, please update your kernel\n");
-		return -1;
-	}
+	DEBUG("Reading physical sys addr: 0x%llx\n", (unsigned long long)address);
 
-	lseek(asic->fd.iova, address, SEEK_SET);
-	if (write_en == 0) {
-		memset(dst, 0xFF, size);
-		if (read(asic->fd.iova, dst, size) != size) {
-			return -1;
-		}
-	} else {
-		if (write(asic->fd.iova, dst, size) != size) {
-			return -1;
+	fd = open("/dev/fmem", O_RDWR);
+	if (fd < 0)
+		fd = open("/dev/mem", O_RDWR | O_DSYNC);
+	if (fd >= 0) {
+		lseek(fd, address, SEEK_SET);
+		if (write_en == 0) {
+			memset(dst, 0xFF, size);
+			if (read(fd, dst, size) != size) {
+				close(fd);
+				return -1;
+			}
+		} else {
+			if (write(fd, dst, size) != size) {
+				close(fd);
+				return -1;
+			}
 		}
+		close(fd);
+		return 0;
 	}
-	return 0;
+	return -1;
 }
 
 
@@ -226,7 +389,7 @@ static int umr_access_vram_vi(struct umr_asic *asic, uint32_t vmid,
 				return -1;
 
 			// compute starting address
-			start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+			start_addr = dma_to_phys(asic, pte_fields.page_base_addr) + (address & 0xFFF);
 		} else {
 			// depth == 0 == PTE only
 			pte_idx = (address >> 12);
@@ -251,7 +414,7 @@ static int umr_access_vram_vi(struct umr_asic *asic, uint32_t vmid,
 				return -1;
 
 			// compute starting address
-			start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+			start_addr = dma_to_phys(asic, pte_fields.page_base_addr) + (address & 0xFFF);
 		}
 
 		// read upto 4K from it
@@ -265,7 +428,7 @@ static int umr_access_vram_vi(struct umr_asic *asic, uint32_t vmid,
 		// allow destination to be NULL to simply use decoder
 		if (pdst) {
 			if (pte_fields.system) {
-				if (umr_access_sram(asic, start_addr, chunk_size, pdst, write_en) < 0) {
+				if (umr_access_sram(start_addr, chunk_size, pdst, write_en) < 0) {
 					fprintf(stderr, "[ERROR]: Cannot access system ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
 					fprintf(stderr, "[ERROR]: Alternatively download and install /dev/fmem\n");
 					return -1;
@@ -544,7 +707,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				return -1;
 
 			// compute starting address
-			start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+			start_addr = dma_to_phys(asic, pte_fields.page_base_addr) + (address & 0xFFF);
 			DEBUG("phys address to read from: %llx\n\n\n", (unsigned long long)start_addr);
 		} else {
 			// in AI+ the BASE_ADDR is treated like a PDE entry...
@@ -593,7 +756,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				return -1;
 
 			// compute starting address
-			start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+			start_addr = dma_to_phys(asic, pte_fields.page_base_addr) + (address & 0xFFF);
 		}
 
 		// read upto 4K from it
@@ -609,7 +772,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 		// allow destination to be NULL to simply use decoder
 		if (pdst) {
 			if (pte_fields.system) {
-				if (umr_access_sram(asic, start_addr, chunk_size, pdst, write_en) < 0) {
+				if (umr_access_sram(start_addr, chunk_size, pdst, write_en) < 0) {
 					fprintf(stderr, "[ERROR]: Cannot access system ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
 					fprintf(stderr, "[ERROR]: Alternatively download and install /dev/fmem\n");
 					return -1;
@@ -662,6 +825,17 @@ int umr_access_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint
 		return 0;
 	}
 
+	if (!asic->maps) {
+		asic->maps = calloc(1, sizeof(asic->maps[0]));
+		if (!asic->maps) {
+			fprintf(stderr, "[ERROR]: Out of memory building dma maps\n");
+			return -1;
+		}
+
+		if (parse_trace(asic))
+			return -1;
+	}
+
 	switch (asic->family) {
 		case FAMILY_SI:
 		case FAMILY_CIK:
diff --git a/src/umr.h b/src/umr.h
index a374a0d23301..3d2252e35608 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -210,6 +210,17 @@ struct umr_options {
 	} pci;
 };
 
+struct umr_dma_maps {
+	struct umr_map {
+		uint64_t
+			dma_addr,
+			phys_addr,
+			key;
+		int valid;
+		struct umr_map *left, *right;
+	} *maps;
+};
+
 struct umr_asic {
 	char *asicname;
 	int no_blocks;
@@ -232,8 +243,7 @@ struct umr_asic {
 		    drm,
 		    wave,
 		    vram,
-		    gpr,
-		    iova;
+		    gpr;
 	} fd;
 	struct {
 		struct pci_device *pdevice;
-- 
2.12.0