Hi, Attached is a change to the radeon reloc emitting code, that stores the reloc index in the buffer object. This avoids quadratic runtime behavior in the number of emitted buffer object relocs per command stream. The reloc index is held in an array indexed by command stream number, which means that it should be safe to use with multiple command stream objects in place. The patch itself is bigger than needed since it reindents the 'buffer object already has an index' case in cs_gem_write_reloc. This change is driven by cs_gem_write_reloc already showing up noticable in profiles of some of my use cases. Comments? By the way, it appears to me that space_accounted should be also a per command stream member of the buffer object. True? Greetings Mathias
diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c index 081ccb9..6f0a1b7 100644 --- a/radeon/radeon_bo_gem.c +++ b/radeon/radeon_bo_gem.c @@ -39,7 +39,6 @@ #include <sys/mman.h> #include <errno.h> #include "xf86drm.h" -#include "xf86atomic.h" #include "drm.h" #include "radeon_drm.h" #include "radeon_bo.h" @@ -50,7 +49,6 @@ struct radeon_bo_gem { struct radeon_bo_int base; uint32_t name; int map_count; - atomic_t reloc_in_cs; void *priv_ptr; }; @@ -68,7 +66,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, uint32_t flags) { struct radeon_bo_gem *bo; - int r; + int i, r; bo = (struct radeon_bo_gem*)calloc(1, sizeof(struct radeon_bo_gem)); if (bo == NULL) { @@ -82,7 +80,8 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, bo->base.domains = domains; bo->base.flags = flags; bo->base.ptr = NULL; - atomic_set(&bo->reloc_in_cs, 0); + for (i = 0; i < sizeof(bo->base.idx_by_cs)/sizeof(bo->base.idx_by_cs[0]); ++i) + bo->base.idx_by_cs[i] = -1; bo->map_count = 0; if (handle) { struct drm_gem_open open_arg; @@ -312,12 +311,6 @@ uint32_t radeon_gem_name_bo(struct radeon_bo *bo) return bo_gem->name; } -void *radeon_gem_get_reloc_in_cs(struct radeon_bo *bo) -{ - struct radeon_bo_gem *bo_gem = (struct radeon_bo_gem*)bo; - return &bo_gem->reloc_in_cs; -} - int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name) { struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; diff --git a/radeon/radeon_bo_gem.h b/radeon/radeon_bo_gem.h index 0af8610..c56c58e 100644 --- a/radeon/radeon_bo_gem.h +++ b/radeon/radeon_bo_gem.h @@ -38,7 +38,6 @@ struct radeon_bo_manager *radeon_bo_manager_gem_ctor(int fd); void radeon_bo_manager_gem_dtor(struct radeon_bo_manager *bom); uint32_t radeon_gem_name_bo(struct radeon_bo *bo); -void *radeon_gem_get_reloc_in_cs(struct radeon_bo *bo); int radeon_gem_set_domain(struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain); int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name); #endif diff --git a/radeon/radeon_bo_int.h b/radeon/radeon_bo_int.h index 9589ead..f47f790 100644 --- a/radeon/radeon_bo_int.h +++ b/radeon/radeon_bo_int.h @@ -6,6 +6,8 @@ struct radeon_bo_manager { int fd; }; +#define MAX_CS_COUNT 32 + struct radeon_bo_int { void *ptr; uint32_t flags; @@ -18,6 +20,8 @@ struct radeon_bo_int { struct radeon_bo_manager *bom; uint32_t space_accounted; uint32_t referenced_in_cs; + /* store the reloc index indexed by the cs index number */ + uint32_t idx_by_cs[MAX_CS_COUNT]; }; /* bo functions */ diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c index 81bd393..00440e6 100644 --- a/radeon/radeon_cs_gem.c +++ b/radeon/radeon_cs_gem.c @@ -43,7 +43,6 @@ #include "radeon_bo_gem.h" #include "drm.h" #include "xf86drm.h" -#include "xf86atomic.h" #include "radeon_drm.h" #include "bof.h" @@ -94,6 +93,9 @@ static uint32_t get_first_zero(const uint32_t n) static uint32_t generate_id(void) { uint32_t r = 0; + + assert(MAX_CS_COUNT == 8*sizeof(r)); + pthread_mutex_lock( &id_mutex ); /* check for free ids */ if (cs_id_source != ~r) { @@ -142,6 +144,7 @@ static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm, csg->base.relocs_total_size = 0; csg->base.crelocs = 0; csg->base.id = generate_id(); + csg->base.index = get_first_zero(csg->base.id) - 2; csg->nrelocs = 4096 / (4 * 4) ; csg->relocs_bo = (struct radeon_bo_int**)calloc(1, csg->nrelocs*sizeof(void*)); @@ -176,7 +179,6 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs, struct cs_gem *csg = (struct cs_gem*)cs; struct cs_reloc_gem *reloc; uint32_t idx; - unsigned i; assert(boi->space_accounted); @@ -193,46 +195,38 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs, if (write_domain == RADEON_GEM_DOMAIN_CPU) { return -EINVAL; } - /* use bit field hash function to determine - if this bo is for sure not in this cs.*/ - if ((atomic_read((atomic_t *)radeon_gem_get_reloc_in_cs(bo)) & cs->id)) { - /* check if bo is already referenced. - * Scanning from end to begin reduces cycles with mesa because - * it often relocates same shared dma bo again. */ - for(i = cs->crelocs; i != 0;) { - --i; - idx = i * RELOC_SIZE; - reloc = (struct cs_reloc_gem*)&csg->relocs[idx]; - if (reloc->handle == bo->handle) { - /* Check domains must be in read or write. As we check already - * checked that in argument one of the read or write domain was - * set we only need to check that if previous reloc as the read - * domain set then the read_domain should also be set for this - * new relocation. - */ - /* the DDX expects to read and write from same pixmap */ - if (write_domain && (reloc->read_domain & write_domain)) { - reloc->read_domain = 0; - reloc->write_domain = write_domain; - } else if (read_domain & reloc->write_domain) { - reloc->read_domain = 0; - } else { - if (write_domain != reloc->write_domain) - return -EINVAL; - if (read_domain != reloc->read_domain) - return -EINVAL; - } - - reloc->read_domain |= read_domain; - reloc->write_domain |= write_domain; - /* update flags */ - reloc->flags |= (flags & reloc->flags); - /* write relocation packet */ - radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000); - radeon_cs_write_dword((struct radeon_cs *)cs, idx); - return 0; - } + /* Check if this one is already used in this cs */ + idx = boi->idx_by_cs[cs->index]; + if (idx != ((uint32_t)(-1))) { + reloc = (struct cs_reloc_gem*)&csg->relocs[idx]; + assert (reloc->handle == bo->handle); + /* Check domains must be in read or write. As we check already + * checked that in argument one of the read or write domain was + * set we only need to check that if previous reloc as the read + * domain set then the read_domain should also be set for this + * new relocation. + */ + /* the DDX expects to read and write from same pixmap */ + if (write_domain && (reloc->read_domain & write_domain)) { + reloc->read_domain = 0; + reloc->write_domain = write_domain; + } else if (read_domain & reloc->write_domain) { + reloc->read_domain = 0; + } else { + if (write_domain != reloc->write_domain) + return -EINVAL; + if (read_domain != reloc->read_domain) + return -EINVAL; } + + reloc->read_domain |= read_domain; + reloc->write_domain |= write_domain; + /* update flags */ + reloc->flags |= (flags & reloc->flags); + /* write relocation packet */ + radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000); + radeon_cs_write_dword((struct radeon_cs *)cs, idx); + return 0; } /* new relocation */ if (csg->base.crelocs >= csg->nrelocs) { @@ -262,8 +256,7 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs, reloc->flags = flags; csg->chunks[1].length_dw += RELOC_SIZE; radeon_bo_ref(bo); - /* bo might be referenced from another context so have to use atomic opertions */ - atomic_add((atomic_t *)radeon_gem_get_reloc_in_cs(bo), cs->id); + boi->idx_by_cs[cs->index] = idx; cs->relocs_total_size += boi->size; radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000); radeon_cs_write_dword((struct radeon_cs *)cs, idx); @@ -438,8 +431,7 @@ static int cs_gem_emit(struct radeon_cs_int *cs) &csg->cs, sizeof(struct drm_radeon_cs)); for (i = 0; i < csg->base.crelocs; i++) { csg->relocs_bo[i]->space_accounted = 0; - /* bo might be referenced from another context so have to use atomic opertions */ - atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id); + csg->relocs_bo[i]->idx_by_cs[cs->index] = -1; radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]); csg->relocs_bo[i] = NULL; } @@ -470,8 +462,7 @@ static int cs_gem_erase(struct radeon_cs_int *cs) if (csg->relocs_bo) { for (i = 0; i < csg->base.crelocs; i++) { if (csg->relocs_bo[i]) { - /* bo might be referenced from another context so have to use atomic opertions */ - atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id); + csg->relocs_bo[i]->idx_by_cs[cs->index] = -1; radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]); csg->relocs_bo[i] = NULL; } diff --git a/radeon/radeon_cs_int.h b/radeon/radeon_cs_int.h index 6cee574..4c2be36 100644 --- a/radeon/radeon_cs_int.h +++ b/radeon/radeon_cs_int.h @@ -29,6 +29,7 @@ struct radeon_cs_int { void (*space_flush_fn)(void *); void *space_flush_data; uint32_t id; + uint32_t index; }; /* cs functions */
_______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel