[RFC] Avoid quadratic behavior in relocs/cs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Attached is a change to the radeon reloc emitting code, that stores the reloc 
index in the buffer object. This avoids quadratic runtime behavior in the 
number of emitted buffer object relocs per command stream.
The reloc index is held in an array indexed by command stream number, which 
means that it should be safe to use with multiple command stream objects in 
place.

The patch itself is bigger than needed since it reindents the 'buffer object 
already has an index' case in cs_gem_write_reloc.

This change is driven by cs_gem_write_reloc already showing up noticable in 
profiles of some of my use cases.

Comments?

By the way, it appears to me that space_accounted should be also a per command 
stream member of the buffer object. True?

Greetings

Mathias
diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c
index 081ccb9..6f0a1b7 100644
--- a/radeon/radeon_bo_gem.c
+++ b/radeon/radeon_bo_gem.c
@@ -39,7 +39,6 @@
 #include <sys/mman.h>
 #include <errno.h>
 #include "xf86drm.h"
-#include "xf86atomic.h"
 #include "drm.h"
 #include "radeon_drm.h"
 #include "radeon_bo.h"
@@ -50,7 +49,6 @@ struct radeon_bo_gem {
     struct radeon_bo_int base;
     uint32_t            name;
     int                 map_count;
-    atomic_t            reloc_in_cs;
     void *priv_ptr;
 };
 
@@ -68,7 +66,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
                                  uint32_t flags)
 {
     struct radeon_bo_gem *bo;
-    int r;
+    int i, r;
 
     bo = (struct radeon_bo_gem*)calloc(1, sizeof(struct radeon_bo_gem));
     if (bo == NULL) {
@@ -82,7 +80,8 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
     bo->base.domains = domains;
     bo->base.flags = flags;
     bo->base.ptr = NULL;
-    atomic_set(&bo->reloc_in_cs, 0);
+    for (i = 0; i < sizeof(bo->base.idx_by_cs)/sizeof(bo->base.idx_by_cs[0]); ++i)
+        bo->base.idx_by_cs[i] = -1;
     bo->map_count = 0;
     if (handle) {
         struct drm_gem_open open_arg;
@@ -312,12 +311,6 @@ uint32_t radeon_gem_name_bo(struct radeon_bo *bo)
     return bo_gem->name;
 }
 
-void *radeon_gem_get_reloc_in_cs(struct radeon_bo *bo)
-{
-    struct radeon_bo_gem *bo_gem = (struct radeon_bo_gem*)bo;
-    return &bo_gem->reloc_in_cs;
-}
-
 int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name)
 {
     struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
diff --git a/radeon/radeon_bo_gem.h b/radeon/radeon_bo_gem.h
index 0af8610..c56c58e 100644
--- a/radeon/radeon_bo_gem.h
+++ b/radeon/radeon_bo_gem.h
@@ -38,7 +38,6 @@ struct radeon_bo_manager *radeon_bo_manager_gem_ctor(int fd);
 void radeon_bo_manager_gem_dtor(struct radeon_bo_manager *bom);
 
 uint32_t radeon_gem_name_bo(struct radeon_bo *bo);
-void *radeon_gem_get_reloc_in_cs(struct radeon_bo *bo);
 int radeon_gem_set_domain(struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain);
 int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name);
 #endif
diff --git a/radeon/radeon_bo_int.h b/radeon/radeon_bo_int.h
index 9589ead..f47f790 100644
--- a/radeon/radeon_bo_int.h
+++ b/radeon/radeon_bo_int.h
@@ -6,6 +6,8 @@ struct radeon_bo_manager {
     int                     fd;
 };
 
+#define MAX_CS_COUNT 32
+
 struct radeon_bo_int {
     void                        *ptr;
     uint32_t                    flags;
@@ -18,6 +20,8 @@ struct radeon_bo_int {
     struct radeon_bo_manager    *bom;
     uint32_t                    space_accounted;
     uint32_t                    referenced_in_cs;
+    /* store the reloc index indexed by the cs index number */
+    uint32_t                    idx_by_cs[MAX_CS_COUNT];
 };
 
 /* bo functions */
diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c
index 81bd393..00440e6 100644
--- a/radeon/radeon_cs_gem.c
+++ b/radeon/radeon_cs_gem.c
@@ -43,7 +43,6 @@
 #include "radeon_bo_gem.h"
 #include "drm.h"
 #include "xf86drm.h"
-#include "xf86atomic.h"
 #include "radeon_drm.h"
 #include "bof.h"
 
@@ -94,6 +93,9 @@ static uint32_t get_first_zero(const uint32_t n)
 static uint32_t generate_id(void)
 {
     uint32_t r = 0;
+
+    assert(MAX_CS_COUNT == 8*sizeof(r));
+
     pthread_mutex_lock( &id_mutex );
     /* check for free ids */
     if (cs_id_source != ~r) {
@@ -142,6 +144,7 @@ static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
     csg->base.relocs_total_size = 0;
     csg->base.crelocs = 0;
     csg->base.id = generate_id();
+    csg->base.index = get_first_zero(csg->base.id) - 2;
     csg->nrelocs = 4096 / (4 * 4) ;
     csg->relocs_bo = (struct radeon_bo_int**)calloc(1,
                                                 csg->nrelocs*sizeof(void*));
@@ -176,7 +179,6 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
     struct cs_gem *csg = (struct cs_gem*)cs;
     struct cs_reloc_gem *reloc;
     uint32_t idx;
-    unsigned i;
 
     assert(boi->space_accounted);
 
@@ -193,46 +195,38 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
     if (write_domain == RADEON_GEM_DOMAIN_CPU) {
         return -EINVAL;
     }
-    /* use bit field hash function to determine
-       if this bo is for sure not in this cs.*/
-    if ((atomic_read((atomic_t *)radeon_gem_get_reloc_in_cs(bo)) & cs->id)) {
-        /* check if bo is already referenced.
-         * Scanning from end to begin reduces cycles with mesa because
-         * it often relocates same shared dma bo again. */
-        for(i = cs->crelocs; i != 0;) {
-            --i;
-            idx = i * RELOC_SIZE;
-            reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
-            if (reloc->handle == bo->handle) {
-                /* Check domains must be in read or write. As we check already
-                 * checked that in argument one of the read or write domain was
-                 * set we only need to check that if previous reloc as the read
-                 * domain set then the read_domain should also be set for this
-                 * new relocation.
-                 */
-                /* the DDX expects to read and write from same pixmap */
-                if (write_domain && (reloc->read_domain & write_domain)) {
-                    reloc->read_domain = 0;
-                    reloc->write_domain = write_domain;
-                } else if (read_domain & reloc->write_domain) {
-                    reloc->read_domain = 0;
-                } else {
-                    if (write_domain != reloc->write_domain)
-                        return -EINVAL;
-                    if (read_domain != reloc->read_domain)
-                        return -EINVAL;
-                }
-
-                reloc->read_domain |= read_domain;
-                reloc->write_domain |= write_domain;
-                /* update flags */
-                reloc->flags |= (flags & reloc->flags);
-                /* write relocation packet */
-                radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
-                radeon_cs_write_dword((struct radeon_cs *)cs, idx);
-                return 0;
-            }
+    /* Check if this one is already used in this cs */
+    idx = boi->idx_by_cs[cs->index];
+    if (idx != ((uint32_t)(-1))) {
+        reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
+        assert (reloc->handle == bo->handle);
+        /* Check domains must be in read or write. As we check already
+         * checked that in argument one of the read or write domain was
+         * set we only need to check that if previous reloc as the read
+         * domain set then the read_domain should also be set for this
+         * new relocation.
+         */
+        /* the DDX expects to read and write from same pixmap */
+        if (write_domain && (reloc->read_domain & write_domain)) {
+            reloc->read_domain = 0;
+            reloc->write_domain = write_domain;
+        } else if (read_domain & reloc->write_domain) {
+            reloc->read_domain = 0;
+        } else {
+            if (write_domain != reloc->write_domain)
+                return -EINVAL;
+            if (read_domain != reloc->read_domain)
+                return -EINVAL;
         }
+
+        reloc->read_domain |= read_domain;
+        reloc->write_domain |= write_domain;
+        /* update flags */
+        reloc->flags |= (flags & reloc->flags);
+        /* write relocation packet */
+        radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
+        radeon_cs_write_dword((struct radeon_cs *)cs, idx);
+        return 0;
     }
     /* new relocation */
     if (csg->base.crelocs >= csg->nrelocs) {
@@ -262,8 +256,7 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
     reloc->flags = flags;
     csg->chunks[1].length_dw += RELOC_SIZE;
     radeon_bo_ref(bo);
-    /* bo might be referenced from another context so have to use atomic opertions */
-    atomic_add((atomic_t *)radeon_gem_get_reloc_in_cs(bo), cs->id);
+    boi->idx_by_cs[cs->index] = idx;
     cs->relocs_total_size += boi->size;
     radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
     radeon_cs_write_dword((struct radeon_cs *)cs, idx);
@@ -438,8 +431,7 @@ static int cs_gem_emit(struct radeon_cs_int *cs)
                             &csg->cs, sizeof(struct drm_radeon_cs));
     for (i = 0; i < csg->base.crelocs; i++) {
         csg->relocs_bo[i]->space_accounted = 0;
-        /* bo might be referenced from another context so have to use atomic opertions */
-        atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id);
+        csg->relocs_bo[i]->idx_by_cs[cs->index] = -1;
         radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
         csg->relocs_bo[i] = NULL;
     }
@@ -470,8 +462,7 @@ static int cs_gem_erase(struct radeon_cs_int *cs)
     if (csg->relocs_bo) {
         for (i = 0; i < csg->base.crelocs; i++) {
             if (csg->relocs_bo[i]) {
-                /* bo might be referenced from another context so have to use atomic opertions */
-                atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id);
+                csg->relocs_bo[i]->idx_by_cs[cs->index] = -1;
                 radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
                 csg->relocs_bo[i] = NULL;
             }
diff --git a/radeon/radeon_cs_int.h b/radeon/radeon_cs_int.h
index 6cee574..4c2be36 100644
--- a/radeon/radeon_cs_int.h
+++ b/radeon/radeon_cs_int.h
@@ -29,6 +29,7 @@ struct radeon_cs_int {
     void                        (*space_flush_fn)(void *);
     void                        *space_flush_data;
     uint32_t                    id;
+    uint32_t                    index;
 };
 
 /* cs functions */
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel

[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux