This patch is for comments only (RFC). It still triggers some assertion failures. Marek On Tue, Jul 18, 2017 at 1:47 PM, Marek Olšák <maraeo at gmail.com> wrote: > From: Marek Olšák <marek.olsak at amd.com> > > for lower overhead in the CS ioctl > --- > src/gallium/drivers/radeon/r600_buffer_common.c | 7 +++++++ > src/gallium/drivers/radeon/radeon_winsys.h | 1 + > src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 6 ++++++ > 3 files changed, 14 insertions(+) > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c > index dd1c209..2747ac4 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -160,20 +160,27 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen, > } > > /* Tiled textures are unmappable. Always put them in VRAM. */ > if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) || > res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) { > res->domains = RADEON_DOMAIN_VRAM; > res->flags |= RADEON_FLAG_NO_CPU_ACCESS | > RADEON_FLAG_GTT_WC; > } > > + /* Only displayable single-sample textures can be shared between > + * processes. */ > + if (res->b.b.target == PIPE_BUFFER || > + res->b.b.nr_samples >= 2 || > + rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY) > + res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; > + > /* If VRAM is just stolen system memory, allow both VRAM and > * GTT, whichever has free space. If a buffer is evicted from > * VRAM to GTT, it will stay there. > * > * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only > * placements even with a low amount of stolen VRAM. 
> */ > if (!rscreen->info.has_dedicated_vram && > (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) && > res->domains == RADEON_DOMAIN_VRAM) { > diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h > index 351edcd..ce2fd73 100644 > --- a/src/gallium/drivers/radeon/radeon_winsys.h > +++ b/src/gallium/drivers/radeon/radeon_winsys.h > @@ -47,20 +47,21 @@ enum radeon_bo_domain { /* bitfield */ > RADEON_DOMAIN_GTT = 2, > RADEON_DOMAIN_VRAM = 4, > RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT > }; > > enum radeon_bo_flag { /* bitfield */ > RADEON_FLAG_GTT_WC = (1 << 0), > RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), > RADEON_FLAG_NO_SUBALLOC = (1 << 2), > RADEON_FLAG_SPARSE = (1 << 3), > + RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), > }; > > enum radeon_bo_usage { /* bitfield */ > RADEON_USAGE_READ = 2, > RADEON_USAGE_WRITE = 4, > RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE, > > /* The winsys ensures that the CS submission will be scheduled after > * previously flushed CSs referencing this BO in a conflicting way. > */ > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > index 97bbe23..f97e1bf 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > @@ -31,20 +31,24 @@ > > #include "amdgpu_cs.h" > > #include "os/os_time.h" > #include "state_tracker/drm_driver.h" > #include <amdgpu_drm.h> > #include <xf86drm.h> > #include <stdio.h> > #include <inttypes.h> > > +#ifndef AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING > +#define AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING (1 << 6) > +#endif > + > /* Set to 1 for verbose output showing committed sparse buffer ranges. 
*/ > #define DEBUG_SPARSE_COMMITS 0 > > struct amdgpu_sparse_backing_chunk { > uint32_t begin, end; > }; > > static struct pb_buffer * > amdgpu_bo_create(struct radeon_winsys *rws, > uint64_t size, > @@ -395,20 +399,22 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, > > if (initial_domain & RADEON_DOMAIN_VRAM) > request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; > if (initial_domain & RADEON_DOMAIN_GTT) > request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; > > if (flags & RADEON_FLAG_NO_CPU_ACCESS) > request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; > if (flags & RADEON_FLAG_GTT_WC) > request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; > + if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) > + request.flags |= AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING; > > r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); > if (r) { > fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); > fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size); > fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment); > fprintf(stderr, "amdgpu: domains : %u\n", initial_domain); > goto error_bo_alloc; > } > > -- > 2.7.4 >