[PATCH 3/3] drm/amdgpu: Shrink struct amdgpu_job further

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



By moving the booleans to flags and shrinking some fields we can stop
spilling job allocation into the 1k SLAB even with two appended indirect
buffers.

End result for struct amdgpu_job:

        /* size: 448, cachelines: 7, members: 24 */
        /* forced alignments: 1 */

So appending two IB buffers, which seems to be a typical case when
running a game such as Cyberpunk 2077, makes every submission perfectly
fit into the 512 byte SLAB with no wastage.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 19 ++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c      |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c     | 29 +++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  6 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h     | 36 +++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 17 +++++-----
 11 files changed, 72 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5cc5f59e3018..181296ea9df7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -108,13 +108,15 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
 			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
 			   unsigned int *num_ibs)
 {
+	struct amdgpu_job *job;
 	int r;
 
 	r = amdgpu_cs_job_idx(p, chunk_ib);
 	if (r < 0)
 		return r;
 
-	if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
+	if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type) ||
+	    WARN_ON_ONCE(overflows_type(num_ibs[r], typeof(job->num_ibs))))
 		return -EINVAL;
 
 	++(num_ibs[r]);
@@ -296,7 +298,8 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 				       num_ibs[i], &p->jobs[i]);
 		if (ret)
 			goto free_all_kdata;
-		p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id];
+		if (p->adev->enforce_isolation[fpriv->xcp_id])
+			p->jobs[i]->flags |= AMDGPU_ENFORCE_ISOLATION;
 	}
 	p->gang_leader = p->jobs[p->gang_leader_idx];
 
@@ -367,7 +370,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
 	}
 
 	if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
-		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+		job->flags |= AMDGPU_PREAMBLE_IB_PRESENT;
 
 	r =  amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
 			   chunk_ib->ib_bytes : 0,
@@ -583,8 +586,8 @@ static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
 		p->jobs[i]->shadow_va = shadow->shadow_va;
 		p->jobs[i]->csa_va = shadow->csa_va;
 		p->jobs[i]->gds_va = shadow->gds_va;
-		p->jobs[i]->init_shadow =
-			shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+		if (shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+			p->jobs[i]->flags |= AMDGPU_INIT_SHADOW;
 	}
 
 	return 0;
@@ -1004,7 +1007,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
 {
-	int i, j;
+	unsigned int i, j;
 
 	if (!trace_amdgpu_cs_enabled())
 		return;
@@ -1352,9 +1355,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 				   p->fence);
 	amdgpu_cs_post_dependencies(p);
 
-	if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+	if ((leader->flags & AMDGPU_PREAMBLE_IB_PRESENT) &&
 	    !p->ctx->preamble_presented) {
-		leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+		leader->flags |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
 		p->ctx->preamble_presented = true;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 49ca8c814455..447345cb94db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1904,7 +1904,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
 		job = to_amdgpu_job(s_job);
 		if (preempted && (&job->hw_fence) == fence)
 			/* mark the job as preempted */
-			job->preemption_status |= AMDGPU_IB_PREEMPTED;
+			job->flags |= AMDGPU_IB_PREEMPTED;
 	}
 	spin_unlock(&sched->job_list_lock);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 784b03abb3a4..f224547b6863 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1430,7 +1430,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
 	if (r)
 		goto err;
 
-	job->enforce_isolation = true;
+	job->flags |= AMDGPU_ENFORCE_ISOLATION;
 
 	ib = &job->ibs[0];
 	for (i = 0; i <= ring->funcs->align_mask; ++i)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 1c19a65e6553..56058f18e0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -658,7 +658,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		goto error_alloc;
 
 	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-	job->vm_needs_flush = true;
+	job->flags |= AMDGPU_VM_NEEDS_FLUSH;
 	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	fence = amdgpu_job_submit(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 2ea98ec60220..c6d11311dcd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -153,7 +153,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		shadow_va = job->shadow_va;
 		csa_va = job->csa_va;
 		gds_va = job->gds_va;
-		init_shadow = job->init_shadow;
+		init_shadow = job->flags & AMDGPU_INIT_SHADOW;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
@@ -235,8 +235,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		status |= AMDGPU_HAVE_CTX_SWITCH;
 
 	if (job && ring->funcs->emit_cntxcntl) {
-		status |= job->preamble_status;
-		status |= job->preemption_status;
+		status |= job->flags & (AMDGPU_PREAMBLE_IB_PRESENT |
+					AMDGPU_PREAMBLE_IB_PRESENT_FIRST |
+					AMDGPU_IB_PREEMPTED);
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 8e712a11aba5..b9e18a806b5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -276,8 +276,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->vm_hub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	uint32_t flags = vm->use_cpu_for_update ? AMDGPU_VM_NEEDS_FLUSH : 0;
 	uint64_t fence_context = adev->fence_context + ring->idx;
-	bool needs_flush = vm->use_cpu_for_update;
 	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
@@ -320,7 +320,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 				return 0;
 			}
 		}
-		needs_flush = true;
+		flags = AMDGPU_VM_NEEDS_FLUSH;
 	}
 
 	/* Good we can use this VMID. Remember this submission as
@@ -330,8 +330,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	if (r)
 		return r;
 
-	job->vm_needs_flush = needs_flush;
-	job->spm_update_needed = true;
+	job->flags = AMDGPU_SPM_UPDATE_NEEDED | flags;
 	return 0;
 }
 
@@ -357,7 +356,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
-	job->vm_needs_flush = vm->use_cpu_for_update;
+	if (vm->use_cpu_for_update)
+		job->flags |= AMDGPU_VM_NEEDS_FLUSH;
 
 	/* Check if we can use a VMID already assigned to this VM */
 	list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
@@ -389,7 +389,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 		if (r)
 			return r;
 
-		job->vm_needs_flush |= needs_flush;
+		if (needs_flush)
+			job->flags |= AMDGPU_VM_NEEDS_FLUSH;
 		return 0;
 	}
 
@@ -415,6 +416,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vmid *idle = NULL;
 	struct amdgpu_vmid *id = NULL;
+	unsigned int vmid;
 	int r = 0;
 
 	mutex_lock(&id_mgr->lock);
@@ -441,19 +443,26 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			if (r)
 				goto error;
 
-			job->vm_needs_flush = true;
+			job->flags |= AMDGPU_VM_NEEDS_FLUSH;
 		}
 
 		list_move_tail(&id->list, &id_mgr->ids_lru);
 	}
 
-	job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
-	if (job->vm_needs_flush) {
+	if (amdgpu_vmid_gds_switch_needed(id, job))
+		job->flags |= AMDGPU_GDS_SWITCH_NEEDED;
+	if (job->flags & AMDGPU_VM_NEEDS_FLUSH) {
 		id->flushed_updates = amdgpu_vm_tlb_seq(vm);
 		dma_fence_put(id->last_flush);
 		id->last_flush = NULL;
 	}
-	job->vmid = id - id_mgr->ids;
+
+	vmid = id - id_mgr->ids;
+	if (WARN_ON_ONCE(overflows_type(vmid, typeof(job->vmid)))) {
+		r = -ERANGE;
+		goto error;
+	}
+	job->vmid = vmid;
 	job->pasid = vm->pasid;
 
 	id->gds_base = job->gds_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1899c601c95c..dca54a47dfff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -231,13 +231,17 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
 void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
 			      struct amdgpu_bo *gws, struct amdgpu_bo *oa)
 {
+	uint32_t size;
+
 	if (gds) {
 		job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
 		job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 	}
 	if (gws) {
 		job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
-		job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+		size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+		WARN_ON_ONCE(overflows_type(size, job->gws_size));
+		job->gws_size = size;
 	}
 	if (oa) {
 		job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 7e5fe3d73a06..edb18df42816 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -28,13 +28,18 @@
 #include "amdgpu_ring.h"
 
 /* bit set means command submit involves a preamble IB */
-#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0)
+#define AMDGPU_PREAMBLE_IB_PRESENT		(1 << 0)
 /* bit set means preamble IB is first presented in belonging context */
-#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1)
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST	(1 << 1)
 /* bit set means context switch occured */
-#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2)
+#define AMDGPU_HAVE_CTX_SWITCH			(1 << 2)
 /* bit set means IB is preempted */
-#define AMDGPU_IB_PREEMPTED                 (1 << 3)
+#define AMDGPU_IB_PREEMPTED			(1 << 3)
+#define AMDGPU_VM_NEEDS_FLUSH			(1 << 4)
+#define AMDGPU_GDS_SWITCH_NEEDED		(1 << 5)
+#define AMDGPU_SPM_UPDATE_NEEDED		(1 << 6)
+#define AMDGPU_ENFORCE_ISOLATION		(1 << 7)
+#define AMDGPU_INIT_SHADOW			(1 << 8)
 
 #define to_amdgpu_job(sched_job)		\
 		container_of((sched_job), struct amdgpu_job, base)
@@ -50,21 +55,18 @@ struct amdgpu_job {
 	struct amdgpu_sync	explicit_sync;
 	struct dma_fence	hw_fence;
 	struct dma_fence	*gang_submit;
-	bool                    vm_needs_flush;
-	bool			gds_switch_needed;
-	bool			spm_update_needed;
-	bool			enforce_isolation;
-	bool			init_shadow;
-	unsigned		vmid;
-	unsigned		pasid;
-	uint32_t		num_ibs;
-	uint32_t		preamble_status;
-	uint32_t                preemption_status;
+
+	uint32_t		pasid;
+	uint16_t		flags;
+	uint16_t		job_run_counter; /* >= 1 means a resubmit job */
+	uint8_t			vmid;
+	uint8_t			num_ibs;
+
+	uint16_t		gws_size; /* in pages so enough and prevents holes */
+	uint32_t		gws_base;
+
 	uint32_t		gds_base, gds_size;
-	uint32_t		gws_base, gws_size;
 	uint32_t		oa_base, oa_size;
-	/* job_run_counter >= 1 means a resubmit job */
-	uint32_t		job_run_counter;
 	uint64_t		vm_pd_addr;
 	uint64_t		generation;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 383fce40d4dd..913e142564bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -235,7 +235,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 			   __entry->vmid = job->vmid;
 			   __entry->vm_hub = ring->vm_hub,
 			   __entry->pd_addr = job->vm_pd_addr;
-			   __entry->needs_flush = job->vm_needs_flush;
+			   __entry->needs_flush = job->flags & AMDGPU_VM_NEEDS_FLUSH;
 			   ),
 	    TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
 		      __entry->pasid, __get_str(ring), __entry->vmid,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 01ae2f88dec8..1f2d252802a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2163,7 +2163,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 		(*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
 							adev->gmc.pdb0_bo :
 							adev->gart.bo);
-		(*job)->vm_needs_flush = true;
+		(*job)->flags |= AMDGPU_VM_NEEDS_FLUSH;
 	}
 	if (!resv)
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5c07777d3239..04aa1c75cbc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -726,10 +726,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 	if (job->vmid == 0)
 		return false;
 
-	if (job->vm_needs_flush || ring->has_compute_vm_bug)
+	if ((job->flags & AMDGPU_VM_NEEDS_FLUSH) || ring->has_compute_vm_bug)
 		return true;
 
-	if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+	if (ring->funcs->emit_gds_switch &&
+	    (job->flags & AMDGPU_GDS_SWITCH_NEEDED))
 		return true;
 
 	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
@@ -756,11 +757,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->vm_hub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
-	bool spm_update_needed = job->spm_update_needed;
 	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
-		job->gds_switch_needed;
-	bool vm_flush_needed = job->vm_needs_flush;
+				 (job->flags & AMDGPU_GDS_SWITCH_NEEDED);
+	bool spm_update_needed = job->flags & AMDGPU_SPM_UPDATE_NEEDED;
+	bool vm_flush_needed = job->flags & AMDGPU_VM_NEEDS_FLUSH;
+	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
 	struct dma_fence *fence = NULL;
 	bool pasid_mapping_needed = false;
 	unsigned int patch;
@@ -786,7 +787,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		ring->funcs->emit_wreg;
 
 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
-	    !(job->enforce_isolation && !job->vmid))
+	    !((job->flags & AMDGPU_ENFORCE_ISOLATION) && !job->vmid))
 		return 0;
 
 	amdgpu_ring_ib_begin(ring);
@@ -799,7 +800,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 
 	if (adev->gfx.enable_cleaner_shader &&
 	    ring->funcs->emit_cleaner_shader &&
-	    job->enforce_isolation)
+	    (job->flags & AMDGPU_ENFORCE_ISOLATION))
 		ring->funcs->emit_cleaner_shader(ring);
 
 	if (vm_flush_needed) {
-- 
2.48.0




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux