Re: [PATCH] drm/amdgpu: fix return random value when multiple threads read registers via mes.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 05.11.24 um 10:24 schrieb Chong Li:
The currect code use the address "adev->mes.read_val_ptr" to
store the value read from register via mes.
So when multiple threads read register,
multiple threads have to share the one address,
and overwrite the value each other.

Assign an address by "amdgpu_device_wb_get" to store register value.
each thread will has an address to store register value.

Signed-off-by: Chong Li <chongli2@xxxxxxx>

Reviewed-by: Christian König <christian.koenig@xxxxxxx>

---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 30 +++++++++++--------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  3 ---
  2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 83d0f731fb65..a8f7173b2663 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -189,17 +189,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
  			(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
  	}
- r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
-	if (r) {
-		dev_err(adev->dev,
-			"(%d) read_val_offs alloc failed\n", r);
-		goto error;
-	}
-	adev->mes.read_val_gpu_addr =
-		adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
-	adev->mes.read_val_ptr =
-		(uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
-
  	r = amdgpu_mes_doorbell_init(adev);
  	if (r)
  		goto error;
@@ -220,8 +209,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
  			amdgpu_device_wb_free(adev,
  				      adev->mes.query_status_fence_offs[i]);
  	}
-	if (adev->mes.read_val_ptr)
-		amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
idr_destroy(&adev->mes.pasid_idr);
  	idr_destroy(&adev->mes.gang_id_idr);
@@ -246,8 +233,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
  			amdgpu_device_wb_free(adev,
  				      adev->mes.query_status_fence_offs[i]);
  	}
-	if (adev->mes.read_val_ptr)
-		amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
amdgpu_mes_doorbell_free(adev); @@ -918,10 +903,19 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
  {
  	struct mes_misc_op_input op_input;
  	int r, val = 0;
+	uint32_t addr_offset = 0;
+	uint64_t read_val_gpu_addr;
+	uint32_t *read_val_ptr;
+ if (amdgpu_device_wb_get(adev, &addr_offset)) {
+		DRM_ERROR("critical bug! too many mes readers\n");
+		goto error;
+	}
+	read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
+	read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
  	op_input.op = MES_MISC_OP_READ_REG;
  	op_input.read_reg.reg_offset = reg;
-	op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
+	op_input.read_reg.buffer_addr = read_val_gpu_addr;
if (!adev->mes.funcs->misc_op) {
  		DRM_ERROR("mes rreg is not supported!\n");
@@ -932,9 +926,11 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
  	if (r)
  		DRM_ERROR("failed to read reg (0x%x)\n", reg);
  	else
-		val = *(adev->mes.read_val_ptr);
+		val = *(read_val_ptr);
error:
+	if (addr_offset)
+		amdgpu_device_wb_free(adev, addr_offset);
  	return val;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 45e3508f0f8e..83f45bb48427 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -119,9 +119,6 @@ struct amdgpu_mes {
  	uint32_t			query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
  	uint64_t			query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
  	uint64_t			*query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
-	uint32_t                        read_val_offs;
-	uint64_t			read_val_gpu_addr;
-	uint32_t			*read_val_ptr;
uint32_t saved_flags;




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux