[+Marek, Alex for comment, see below] On 2017-08-13 04:56 AM, Oded Gabbay wrote: > On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: >> From: Moses Reuben <moses.reuben at amd.com> >> >> Signed-off-by: Moses Reuben <moses.reuben at amd.com> >> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 34 +++++++++++++++++++++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 35 ++++++++++++++++++++++- >> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 4 +++ >> 3 files changed, 71 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c >> index 994d262..11d515a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c >> @@ -135,6 +135,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, >> static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); >> >> static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd, >> + uint64_t va, uint32_t vmid); >> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, >> + uint8_t element_size, uint8_t index_stride, uint8_t mtype); >> >> static const struct kfd2kgd_calls kfd2kgd = { >> .init_gtt_mem_allocation = alloc_gtt_mem, >> @@ -159,7 +163,9 @@ static const struct kfd2kgd_calls kfd2kgd = { >> .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, >> .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, >> .write_vmid_invalidate_request = write_vmid_invalidate_request, >> - .get_fw_version = get_fw_version >> + .get_fw_version = get_fw_version, >> + .alloc_memory_of_scratch = alloc_memory_of_scratch, >> + .write_config_static_mem = write_config_static_mem >> }; >> >> struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) >> @@ -652,6 +658,32 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) >> WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >> } >> >> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, >> + uint8_t element_size, uint8_t index_stride, uint8_t mtype) >> +{ >> + uint32_t reg; >> + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >> + >> + reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT | >> + element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT | >> + index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT | >> + mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT; >> + >> + WREG32(mmSH_STATIC_MEM_CONFIG, reg); > Don't you need to select and lock srbm before you write to this register ? No, this register seems to be global. SRBM_GFX_CNTL had no effect on it when I experimented with it. Since this is global initialization, I'm wondering if this should be moved into part of the amdgpu initialization. Having amdkfd call back into amdgpu like this during its initialization always seemed like a bit roundabout. Marek, do you know if this register affects Mesa performance or behaviour with respect to scratch memory. According to the register spec it only affects flat scratch memory addressing. Not sure if you're using that addressing scheme in Mesa. Alex, I see that SH_STATIC_MEM_CONFIG gets initialized in gfx_v[78]_0_gpu_init identically to what we do in KFD. You're doing it per-VMID (under cik_srbm_select). But my experiments show that this register is global. So I think your initialization already works for KFD and we can probably just drop this from the KFD initialization. Do you have access to hardware docs that confirm that it's global? Regards, Felix > >> + return 0; >> +} >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd, >> + uint64_t va, uint32_t vmid) >> +{ >> + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >> + >> + lock_srbm(kgd, 0, 0, 0, vmid); >> + WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); >> + unlock_srbm(kgd); >> + >> + return 0; >> +} >> + >> static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) >> { >> struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> index 29a6f5d..ef677e5 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> @@ -94,6 +94,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, >> uint8_t vmid); >> static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); >> static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd, >> + uint64_t va, uint32_t vmid); >> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, >> + uint8_t element_size, uint8_t index_stride, uint8_t mtype); >> >> static const struct kfd2kgd_calls kfd2kgd = { >> .init_gtt_mem_allocation = alloc_gtt_mem, >> @@ -120,12 +124,15 @@ static const struct kfd2kgd_calls kfd2kgd = { >> .get_atc_vmid_pasid_mapping_valid = >> get_atc_vmid_pasid_mapping_valid, >> .write_vmid_invalidate_request = write_vmid_invalidate_request, >> - .get_fw_version = get_fw_version >> + .get_fw_version = get_fw_version, >> + .alloc_memory_of_scratch = alloc_memory_of_scratch, >> + .write_config_static_mem = write_config_static_mem >> }; >> >> struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) >> { >> return (struct kfd2kgd_calls *)&kfd2kgd; >> + return (struct kfd2kgd_calls *)&kfd2kgd; >> } >> >> static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) >> @@ -573,6 +580,32 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, >> return 0; >> } >> >> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, >> + uint8_t element_size, uint8_t index_stride, uint8_t mtype) >> +{ >> + uint32_t reg; >> + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >> + >> + reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT | >> + element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT | >> + index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT | >> + mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT; >> + >> + WREG32(mmSH_STATIC_MEM_CONFIG, reg); > Same comment as above. > >> + return 0; >> +} >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd, >> + uint64_t va, uint32_t vmid) >> +{ >> + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >> + >> + lock_srbm(kgd, 0, 0, 0, vmid); >> + WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); >> + unlock_srbm(kgd); >> + >> + return 0; >> +} >> + >> static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) >> { >> struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h >> index ffafda0..1dd90b2 100644 >> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h >> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h >> @@ -199,6 +199,10 @@ struct kfd2kgd_calls { >> >> uint16_t (*get_fw_version)(struct kgd_dev *kgd, >> enum kgd_engine_type type); >> + int (*alloc_memory_of_scratch)(struct kgd_dev *kgd, >> + uint64_t va, uint32_t vmid); >> + int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable, >> + uint8_t element_size, uint8_t index_stride, uint8_t mtype); >> }; >> >> /** >> -- >> 2.7.4 >>