Regards, Oak -----Original Message----- From: Koenig, Christian <Christian.Koenig@xxxxxxx> Sent: Friday, August 9, 2019 8:31 AM To: Zeng, Oak <Oak.Zeng@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Keely, Sean <Sean.Keely@xxxxxxx> Subject: Re: [PATCH 3/5] drm/amdkfd: Postpone memory mapping flags calculation to mapping time Am 09.08.19 um 04:15 schrieb Zeng, Oak: > Some mapping flags are decided by memory mapping destination which is > not know at memory object allocation time. So it is reasonable to > decide memory mapping flags at mapping time, instead of alloc time. > Record memory allocation flags during allocation time and calculate > mapping flags during memory mapping time. > > Also made the memory mapping flage calculation to be asic-specific, > because different asic can have different mapping scheme. > > The new memory mapping flag is decided by both memory allocation flags > and whether the memory mapping is to a remote device. > > This is preparation work to introduce more sophisticated mapping scheme. > > Change-Id: I98e7c47d850585ad7f0a9e11617c92b7a9aced3b > Signed-off-by: Oak Zeng <Oak.Zeng@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +++++----------- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++++ > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 20 +++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 19 ++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 20 +++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 21 ++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++++++++++- > 8 files changed, 106 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index e519df3..026475a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -58,6 +58,7 @@ struct kgd_mem { > uint64_t va; > > uint32_t mapping_flags; > + uint32_t alloc_flags; > > atomic_t invalid; > struct amdkfd_process_info *process_info; diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 44a52b0..f91ef48 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1081,7 +1081,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > int byte_align; > u32 domain, alloc_domain; > u64 alloc_flags; > - uint32_t mapping_flags; > int ret; > > /* > @@ -1143,16 +1142,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > adev->asic_type != CHIP_VEGAM) ? > VI_BO_SIZE_ALIGN : 1; > > - mapping_flags = AMDGPU_VM_PAGE_READABLE; > - if (flags & ALLOC_MEM_FLAGS_WRITABLE) > - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; > - if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) > - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; > - if (flags & ALLOC_MEM_FLAGS_COHERENT) > - mapping_flags |= AMDGPU_VM_MTYPE_UC; > - else > - mapping_flags |= AMDGPU_VM_MTYPE_NC; > - (*mem)->mapping_flags = mapping_flags; > + (*mem)->alloc_flags = flags; > > amdgpu_sync_create(&(*mem)->sync); > > @@ -1298,6 +1288,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( > struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); > + struct amdgpu_device *bo_adev; > struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; > int ret; > struct amdgpu_bo *bo; > @@ -1315,6 +1306,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( > return -EINVAL; > } > > + bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); > + mem->mapping_flags = amdgpu_gmc_get_vm_mapping_flags(adev, > +mem->alloc_flags, bo_adev != adev); > + > /* Make sure restore is not running concurrently. Since we > * don't map invalid userptr BOs, we rely on the next restore > * worker to do the mapping > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index 071145a..6bd0c28 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -105,6 +105,9 @@ struct amdgpu_gmc_funcs { > /* get the pde for a given mc addr */ > void (*get_vm_pde)(struct amdgpu_device *adev, int level, > u64 *dst, u64 *flags); > + /* get per asic vm mapping flags */ > + uint32_t (*get_vm_mapping_flags)(struct amdgpu_device *adev, > + uint32_t alloc_flags, bool remote_mapping); > }; > > struct amdgpu_xgmi { > @@ -185,6 +188,7 @@ struct amdgpu_gmc { > #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) > #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) > #define amdgpu_gmc_get_pte_flags(adev, flags) > (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) > +#define amdgpu_gmc_get_vm_mapping_flags(adev, alloc_flags, > +remote_mapping) ((adev)->gmc.gmc_funcs->get_vm_mapping_flags((adev), > +(alloc_flags), (remote_mapping))) > > /** > * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible > through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index 4e3ac10..7e420e0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -415,12 +415,30 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, > } > } > > +static uint32_t gmc_v10_0_get_vm_mapping_flags(struct amdgpu_device *adev, > + uint32_t alloc_flags, bool remote_mapping) { > + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE; > + > + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) > + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) > + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT) > + mapping_flags |= AMDGPU_VM_MTYPE_UC; > + else > + mapping_flags |= AMDGPU_VM_MTYPE_NC; > + > + return mapping_flags; > +} > + > static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, > .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, > .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags, > - .get_vm_pde = gmc_v10_0_get_vm_pde > + .get_vm_pde = gmc_v10_0_get_vm_pde, > + .get_vm_mapping_flags = gmc_v10_0_get_vm_mapping_flags > }; > > static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) diff > --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > index b06d876..2b2af6a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > @@ -404,6 +404,22 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, > BUG_ON(*addr & 0xFFFFFF0000000FFFULL); > } > > +static uint32_t gmc_v6_0_get_vm_mapping_flags(struct amdgpu_device *adev, > + uint32_t alloc_flags, bool remote_mapping) { > + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE; > + > + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) > + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) > + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT) > + mapping_flags |= AMDGPU_VM_MTYPE_UC; > + else > + mapping_flags |= AMDGPU_VM_MTYPE_NC; > + > + return mapping_flags; > +} > static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, > bool value) > { > @@ -1157,7 +1173,8 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { > .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, > .set_prt = gmc_v6_0_set_prt, > .get_vm_pde = gmc_v6_0_get_vm_pde, > - .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags > + .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags, > + .get_vm_mapping_flags = gmc_v6_0_get_vm_mapping_flags That looks like a duplication of what get_vm_pte_flags is supposed to be doing. [Oak] There are two layers of definition here: general user-accessible AMDGPU_VM_xxx definitions (in amdgpu_drm.h) and asic-specific AMDGPU_PTE_xxx definitions (defined in amdgpu_vm.h). For amdgpu drm user, user pass in AMDGPU_VM_XX flags and get_vm_pte_flags translate those flags to AMDGPU_PTE_xxx flags to program page table. For compute stack, there are two layers of translation: we will first generate the AMDGPU_VM_xxx flags from memory object allocation flags/memory physical location/memory mapping destination/memory access path (pcie or xgmi). Then get_vm_pte_flags is called to translate AMDGPU_VM_xx flags to AMDGPU_PTE_xx flags. The second layer translation is the same with the drm usage. For compute stack user, due the API definition, user can't directly pass in the AMDGPU_VM_xxx flags, instead it pass in an allocation flags. Also it is not possible for user to pass in the AMDGPU_VM flags as some of the flags is decided by factors that user is unaware, for example whether the access is through pcie or xgmi. Due to above reasons, We had to do two layers of translations. The two layers of translation exists before this patch seriers (the first layer was in amdgpu_amdkfd_gpuvm.c). When I do this patch series, I had a need to make the first layers of translation also to be asic-specific as Arcturus introduce different mapping scheme. Then I moved the first layer translation to asic-specifc gmc functions. Christian. > }; > > static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff > --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > index 75aa333..619862e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > @@ -481,6 +481,23 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, > BUG_ON(*addr & 0xFFFFFF0000000FFFULL); > } > > +static uint32_t gmc_v7_0_get_vm_mapping_flags(struct amdgpu_device *adev, > + uint32_t alloc_flags, bool remote_mapping) { > + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE; > + > + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) > + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) > + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT) > + mapping_flags |= AMDGPU_VM_MTYPE_UC; > + else > + mapping_flags |= AMDGPU_VM_MTYPE_NC; > + > + return mapping_flags; > +} > + > /** > * gmc_v8_0_set_fault_enable_default - update VM fault handling > * > @@ -1353,7 +1370,8 @@ static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { > .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, > .set_prt = gmc_v7_0_set_prt, > .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, > - .get_vm_pde = gmc_v7_0_get_vm_pde > + .get_vm_pde = gmc_v7_0_get_vm_pde, > + .get_vm_mapping_flags = gmc_v7_0_get_vm_mapping_flags > }; > > static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff > --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 8bf2ba3..d2cecb30 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -706,6 +706,24 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, > BUG_ON(*addr & 0xFFFFFF0000000FFFULL); > } > > +static uint32_t gmc_v8_0_get_vm_mapping_flags(struct amdgpu_device *adev, > + uint32_t alloc_flags, bool remote_mapping) { > + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE; > + > + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) > + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) > + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT) > + mapping_flags |= AMDGPU_VM_MTYPE_UC; > + else > + mapping_flags |= AMDGPU_VM_MTYPE_NC; > + > + return mapping_flags; > +} > + > + > /** > * gmc_v8_0_set_fault_enable_default - update VM fault handling > * > @@ -1721,7 +1739,8 @@ static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { > .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, > .set_prt = gmc_v8_0_set_prt, > .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, > - .get_vm_pde = gmc_v8_0_get_vm_pde > + .get_vm_pde = gmc_v8_0_get_vm_pde, > + .get_vm_mapping_flags = gmc_v8_0_get_vm_mapping_flags > }; > > static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff > --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index e6adc86..d709902 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -626,12 +626,31 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, > } > } > > +static uint32_t gmc_v9_0_get_vm_mapping_flags(struct amdgpu_device *adev, > + uint32_t alloc_flags, bool remote_mapping) { > + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE; > + > + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) > + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) > + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; > + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT) > + mapping_flags |= AMDGPU_VM_MTYPE_UC; > + else > + mapping_flags |= AMDGPU_VM_MTYPE_NC; > + > + return mapping_flags; > +} > + > + > static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, > .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, > .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, > - .get_vm_pde = gmc_v9_0_get_vm_pde > + .get_vm_pde = gmc_v9_0_get_vm_pde, > + .get_vm_mapping_flags = gmc_v9_0_get_vm_mapping_flags > }; > > static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx