On 04/05/2017 10:27 AM, Alex Deucher wrote: > On Tue, Apr 4, 2017 at 10:11 PM, Zhang, Jerry (Junwei) > <Jerry.Zhang at amd.com> wrote: >> On 04/04/2017 03:34 AM, Deucher, Alexander wrote: >>>> >>>> -----Original Message----- >>>> From: Junwei Zhang [mailto:Jerry.Zhang at amd.com] >>>> Sent: Friday, March 31, 2017 10:44 PM >>>> To: Deucher, Alexander; Koenig, Christian >>>> Cc: amd-gfx at lists.freedesktop.org; Zhang, Jerry >>>> Subject: [PATCH] drm/amdgpu: fix vm size and block size for VMPT (v3) >>>> >>>> From: "Zhang, Jerry" <Jerry.Zhang at amd.com> >>>> >>>> v2: set both of them in gmc >>>> v3: move vm size and block size in vm manager >>>> >>>> Signed-off-by: Junwei Zhang <Jerry.Zhang at amd.com> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 -------- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 ++++++++++++--------- >>>> - >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++- >>>> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 2 +- >>>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 10 ++++++++-- >>>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 9 +++++++-- >>>> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 9 +++++++-- >>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 +++++++++++++-------- >>>> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 +- >>>> 9 files changed, 52 insertions(+), 35 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> index fbb4afb..1d0c742 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> @@ -1041,14 +1041,6 @@ static bool amdgpu_check_pot_argument(int arg) >>>> >>>> static void amdgpu_get_block_size(struct amdgpu_device *adev) >>>> { >>>> - /* from AI, asic starts to support multiple level VMPT */ >>>> - if (adev->asic_type >= CHIP_VEGA10) { >>>> - if (amdgpu_vm_block_size != 9) >>>> - dev_warn(adev->dev, >>>> - "Multi-VMPT limits block size to one >>>> page!\n"); >>>> - amdgpu_vm_block_size = 9; >>>> - return; >>>> - } >>>> /* defines number of bits in page table versus page directory, >>>> * a page is 4KB so we have 12 bits offset, minimum 9 bits in the >>>> * page table and the remaining bits are in the page directory */ >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> index 43adc4b..46d3f1a 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> @@ -100,13 +100,14 @@ static unsigned amdgpu_vm_num_entries(struct >>>> amdgpu_device *adev, >>>> if (level == 0) >>>> /* For the root directory */ >>>> return adev->vm_manager.max_pfn >> >>>> - (amdgpu_vm_block_size * adev- >>>>> vm_manager.num_level); >>>> + (adev->vm_manager.block_size * >>>> + adev->vm_manager.num_level); >>>> else if (level == adev->vm_manager.num_level) >>>> /* For the page tables on the leaves */ >>>> - return AMDGPU_VM_PTE_COUNT; >>>> + return AMDGPU_VM_PTE_COUNT(adev); >>>> else >>>> /* Everything in between */ >>>> - return 1 << amdgpu_vm_block_size; >>>> + return 1 << adev->vm_manager.block_size; >>>> } >>>> >>>> /** >>>> @@ -271,7 +272,7 @@ static int amdgpu_vm_alloc_levels(struct >>>> amdgpu_device *adev, >>>> unsigned level) >>>> { >>>> unsigned shift = (adev->vm_manager.num_level - level) * >>>> - amdgpu_vm_block_size; >>>> + adev->vm_manager.block_size; >>>> unsigned pt_idx, from, to; >>>> int r; >>>> >>>> @@ -970,7 +971,7 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct >>>> amdgpu_pte_update_params *p, >>>> unsigned idx, level = p->adev->vm_manager.num_level; >>>> >>>> while (entry->entries) { >>>> - idx = addr >> (amdgpu_vm_block_size * level--); >>>> + idx = addr >> (p->adev->vm_manager.block_size * level--); >>>> idx %= amdgpu_bo_size(entry->bo) / 8; >>>> entry = &entry->entries[idx]; >>>> } >>>> @@ -997,7 +998,8 @@ static void amdgpu_vm_update_ptes(struct >>>> amdgpu_pte_update_params *params, >>>> uint64_t start, uint64_t end, >>>> uint64_t dst, uint64_t flags) >>>> { >>>> - const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; >>>> + struct amdgpu_device *adev = params->adev; >>>> + const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; >>>> >>>> uint64_t cur_pe_start, cur_nptes, cur_dst; >>>> uint64_t addr; /* next GPU address to be updated */ >>>> @@ -1021,7 +1023,7 @@ static void amdgpu_vm_update_ptes(struct >>>> amdgpu_pte_update_params *params, >>>> if ((addr & ~mask) == (end & ~mask)) >>>> nptes = end - addr; >>>> else >>>> - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); >>>> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); >>>> >>>> cur_pe_start = amdgpu_bo_gpu_offset(pt); >>>> cur_pe_start += (addr & mask) * 8; >>>> @@ -1049,7 +1051,7 @@ static void amdgpu_vm_update_ptes(struct >>>> amdgpu_pte_update_params *params, >>>> if ((addr & ~mask) == (end & ~mask)) >>>> nptes = end - addr; >>>> else >>>> - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); >>>> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & >>>> mask); >>>> >>>> next_pe_start = amdgpu_bo_gpu_offset(pt); >>>> next_pe_start += (addr & mask) * 8; >>>> @@ -1196,7 +1198,7 @@ static int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> * reserve space for one command every (1 << BLOCK_SIZE) >>>> * entries or 2k dwords (whatever is smaller) >>>> */ >>>> - ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1; >>>> + ncmds = (nptes >> min(adev->vm_manager.block_size, 11)) + 1; >>>> >>>> /* padding, etc. */ >>>> ndw = 64; >>>> @@ -2067,7 +2069,7 @@ void amdgpu_vm_bo_invalidate(struct >>>> amdgpu_device *adev, >>>> int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) >>>> { >>>> const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, >>>> - AMDGPU_VM_PTE_COUNT * 8); >>>> + AMDGPU_VM_PTE_COUNT(adev) * 8); >>>> unsigned ring_instance; >>>> struct amdgpu_ring *ring; >>>> struct amd_sched_rq *rq; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> index 102b1f7..1242264 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> @@ -45,7 +45,7 @@ >>>> #define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF >>>> >>>> /* number of entries in page table */ >>>> -#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size) >>>> +#define AMDGPU_VM_PTE_COUNT(adev) (1 << adev- >>>>> vm_manager.block_size) >>> >>> >>> For safety, put parens around adev. E.g., >>> #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size) >> >> >> Yeah, thanks to point it out. >> >>> >>> With that fixed: >>> Reviewed-by: Alex Deucher <alexander.deucher at amd.com> >>> >>> As a follow on patch, it would be nice to add some reasonable clamping in >>> the >>> gmc files so we can still override the default settings and not cause too >>> much >>> problem with multiple asics in the same system. >> >> >> Could you elaborate it a bit more, about "clamping"? > > e.g., setting amdgpu_vm_block_size to -1 (auto) bt default and setting > a reasonable default for each gmc if it's set to 0, or overriding if > it's not set to -1. That way we can have different defaults on each > family and still override for testing/debugging. Thanks for your explanation. Now I got the "clamping" :) Jerry > > Alex > > >> >> Jerry >> >>> >>> Alex >>> >>>> >>>> /* PTBs (Page Table Blocks) need to be aligned to 32K */ >>>> #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 >>>> @@ -155,6 +155,8 @@ struct amdgpu_vm_manager { >>>> >>>> uint64_t max_pfn; >>>> uint32_t num_level; >>>> + uint64_t vm_size; >>>> + uint32_t block_size; >>>> /* vram base address for page table entry */ >>>> u64 vram_base_offset; >>>> /* is vm enabled? */ >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c >>>> index 30ef312..9223c2b 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c >>>> @@ -222,7 +222,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device >>>> *adev) >>>> >>>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> PAGE_TABLE_BLOCK_SIZE, >>>> - amdgpu_vm_block_size - 9); >>>> + adev->vm_manager.block_size - 9); >>>> WREG32(SOC15_REG_OFFSET(GC, 0, >>>> mmVM_CONTEXT1_CNTL) + i, tmp); >>>> WREG32(SOC15_REG_OFFSET(GC, 0, >>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); >>>> WREG32(SOC15_REG_OFFSET(GC, 0, >>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c >>>> index d958660..30d5c42 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c >>>> @@ -543,7 +543,8 @@ static int gmc_v6_0_gart_enable(struct >>>> amdgpu_device *adev) >>>> WREG32(mmVM_CONTEXT1_CNTL, >>>> VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK | >>>> (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) | >>>> - ((amdgpu_vm_block_size - 9) << >>>> VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT)); >>>> + ((adev->vm_manager.block_size - 9) >>>> + << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT)); >>>> if (amdgpu_vm_fault_stop == >>>> AMDGPU_VM_FAULT_STOP_ALWAYS) >>>> gmc_v6_0_set_fault_enable_default(adev, false); >>>> else >>>> @@ -848,7 +849,12 @@ static int gmc_v6_0_sw_init(void *handle) >>>> if (r) >>>> return r; >>>> >>>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18; >>>> + adev->vm_manager.vm_size = amdgpu_vm_size; >>>> + adev->vm_manager.block_size = amdgpu_vm_block_size; >>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; >>>> + >>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n", >>>> + adev->vm_manager.vm_size, adev- >>>>> vm_manager.block_size); >>>> >>>> adev->mc.mc_mask = 0xffffffffffULL; >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >>>> index 0c0a601..9427c7d 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >>>> @@ -639,7 +639,7 @@ static int gmc_v7_0_gart_enable(struct >>>> amdgpu_device *adev) >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> ENABLE_CONTEXT, 1); >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> PAGE_TABLE_DEPTH, 1); >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> PAGE_TABLE_BLOCK_SIZE, >>>> - amdgpu_vm_block_size - 9); >>>> + adev->vm_manager.block_size - 9); >>>> WREG32(mmVM_CONTEXT1_CNTL, tmp); >>>> if (amdgpu_vm_fault_stop == >>>> AMDGPU_VM_FAULT_STOP_ALWAYS) >>>> gmc_v7_0_set_fault_enable_default(adev, false); >>>> @@ -998,7 +998,12 @@ static int gmc_v7_0_sw_init(void *handle) >>>> * Currently set to 4GB ((1 << 20) 4k pages). >>>> * Max GPUVM size for cayman and SI is 40 bits. >>>> */ >>>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18; >>>> + adev->vm_manager.vm_size = amdgpu_vm_size; >>>> + adev->vm_manager.block_size = amdgpu_vm_block_size; >>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; >>>> + >>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n", >>>> + adev->vm_manager.vm_size, adev- >>>>> vm_manager.block_size); >>>> >>>> /* Set the internal MC address mask >>>> * This is the max address of the GPU's >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >>>> index d19d1c5..16742be 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >>>> @@ -848,7 +848,7 @@ static int gmc_v8_0_gart_enable(struct >>>> amdgpu_device *adev) >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> PAGE_TABLE_BLOCK_SIZE, >>>> - amdgpu_vm_block_size - 9); >>>> + adev->vm_manager.block_size - 9); >>>> WREG32(mmVM_CONTEXT1_CNTL, tmp); >>>> if (amdgpu_vm_fault_stop == >>>> AMDGPU_VM_FAULT_STOP_ALWAYS) >>>> gmc_v8_0_set_fault_enable_default(adev, false); >>>> @@ -1082,7 +1082,12 @@ static int gmc_v8_0_sw_init(void *handle) >>>> * Currently set to 4GB ((1 << 20) 4k pages). >>>> * Max GPUVM size for cayman and SI is 40 bits. >>>> */ >>>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18; >>>> + adev->vm_manager.vm_size = amdgpu_vm_size; >>>> + adev->vm_manager.block_size = amdgpu_vm_block_size; >>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; >>>> + >>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n", >>>> + adev->vm_manager.vm_size, adev- >>>>> vm_manager.block_size); >>>> >>>> /* Set the internal MC address mask >>>> * This is the max address of the GPU's >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> index df69aae..2180edb 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> @@ -543,11 +543,23 @@ static int gmc_v9_0_sw_init(void *handle) >>>> >>>> if (adev->flags & AMD_IS_APU) { >>>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; >>>> + adev->vm_manager.vm_size = amdgpu_vm_size; >>>> + adev->vm_manager.block_size = amdgpu_vm_block_size; >>>> } else { >>>> /* XXX Don't know how to get VRAM type yet. */ >>>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM; >>>> + /* >>>> + * To fulfill 4-level page support, >>>> + * vm size is 256TB (48bit), maximum size of Vega10, >>>> + * block size 512 (9bit) >>>> + */ >>>> + adev->vm_manager.vm_size = 1U << 18; >>>> + adev->vm_manager.block_size = 9; >>>> } >>>> >>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n", >>>> + adev->vm_manager.vm_size, adev- >>>>> vm_manager.block_size); >>>> + >>>> /* This interrupt is VMC page fault.*/ >>>> r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, >>>> &adev->mc.vm_fault); >>>> @@ -557,14 +569,7 @@ static int gmc_v9_0_sw_init(void *handle) >>>> if (r) >>>> return r; >>>> >>>> - /* Because of four level VMPTs, vm size is at least 512GB. >>>> - * The maximum size is 256TB (48bit). >>>> - */ >>>> - if (amdgpu_vm_size < 512) { >>>> - DRM_WARN("VM size is at least 512GB!\n"); >>>> - amdgpu_vm_size = 512; >>>> - } >>>> - adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18; >>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; >>>> >>>> /* Set the internal MC address mask >>>> * This is the max address of the GPU's >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c >>>> index 266a0f4..2cc1d58 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c >>>> @@ -242,7 +242,7 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device >>>> *adev) >>>> >>>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); >>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, >>>> PAGE_TABLE_BLOCK_SIZE, >>>> - amdgpu_vm_block_size - 9); >>>> + adev->vm_manager.block_size - 9); >>>> WREG32(SOC15_REG_OFFSET(MMHUB, 0, >>>> mmVM_CONTEXT1_CNTL) + i, tmp); >>>> WREG32(SOC15_REG_OFFSET(MMHUB, 0, >>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); >>>> WREG32(SOC15_REG_OFFSET(MMHUB, 0, >>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); >>>> -- >>>> 1.9.1 >>> >>> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx