Looking at the ticket, it's getting a segfault in user mode. The only
plausible mechanism I can see by which this change could cause that
segfault is a badly handled memory allocation failure. But I don't have
an explanation for how this patch introduces a memory allocation failure
that wasn't there before (other than maybe some very obscure internal
detail of the drm_mm node manager).

I do see one potential problem on the error handling path, which we're
now more likely to take. But I think it should only affect 32-bit
kernels:

> error:
> 	while (i--)
> 		drm_mm_remove_node(&nodes[i]);
> 	spin_unlock(&mgr->lock);
> 	atomic64_sub(mem->num_pages << PAGE_SHIFT,
> 		     &mgr->usage);

We need a (u64) cast here to avoid the left shift overflowing.
mem->num_pages is unsigned long, so it's already 64 bits on a 64-bit
kernel.

> 	kvfree(nodes);
> 	return r;
> }
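Something like this should avoid the overflow (untested sketch, just to
spell out the cast; the shift is then done in 64 bits even where
unsigned long is only 32 bits):

	/* widen num_pages before shifting so the result can't wrap */
	atomic64_sub((u64)mem->num_pages << PAGE_SHIFT, &mgr->usage);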
Regards,
  Felix


On 2021-07-15 at 9:50 a.m., Eric Huang wrote:
> Hi Christian,
>
> I have pushed it into amd-staging-dkms-5.11, but it causes a
> regression with the TransferBench test on MI200. The Jira ticket is
> here: https://ontrack-internal.amd.com/browse/SWDEV-295245
>
> Can you please take a look? Thanks!
>
> Regards,
> Eric
>
> On 2021-07-14 4:33 a.m., Christian König wrote:
>> Hi Eric,
>>
>> feel free to push into amd-staging-dkms-5.11, but please don't push
>> it into amd-staging-drm-next.
>>
>> The latter will just cause a merge failure which Alex needs to
>> resolve manually.
>>
>> I can take care of pushing to amd-staging-drm-next as soon as that is
>> rebased on the latest upstream.
>>
>> Regards,
>> Christian.
>>
>> On 13.07.21 at 21:19, Eric Huang wrote:
>>> Hi Christian/Felix,
>>>
>>> If you have no objections, it will be pushed into
>>> amd-staging-dkms-5.11 and amd-staging-drm-next.
>>>
>>> Thanks,
>>> Eric
>>>
>>> On 2021-07-13 3:17 p.m., Eric Huang wrote:
>>>> For allocations larger than 48MiB we need more than a page for the
>>>> housekeeping in the worst case, resulting in the usual vmalloc
>>>> overhead.
>>>>
>>>> Try to avoid this by assuming the good case and only falling back
>>>> to the worst case if that didn't work.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
>>>> Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx>
>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
>>>> ---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
>>>>  1 file changed, 53 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> index be4261c4512e..ecbe05e1db66 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>>>  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>  			       struct ttm_buffer_object *tbo,
>>>>  			       const struct ttm_place *place,
>>>> +			       unsigned long num_nodes,
>>>> +			       unsigned long pages_per_node,
>>>>  			       struct ttm_resource *mem)
>>>>  {
>>>> -	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>> +	unsigned long lpfn, pages_left, pages;
>>>>  	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>  	struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>>  	uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>  		return -ENOSPC;
>>>>  	}
>>>>
>>>> -	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> -		pages_per_node = ~0ul;
>>>> -		num_nodes = 1;
>>>> -	} else {
>>>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>> -		pages_per_node = HPAGE_PMD_NR;
>>>> -#else
>>>> -		/* default to 2MB */
>>>> -		pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>>> -#endif
>>>> -		pages_per_node = max_t(uint32_t, pages_per_node,
>>>> -				       mem->page_alignment);
>>>> -		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>>> -	}
>>>> -
>>>>  	nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
>>>>  			       GFP_KERNEL | __GFP_ZERO);
>>>>  	if (!nodes) {
>>>> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>  	i = 0;
>>>>  	spin_lock(&mgr->lock);
>>>>  	while (pages_left) {
>>>> -		uint32_t alignment = mem->page_alignment;
>>>> +		unsigned long alignment = mem->page_alignment;
>>>> +
>>>> +		if (i >= num_nodes) {
>>>> +			r = -E2BIG;
>>>> +			goto error;
>>>> +		}
>>>>
>>>>  		if (pages >= pages_per_node)
>>>>  			alignment = pages_per_node;
>>>> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>  	return r;
>>>>  }
>>>>
>>>> +/**
>>>> + * amdgpu_vram_mgr_alloc - allocate new range
>>>> + *
>>>> + * @man: TTM memory type manager
>>>> + * @tbo: TTM BO we need this range for
>>>> + * @place: placement flags and restrictions
>>>> + * @mem: the resulting mem object
>>>> + *
>>>> + * Allocate VRAM for the given BO.
>>>> + */
>>>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>>>> +				 struct ttm_buffer_object *tbo,
>>>> +				 const struct ttm_place *place,
>>>> +				 struct ttm_resource *mem)
>>>> +{
>>>> +	unsigned long num_nodes, pages_per_node;
>>>> +	int r;
>>>> +
>>>> +	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>> +		return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
>>>> +
>>>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>> +	pages_per_node = HPAGE_PMD_NR;
>>>> +#else
>>>> +	/* default to 2MB */
>>>> +	pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>>> +#endif
>>>> +	pages_per_node = max_t(uint32_t, pages_per_node,
>>>> +			       mem->page_alignment);
>>>> +	num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>>> +
>>>> +	if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
>>>> +		r = amdgpu_vram_mgr_new(man, tbo, place,
>>>> +					PAGE_SIZE / sizeof(struct drm_mm_node),
>>>> +					pages_per_node, mem);
>>>> +		if (r != -E2BIG)
>>>> +			return r;
>>>> +	}
>>>> +
>>>> +	return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
>>>> +				   mem);
>>>> +}
>>>> +
>>>>  /**
>>>>   * amdgpu_vram_mgr_del - free ranges
>>>>   *
>>>> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
>>>>  }
>>>>
>>>>  static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
>>>> -	.alloc = amdgpu_vram_mgr_new,
>>>> +	.alloc = amdgpu_vram_mgr_alloc,
>>>>  	.free = amdgpu_vram_mgr_del,
>>>>  	.debug = amdgpu_vram_mgr_debug
>>>>  };
>>>
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx