[AMD Official Use Only - General] > -----Original Message----- > From: Limonciello, Mario <Mario.Limonciello@xxxxxxx> > Sent: Monday, July 17, 2023 8:39 AM > To: Chen, Guchun <Guchun.Chen@xxxxxxx>; amd- > gfx@xxxxxxxxxxxxxxxxxxxxx; Deucher, Alexander > <Alexander.Deucher@xxxxxxx>; Zhang, Hawking > <Hawking.Zhang@xxxxxxx>; Koenig, Christian > <Christian.Koenig@xxxxxxx>; Yang, Philip <Philip.Yang@xxxxxxx>; > Kuehling, Felix <Felix.Kuehling@xxxxxxx>; mikhail.v.gavrilov@xxxxxxxxx > Subject: Re: [1/2] drm/amdgpu: fix slab-out-of-bounds issue in > amdgpu_vm_pt_create > > > On 7/14/2023 6:05 AM, Guchun Chen wrote: > > Recent code set xcp_id stored from file private data when opening > > device to amdgpu bo for accounting memory usage etc, but not all VMs > > are attached to this fpriv structure like the vm cases in > > amdgpu_mes_self_test, otherwise, KASAN will complain below out of > > bound access. And more importantly, VM code should not touch fpriv > > structure, so drop fpriv code handling from amdgpu_vm_pt. > > > > [ 77.292314] BUG: KASAN: slab-out-of-bounds in > amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu] > > [ 77.293845] Read of size 4 at addr ffff888102c48a48 by task > modprobe/1069 > > [ 77.294146] Call Trace: > > [ 77.294178] <TASK> > > [ 77.294208] dump_stack_lvl+0x49/0x63 > > [ 77.294260] print_report+0x16f/0x4a6 > > [ 77.294307] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu] > > [ 77.295979] ? kasan_complete_mode_report_info+0x3c/0x200 > > [ 77.296057] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu] > > [ 77.297556] kasan_report+0xb4/0x130 > > [ 77.297609] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu] > > [ 77.299202] __asan_load4+0x6f/0x90 > > [ 77.299272] amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu] > > [ 77.300796] ? amdgpu_init+0x6e/0x1000 [amdgpu] > > [ 77.302222] ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu] > > [ 77.303721] ? preempt_count_sub+0x18/0xc0 > > [ 77.303786] amdgpu_vm_init+0x39e/0x870 [amdgpu] > > [ 77.305186] ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu] > > [ 77.306683] ? kasan_set_track+0x25/0x30 > > [ 77.306737] ? kasan_save_alloc_info+0x1b/0x30 > > [ 77.306795] ? __kasan_kmalloc+0x87/0xa0 > > [ 77.306852] amdgpu_mes_self_test+0x169/0x620 [amdgpu] > > > > Fixes: ffc6deb773f7 ("drm/amdkfd: Store xcp partition id to amdgpu > > bo") > > Signed-off-by: Guchun Chen <guchun.chen@xxxxxxx> > > Reviewed-by: Christian König <christian.koenig@xxxxxxx> > > This bug was also reported in Gitlab. Here's some more tags. > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2686 > |Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@xxxxxxxxx>| Thanks for the info, Mario. Will add it in new patch set. Regards, Guchun > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++-- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +++-- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 11 ++++++----- > > 5 files changed, 14 insertions(+), 11 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > index 85a0d5f419c8..9a5aa4318cad 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > @@ -1232,7 +1232,7 @@ int amdgpu_driver_open_kms(struct drm_device > *dev, struct drm_file *file_priv) > > pasid = 0; > > } > > > > - r = amdgpu_vm_init(adev, &fpriv->vm); > > + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id); > > if (r) > > goto error_pasid; > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > > index e9091ebfe230..cac1d1b227f5 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > > @@ -1382,7 +1382,7 @@ int amdgpu_mes_self_test(struct amdgpu_device > *adev) > > goto error_pasid; > > } > > > > - r = amdgpu_vm_init(adev, vm); > > + r = amdgpu_vm_init(adev, vm, 0); > > if (r) { > > DRM_ERROR("failed to initialize vm\n"); > > goto error_pasid; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > index 32adc31c093d..74380b21e7a5 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > @@ -2121,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct > amdgpu_vm *vm, long timeout) > > * > > * @adev: amdgpu_device pointer > > * @vm: requested vm > > + * @xcp_id: GPU partition selection id > > * > > * Init @vm fields. > > * > > * Returns: > > * 0 for success, error for failure. > > */ > > -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) > > +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, > > +int32_t xcp_id) > > { > > struct amdgpu_bo *root_bo; > > struct amdgpu_bo_vm *root; > > @@ -2177,7 +2178,7 @@ int amdgpu_vm_init(struct amdgpu_device > *adev, struct amdgpu_vm *vm) > > vm->evicting = false; > > > > r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, > > - false, &root); > > + false, &root, xcp_id); > > if (r) > > goto error_free_delayed; > > root_bo = &root->bo; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > index 88ee4507f6b6..bca258c38919 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > @@ -398,7 +398,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device > *adev, struct amdgpu_vm *vm, > > u32 pasid); > > > > long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int > > amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); > > +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, > > +int32_t xcp_id); > > int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct > amdgpu_vm *vm); > > void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct > amdgpu_vm *vm); > > void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm > > *vm); @@ -481,7 +481,8 @@ void amdgpu_vm_get_memory(struct > amdgpu_vm *vm, > > int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm > *vm, > > struct amdgpu_bo_vm *vmbo, bool immediate); > > int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct > amdgpu_vm *vm, > > - int level, bool immediate, struct amdgpu_bo_vm > **vmbo); > > + int level, bool immediate, struct amdgpu_bo_vm > **vmbo, > > + int32_t xcp_id); > > void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct > amdgpu_vm *vm); > > bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, > > struct amdgpu_vm *vm); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > index 70fc5856a5b9..eb52dfe64948 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > @@ -498,11 +498,12 @@ int amdgpu_vm_pt_clear(struct amdgpu_device > *adev, struct amdgpu_vm *vm, > > * @level: the page table level > > * @immediate: use a immediate update > > * @vmbo: pointer to the buffer object pointer > > + * @xcp_id: GPU partition id > > */ > > int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct > amdgpu_vm *vm, > > - int level, bool immediate, struct amdgpu_bo_vm > **vmbo) > > + int level, bool immediate, struct amdgpu_bo_vm > **vmbo, > > + int32_t xcp_id) > > { > > - struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, > vm); > > struct amdgpu_bo_param bp; > > struct amdgpu_bo *bo; > > struct dma_resv *resv; > > @@ -535,7 +536,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device > > *adev, struct amdgpu_vm *vm, > > > > bp.type = ttm_bo_type_kernel; > > bp.no_wait_gpu = immediate; > > - bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; > > + bp.xcp_id_plus1 = xcp_id + 1; > > > > if (vm->root.bo) > > bp.resv = vm->root.bo->tbo.base.resv; @@ -561,7 +562,7 > @@ int > > amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm > *vm, > > bp.type = ttm_bo_type_kernel; > > bp.resv = bo->tbo.base.resv; > > bp.bo_ptr_size = sizeof(struct amdgpu_bo); > > - bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; > > + bp.xcp_id_plus1 = xcp_id + 1; > > > > r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); > > > > @@ -606,7 +607,7 @@ static int amdgpu_vm_pt_alloc(struct > amdgpu_device *adev, > > return 0; > > > > amdgpu_vm_eviction_unlock(vm); > > - r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); > > + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt, > 0); > > amdgpu_vm_eviction_lock(vm); > > if (r) > > return r;