OK, it doesn't matter; maybe my latest amd-staging-dkms-4.13 isn't synced to
drm-next. I just found this when I tried the KASAN config.

Regards,
David Zhou

On 2017-10-24 18:01, Christian König wrote:
> Andrey already submitted a fix for this a few days ago.
>
> Christian.
>
> On 24.10.2017 at 11:55, Chunming Zhou wrote:
>> The process that owns s_entity may already have been closed by the time
>> amdgpu_job_free_cb runs, leaving s_entity a dangling pointer once its
>> memory is freed. See the call trace below:
>>
>> [ 355.616964] ==================================================================
>> [ 355.617191] BUG: KASAN: use-after-free in amdgpu_job_free_cb+0x2f/0xc0 [amdgpu]
>> [ 355.617197] Read of size 8 at addr ffff88039d593c40 by task kworker/9:1/100
>>
>> [ 355.617206] CPU: 9 PID: 100 Comm: kworker/9:1 Not tainted 4.13.0-custom #1
>> [ 355.617208] Hardware name: Gigabyte Technology Co., Ltd. Default string/X99P-SLI-CF, BIOS F23 07/22/2016
>> [ 355.617342] Workqueue: events amd_sched_job_finish [amdgpu]
>> [ 355.617344] Call Trace:
>> [ 355.617351]  dump_stack+0x63/0x8d
>> [ 355.617356]  print_address_description+0x70/0x290
>> [ 355.617474]  ? amdgpu_job_free_cb+0x2f/0xc0 [amdgpu]
>> [ 355.617477]  kasan_report+0x265/0x350
>> [ 355.617479]  __asan_load8+0x54/0x90
>> [ 355.617603]  amdgpu_job_free_cb+0x2f/0xc0 [amdgpu]
>> [ 355.617721]  amd_sched_job_finish+0x161/0x180 [amdgpu]
>> [ 355.617725]  process_one_work+0x2ab/0x700
>> [ 355.617727]  worker_thread+0x90/0x720
>> [ 355.617731]  kthread+0x18c/0x1e0
>> [ 355.617732]  ? process_one_work+0x700/0x700
>> [ 355.617735]  ? kthread_create_on_node+0xb0/0xb0
>> [ 355.617738]  ret_from_fork+0x25/0x30
>>
>> [ 355.617742] Allocated by task 1347:
>> [ 355.617747]  save_stack_trace+0x1b/0x20
>> [ 355.617749]  save_stack+0x46/0xd0
>> [ 355.617751]  kasan_kmalloc+0xad/0xe0
>> [ 355.617753]  kmem_cache_alloc_trace+0xef/0x200
>> [ 355.617853]  amdgpu_driver_open_kms+0x98/0x290 [amdgpu]
>> [ 355.617883]  drm_open+0x38c/0x6e0 [drm]
>> [ 355.617908]  drm_stub_open+0x144/0x1b0 [drm]
>> [ 355.617911]  chrdev_open+0x180/0x320
>> [ 355.617913]  do_dentry_open+0x3a2/0x570
>> [ 355.617915]  vfs_open+0x86/0xe0
>> [ 355.617918]  path_openat+0x49e/0x1db0
>> [ 355.617919]  do_filp_open+0x11c/0x1a0
>> [ 355.617921]  do_sys_open+0x16f/0x2a0
>> [ 355.617923]  SyS_open+0x1e/0x20
>> [ 355.617926]  do_syscall_64+0xea/0x210
>> [ 355.617928]  return_from_SYSCALL_64+0x0/0x6a
>>
>> [ 355.617931] Freed by task 1347:
>> [ 355.617934]  save_stack_trace+0x1b/0x20
>> [ 355.617936]  save_stack+0x46/0xd0
>> [ 355.617937]  kasan_slab_free+0x70/0xc0
>> [ 355.617939]  kfree+0x9d/0x1c0
>> [ 355.618038]  amdgpu_driver_postclose_kms+0x1bc/0x3e0 [amdgpu]
>> [ 355.618063]  drm_release+0x454/0x610 [drm]
>> [ 355.618065]  __fput+0x177/0x350
>> [ 355.618066]  ____fput+0xe/0x10
>> [ 355.618068]  task_work_run+0xa0/0xc0
>> [ 355.618070]  do_exit+0x456/0x1320
>> [ 355.618072]  do_group_exit+0x86/0x130
>> [ 355.618074]  SyS_exit_group+0x1d/0x20
>> [ 355.618076]  do_syscall_64+0xea/0x210
>> [ 355.618078]  return_from_SYSCALL_64+0x0/0x6a
>>
>> [ 355.618081] The buggy address belongs to the object at ffff88039d593b80
>>                which belongs to the cache kmalloc-2048 of size 2048
>> [ 355.618085] The buggy address is located 192 bytes inside of
>>                2048-byte region [ffff88039d593b80, ffff88039d594380)
>> [ 355.618087] The buggy address belongs to the page:
>> [ 355.618091] page:ffffea000e756400 count:1 mapcount:0 mapping:          (null) index:0x0 compound_mapcount: 0
>> [ 355.618095] flags: 0x2ffff0000008100(slab|head)
>> [ 355.618099] raw: 02ffff0000008100 0000000000000000 0000000000000000 00000001000f000f
>> [ 355.618103] raw: ffffea000edb0600 0000000200000002 ffff8803bfc0ea00 0000000000000000
>> [ 355.618105] page dumped because: kasan: bad access detected
>>
>> [ 355.618108] Memory state around the buggy address:
>> [ 355.618110]  ffff88039d593b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>> [ 355.618113]  ffff88039d593b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>> [ 355.618116] >ffff88039d593c00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>> [ 355.618117]                                           ^
>> [ 355.618120]  ffff88039d593c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>> [ 355.618122]  ffff88039d593d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>> [ 355.618124] ==================================================================
>> [ 355.618126] Disabling lock debugging due to kernel taint
>>
>> Change-Id: I8ff7122796b8cd16fc26e9c40e8d4c8153d67e0c
>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>> ---
>>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c |  1 +
>>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 27 ++++++++++++++-------------
>>  2 files changed, 15 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
>> index 007fdbd..8101ed7 100644
>> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
>> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
>> @@ -535,6 +535,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
>>      if (!job->s_fence)
>>          return -ENOMEM;
>>      job->id = atomic64_inc_return(&sched->job_id_count);
>> +   job->priority = job->s_entity->rq - job->sched->sched_rq;
>>
>>      INIT_WORK(&job->finish_work, amd_sched_job_finish);
>>      INIT_LIST_HEAD(&job->node);
>> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>> index e21299c..8808eb1 100644
>> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>> @@ -77,6 +77,18 @@ struct amd_sched_fence {
>>      void                           *owner;
>>  };
>>
>> +enum amd_sched_priority {
>> +   AMD_SCHED_PRIORITY_MIN,
>> +   AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
>> +   AMD_SCHED_PRIORITY_NORMAL,
>> +   AMD_SCHED_PRIORITY_HIGH_SW,
>> +   AMD_SCHED_PRIORITY_HIGH_HW,
>> +   AMD_SCHED_PRIORITY_KERNEL,
>> +   AMD_SCHED_PRIORITY_MAX,
>> +   AMD_SCHED_PRIORITY_INVALID = -1,
>> +   AMD_SCHED_PRIORITY_UNSET = -2
>> +};
>> +
>>  struct amd_sched_job {
>>      struct amd_gpu_scheduler       *sched;
>>      struct amd_sched_entity        *s_entity;
>> @@ -87,6 +99,7 @@ struct amd_sched_job {
>>      struct delayed_work       work_tdr;
>>      uint64_t           id;
>>      atomic_t karma;
>> +   enum amd_sched_priority       priority;
>>  };
>>
>>  extern const struct dma_fence_ops amd_sched_fence_ops_scheduled;
>> @@ -118,18 +131,6 @@ struct amd_sched_backend_ops {
>>      void (*free_job)(struct amd_sched_job *sched_job);
>>  };
>>
>> -enum amd_sched_priority {
>> -   AMD_SCHED_PRIORITY_MIN,
>> -   AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
>> -   AMD_SCHED_PRIORITY_NORMAL,
>> -   AMD_SCHED_PRIORITY_HIGH_SW,
>> -   AMD_SCHED_PRIORITY_HIGH_HW,
>> -   AMD_SCHED_PRIORITY_KERNEL,
>> -   AMD_SCHED_PRIORITY_MAX,
>> -   AMD_SCHED_PRIORITY_INVALID = -1,
>> -   AMD_SCHED_PRIORITY_UNSET = -2
>> -};
>> -
>>  /**
>>   * One scheduler is implemented for each hardware ring
>>  */
>> @@ -183,7 +184,7 @@ void amd_sched_job_kickout(struct amd_sched_job *s_job);
>>  static inline enum amd_sched_priority
>>  amd_sched_get_job_priority(struct amd_sched_job *job)
>>  {
>> -   return (job->s_entity->rq - job->sched->sched_rq);
>> +   return job->priority;
>>  }
>>
>>  #endif
>
>
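
For illustration, the lifetime problem the patch fixes reduces to a small
standalone C model. The types and function names below (rq, entity,
scheduler, job, job_init, job_get_priority) are simplified stand-ins for
amd_sched_entity, amd_gpu_scheduler, amd_sched_job and friends, not the
kernel code itself: the entity lives in per-process driver state that is
kfree()d on process exit (via amdgpu_driver_postclose_kms in the trace
above), while the job's deferred free callback can run afterwards, so any
value derived from the entity must be cached while the entity is still
alive.

#include <stdio.h>
#include <stdlib.h>

struct rq { int dummy; };

/* Stand-in for amd_sched_entity: owned by per-process state,
 * freed when the process closes its DRM file descriptor. */
struct entity {
	struct rq *rq;              /* points into scheduler->sched_rq[] */
};

/* Stand-in for amd_gpu_scheduler: one runqueue per priority level
 * (the array size here is illustrative, not the kernel's). */
struct scheduler {
	struct rq sched_rq[5];
};

/* Stand-in for amd_sched_job: can outlive the entity that queued it. */
struct job {
	struct scheduler *sched;
	struct entity *s_entity;    /* may dangle after process exit */
	int priority;               /* cached at init, valid for the job's lifetime */
};

/* Init time: the entity is guaranteed alive, so cache its priority
 * (the index of its runqueue within the scheduler's array). */
static void job_init(struct job *job, struct scheduler *sched,
		     struct entity *entity)
{
	job->sched = sched;
	job->s_entity = entity;
	job->priority = (int)(entity->rq - sched->sched_rq);
}

/* Free time: read only the cached value; dereferencing job->s_entity
 * here is exactly the use-after-free KASAN reported. */
static int job_get_priority(const struct job *job)
{
	return job->priority;
}

int main(void)
{
	struct scheduler sched;
	struct entity *entity = malloc(sizeof(*entity));
	struct job job;

	entity->rq = &sched.sched_rq[1];    /* e.g. the NORMAL runqueue */
	job_init(&job, &sched, entity);

	free(entity);   /* process exited; the entity's memory is gone */

	/* Safe: uses the cached priority, never the freed entity. */
	printf("priority = %d\n", job_get_priority(&job));
	return 0;
}

The patch performs exactly this caching: amd_sched_job_init() records the
runqueue index in the new job->priority field, and
amd_sched_get_job_priority() returns the cached value instead of
recomputing it from the possibly-freed job->s_entity.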