[Public] > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> > Sent: Thursday, February 16, 2023 6:44 PM > To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd- > gfx@xxxxxxxxxxxxxxxxxxxxx; dri-devel@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH 03/32] drm/amdkfd: prepare per-process debug enable > and disable > > > On 2023-01-25 14:53, Jonathan Kim wrote: > > The ROCm debugger will attach to a process to debug by PTRACE and will > > expect the KFD to prepare a process for the target PID, whether the > > target PID has opened the KFD device or not. > > > > This patch is to explicitly handle this requirement. Further HW mode > > setting and runtime coordination requirements will be handled in > > following patches. > > > > In the case where the target process has not opened the KFD device, > > a new KFD process must be created for the target PID. > > The debugger as well as the target process for this case will not have > > acquired any VMs so handle process restoration to correctly account for > > this. > > > > To coordinate with HSA runtime, the debugger must be aware of the target > > process' runtime enablement status and will copy the runtime status > > information into the debugged KFD process for later query. > > > > On enablement, the debugger will subscribe to a set of exceptions where > > each exception event will notify the debugger through a pollable FIFO > > file descriptor that the debugger provides to the KFD to manage. > > Some events will be synchronously raised while others are scheduled, > > which is why a debug_event_workarea worker is initialized. > > > > Finally on process termination of either the debugger or the target, > > debugging must be disabled if this has not already been done. > > > > v3: fix typo on debug trap disable and PTRACE ATTACH relax check. > > remove unnecessary queue eviction counter reset when there's nothing > > to evict. > > change err code to EALREADY if attaching to an already attached process.
> > move debug disable to release worker to avoid race with disable from > > ioctl call. > > > > v2: relax debug trap disable and PTRACE ATTACH requirement. > > > > Signed-off-by: Jonathan Kim<jonathan.kim@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- > > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 88 ++++++++++++++++- > > drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 94 > +++++++++++++++++++ > > drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 33 +++++++ > > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 22 ++++- > > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 34 ++++++- > > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 63 +++++++++---- > > 7 files changed, 308 insertions(+), 29 deletions(-) > > create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_debug.c > > create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_debug.h > > > > diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile > b/drivers/gpu/drm/amd/amdkfd/Makefile > > index e758c2a24cd0..747754428073 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/Makefile > > +++ b/drivers/gpu/drm/amd/amdkfd/Makefile > > @@ -55,7 +55,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ > > $(AMDKFD_PATH)/kfd_int_process_v9.o \ > > $(AMDKFD_PATH)/kfd_int_process_v11.o \ > > $(AMDKFD_PATH)/kfd_smi_events.o \ > > - $(AMDKFD_PATH)/kfd_crat.o > > + $(AMDKFD_PATH)/kfd_crat.o \ > > + $(AMDKFD_PATH)/kfd_debug.o > > > > ifneq ($(CONFIG_AMD_IOMMU_V2),) > > AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > index d3b019e64093..ee05c2e54ef6 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > @@ -44,6 +44,7 @@ > > #include "amdgpu_amdkfd.h" > > #include "kfd_smi_events.h" > > #include "amdgpu_dma_buf.h" > > +#include "kfd_debug.h" > > > > static long kfd_ioctl(struct file *, unsigned int, unsigned long); > > static int kfd_open(struct inode *, struct file *); > > @@ -142,10 +143,15 @@ static int 
kfd_open(struct inode *inode, struct > file *filep) > > return -EPERM; > > } > > > > - process = kfd_create_process(filep); > > + process = kfd_create_process(current); > > if (IS_ERR(process)) > > return PTR_ERR(process); > > > > + if (kfd_process_init_cwsr_apu(process, filep)) { > > + kfd_unref_process(process); > > + return -EFAULT; > > + } > > + > > if (kfd_is_locked()) { > > dev_dbg(kfd_device, "kfd is locked!\n" > > "process %d unreferenced", process->pasid); > > @@ -2653,6 +2659,9 @@ static int kfd_ioctl_runtime_enable(struct file > *filep, struct kfd_process *p, v > > static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process > *p, void *data) > > { > > struct kfd_ioctl_dbg_trap_args *args = data; > > + struct task_struct *thread = NULL; > > + struct pid *pid = NULL; > > + struct kfd_process *target = NULL; > > int r = 0; > > > > if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { > > @@ -2660,9 +2669,71 @@ static int kfd_ioctl_set_debug_trap(struct file > *filep, struct kfd_process *p, v > > return -EINVAL; > > } > > > > + pid = find_get_pid(args->pid); > > + if (!pid) { > > + pr_debug("Cannot find pid info for %i\n", args->pid); > > + r = -ESRCH; > > + goto out; > > + } > > + > > + thread = get_pid_task(pid, PIDTYPE_PID); > > + > > + if (args->op == KFD_IOC_DBG_TRAP_ENABLE) { > > + bool create_process; > > + > > + rcu_read_lock(); > > + create_process = thread && thread != current && > ptrace_parent(thread) == current; > > + rcu_read_unlock(); > > + > > + target = create_process ? kfd_create_process(thread) : > > + kfd_lookup_process_by_pid(pid); > > + } else { > > + target = kfd_lookup_process_by_pid(pid); > > + } > > + > > + if (!target) { > > + pr_debug("Cannot find process PID %i to debug\n", args- > >pid); > > + r = -ESRCH; > > + goto out; > > + } > > + > > + /* Check if target is still PTRACED. 
*/ > > + rcu_read_lock(); > > + if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE > > + && ptrace_parent(target->lead_thread) != > current) { > > + pr_err("PID %i is not PTRACED and cannot be debugged\n", > args->pid); > > + r = -EPERM; > > + } > > + rcu_read_unlock(); > > + > > + if (r) > > + goto out; > > + > > + mutex_lock(&target->mutex); > > + > > + if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target- > >debug_trap_enabled) { > > + pr_err("PID %i not debug enabled for op %i\n", args->pid, > args->op); > > + r = -EINVAL; > > + goto unlock_out; > > + } > > + > > switch (args->op) { > > case KFD_IOC_DBG_TRAP_ENABLE: > > + if (target != p) > > + target->debugger_process = p; > > + > > + r = kfd_dbg_trap_enable(target, > > + args->enable.dbg_fd, > > + (void __user *)args->enable.rinfo_ptr, > > + &args->enable.rinfo_size); > > + if (!r) > > + target->exception_enable_mask = args- > >enable.exception_mask; > > + > > + pr_warn("Debug functions limited\n"); > > + break; > > case KFD_IOC_DBG_TRAP_DISABLE: > > + r = kfd_dbg_trap_disable(target); > > + break; > > case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: > > case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: > > case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: > > @@ -2676,7 +2747,7 @@ static int kfd_ioctl_set_debug_trap(struct file > *filep, struct kfd_process *p, v > > case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: > > case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: > > case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: > > - pr_warn("Debugging not supported yet\n"); > > + pr_warn("Debug op %i not supported yet\n", args->op); > > r = -EACCES; > > break; > > default: > > @@ -2684,6 +2755,19 @@ static int kfd_ioctl_set_debug_trap(struct file > *filep, struct kfd_process *p, v > > r = -EINVAL; > > } > > > > +unlock_out: > > + mutex_unlock(&target->mutex); > > + > > +out: > > + if (thread) > > + put_task_struct(thread); > > + > > + if (pid) > > + put_pid(pid); > > + > > + if (target) > > + kfd_unref_process(target); > > + > > return r; > > } 
> > > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > > new file mode 100644 > > index 000000000000..f6ea6db266b4 > > --- /dev/null > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > > @@ -0,0 +1,94 @@ > > +/* > > + * Copyright 2022 Advanced Micro Devices, Inc. > > + * > > + * Permission is hereby granted, free of charge, to any person obtaining a > > + * copy of this software and associated documentation files (the > "Software"), > > + * to deal in the Software without restriction, including without limitation > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be included > in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL > > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > DAMAGES OR > > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > OTHERWISE, > > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR > THE USE OR > > + * OTHER DEALINGS IN THE SOFTWARE. 
> > + */ > > + > > +#include "kfd_debug.h" > > +#include <linux/file.h> > > + > > +void debug_event_write_work_handler(struct work_struct *work) > > +{ > > + struct kfd_process *process; > > + > > + static const char write_data = '.'; > > + loff_t pos = 0; > > + > > + process = container_of(work, > > + struct kfd_process, > > + debug_event_workarea); > > + > > + kernel_write(process->dbg_ev_file, &write_data, 1, &pos); > > +} > > + > > +int kfd_dbg_trap_disable(struct kfd_process *target) > > +{ > > + if (!target->debug_trap_enabled) > > + return 0; > > + > > + fput(target->dbg_ev_file); > > + target->dbg_ev_file = NULL; > > + > > + if (target->debugger_process) { > > + atomic_dec(&target->debugger_process- > >debugged_process_count); > > + target->debugger_process = NULL; > > + } > > + > > + target->debug_trap_enabled = false; > > + kfd_unref_process(target); > > + > > + return 0; > > +} > > + > > +int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd, > > + void __user *runtime_info, uint32_t *runtime_size) > > +{ > > + struct file *f; > > + uint32_t copy_size; > > + int r = 0; > > + > > + if (target->debug_trap_enabled) > > + return -EALREADY; > > + > > + copy_size = min((size_t)(*runtime_size), sizeof(target- > >runtime_info)); > > + > > + f = fget(fd); > > + if (!f) { > > + pr_err("Failed to get file for (%i)\n", fd); > > + return -EBADF; > > + } > > + > > + target->dbg_ev_file = f; > > + > > + /* We already hold the process reference but hold another one for > the > > + * debug session. 
> > + */ > > + kref_get(&target->ref); > > + target->debug_trap_enabled = true; > > + > > + if (target->debugger_process) > > + atomic_inc(&target->debugger_process- > >debugged_process_count); > > + > > + if (copy_to_user(runtime_info, (void *)&target->runtime_info, > copy_size)) > > + r = -EFAULT; > > + > > + *runtime_size = sizeof(target->runtime_info); > > + > > + return r; > > +} > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > > new file mode 100644 > > index 000000000000..b2217eb1399c > > --- /dev/null > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > > @@ -0,0 +1,33 @@ > > +/* > > + * Copyright 2022 Advanced Micro Devices, Inc. > > + * > > + * Permission is hereby granted, free of charge, to any person obtaining a > > + * copy of this software and associated documentation files (the > "Software"), > > + * to deal in the Software without restriction, including without limitation > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be included > in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL > > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > DAMAGES OR > > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > OTHERWISE, > > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR > THE USE OR > > + * OTHER DEALINGS IN THE SOFTWARE. 
> > + */ > > + > > +#ifndef KFD_DEBUG_EVENTS_H_INCLUDED > > +#define KFD_DEBUG_EVENTS_H_INCLUDED > > + > > +#include "kfd_priv.h" > > + > > +int kfd_dbg_trap_disable(struct kfd_process *target); > > +int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd, > > + void __user *runtime_info, > > + uint32_t *runtime_info_size); > > +void debug_event_write_work_handler(struct work_struct *work); > > +#endif > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > index c06ada0844ba..a2ac98d06e71 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > @@ -979,6 +979,14 @@ static int evict_process_queues_cpsch(struct > device_queue_manager *dqm, > > goto out; > > > > pdd = qpd_to_pdd(qpd); > > + > > + /* The debugger creates processes that temporarily have not > acquired > > + * all VMs for all devices and has no VMs itself. > > + * Skip queue eviction on process eviction. > > + */ > > + if (!pdd->drm_priv) > > + goto out; > > + > This should be before qpd-> Sorry, I didn't quite catch what you were saying here (did your comment get cut off?). Did you mean that the pdd->drm_priv check needs to go before the if (qpd->evicted++ > 0) /* already evicted, do nothing */ check?
Thanks, Jon > > pr_debug_ratelimited("Evicting PASID 0x%x queues\n", > > pdd->process->pasid); > > > > @@ -1100,13 +1108,10 @@ static int restore_process_queues_cpsch(struct > device_queue_manager *dqm, > > { > > struct queue *q; > > struct kfd_process_device *pdd; > > - uint64_t pd_base; > > uint64_t eviction_duration; > > int retval = 0; > > > > pdd = qpd_to_pdd(qpd); > > - /* Retrieve PD base */ > > - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd- > >drm_priv); > > > > dqm_lock(dqm); > > if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing > */ > > @@ -1116,12 +1121,19 @@ static int restore_process_queues_cpsch(struct > device_queue_manager *dqm, > > goto out; > > } > > > > + /* The debugger creates processes that temporarily have not > acquired > > + * all VMs for all devices and has no VMs itself. > > + * Skip queue restore on process restore. > > + */ > > + if (!pdd->drm_priv) > > + goto out; > > + > > I had a comment here that "qpd->evicted = 0;" was duplicated. It is > still needed in this case. Otherwise the process will end up being > created with all queues in an evicted state and no way to execute > anything on the GPU. > > You only need one instance of "qpd->evicted = 0;", but it needs to be in > the right place (after the vm_not_acquired label you had in v1 of this > patch). 
> > Regards, > Felix > > > > pr_debug_ratelimited("Restoring PASID 0x%x queues\n", > > pdd->process->pasid); > > > > /* Update PD Base in QPD */ > > - qpd->page_table_base = pd_base; > > - pr_debug("Updated PD address to 0x%llx\n", pd_base); > > + qpd->page_table_base = > amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); > > + pr_debug("Updated PD address to 0x%llx\n", qpd- > >page_table_base); > > > > /* activate all active queues on the qpd */ > > list_for_each_entry(q, &qpd->queues_list, list) { > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > index bfa30d12406b..62b75ba28425 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > @@ -886,19 +886,48 @@ struct kfd_process { > > */ > > unsigned long last_restore_timestamp; > > > > + /* Indicates device process is debug attached with reserved vmid. */ > > + bool debug_trap_enabled; > > + > > + /* per-process-per device debug event fd file */ > > + struct file *dbg_ev_file; > > + > > + /* If the process is a kfd debugger, we need to know so we can clean > > + * up at exit time. If a process enables debugging on itself, it does > > + * its own clean-up, so we don't set the flag here. We track this by > > + * counting the number of processes this process is debugging. > > + */ > > + atomic_t debugged_process_count; > > + > > + /* If the process is a debugged, this is the debugger process */ > > + struct kfd_process *debugger_process; > > + > > /* Kobj for our procfs */ > > struct kobject *kobj; > > struct kobject *kobj_queues; > > struct attribute attr_pasid; > > > > + /* Keep track cwsr init */ > > + bool has_cwsr; > > + > > + /* Exception code enable mask and status */ > > + uint64_t exception_enable_mask; > > + > > /* shared virtual memory registered by this process */ > > struct svm_range_list svms; > > > > bool xnack_enabled; > > > > + /* Work area for debugger event writer worker. 
*/ > > + struct work_struct debug_event_workarea; > > + > > atomic_t poison; > > /* Queues are in paused stated because we are in the process of > doing a CRIU checkpoint */ > > bool queues_paused; > > + > > + /* Tracks runtime enable status */ > > + struct kfd_runtime_info runtime_info; > > + > > }; > > > > #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ > > @@ -928,7 +957,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev); > > > > int kfd_process_create_wq(void); > > void kfd_process_destroy_wq(void); > > -struct kfd_process *kfd_create_process(struct file *filep); > > +struct kfd_process *kfd_create_process(struct task_struct *thread); > > struct kfd_process *kfd_get_process(const struct task_struct *task); > > struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); > > struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct > *mm); > > @@ -1055,6 +1084,9 @@ void kfd_process_set_trap_handler(struct > qcm_process_device *qpd, > > uint64_t tba_addr, > > uint64_t tma_addr); > > > > +/* CWSR initialization */ > > +int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file > *filep); > > + > > /* CRIU */ > > /* > > * Need to increment KFD_CRIU_PRIV_VERSION each time a change is > made to any of the CRIU private > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > > index 72df6286e240..e935158ab311 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > > @@ -44,6 +44,7 @@ struct mm_struct; > > #include "kfd_iommu.h" > > #include "kfd_svm.h" > > #include "kfd_smi_events.h" > > +#include "kfd_debug.h" > > > > /* > > * List of struct kfd_process (field kfd_process). 
> > @@ -69,7 +70,6 @@ static struct kfd_process *find_process(const struct > task_struct *thread, > > bool ref); > > static void kfd_process_ref_release(struct kref *ref); > > static struct kfd_process *create_process(const struct task_struct > *thread); > > -static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file > *filep); > > > > static void evict_process_worker(struct work_struct *work); > > static void restore_process_worker(struct work_struct *work); > > @@ -798,18 +798,19 @@ static void > kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd) > > kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); > > } > > > > -struct kfd_process *kfd_create_process(struct file *filep) > > +struct kfd_process *kfd_create_process(struct task_struct *thread) > > { > > struct kfd_process *process; > > - struct task_struct *thread = current; > > int ret; > > > > - if (!thread->mm) > > + if (!(thread->mm && mmget_not_zero(thread->mm))) > > return ERR_PTR(-EINVAL); > > > > /* Only the pthreads threading model is supported. 
*/ > > - if (thread->group_leader->mm != thread->mm) > > + if (thread->group_leader->mm != thread->mm) { > > + mmput(thread->mm); > > return ERR_PTR(-EINVAL); > > + } > > > > /* > > * take kfd processes mutex before starting of process creation > > @@ -827,10 +828,6 @@ struct kfd_process *kfd_create_process(struct file > *filep) > > if (IS_ERR(process)) > > goto out; > > > > - ret = kfd_process_init_cwsr_apu(process, filep); > > - if (ret) > > - goto out_destroy; > > - > > if (!procfs.kobj) > > goto out; > > > > @@ -864,16 +861,9 @@ struct kfd_process *kfd_create_process(struct file > *filep) > > if (!IS_ERR(process)) > > kref_get(&process->ref); > > mutex_unlock(&kfd_processes_mutex); > > + mmput(thread->mm); > > > > return process; > > - > > -out_destroy: > > - hash_del_rcu(&process->kfd_processes); > > - mutex_unlock(&kfd_processes_mutex); > > - synchronize_srcu(&kfd_processes_srcu); > > - /* kfd_process_free_notifier will trigger the cleanup */ > > - mmu_notifier_put(&process->mmu_notifier); > > - return ERR_PTR(ret); > > } > > > > struct kfd_process *kfd_get_process(const struct task_struct *thread) > > @@ -1115,6 +1105,26 @@ static void kfd_process_wq_release(struct > work_struct *work) > > struct kfd_process *p = container_of(work, struct kfd_process, > > release_work); > > > > + kfd_dbg_trap_disable(p); > > + > > + if (atomic_read(&p->debugged_process_count) > 0) { > > + struct kfd_process *target; > > + unsigned int temp; > > + int idx = srcu_read_lock(&kfd_processes_srcu); > > + > > + hash_for_each_rcu(kfd_processes_table, temp, target, > kfd_processes) { > > + if (target->debugger_process && target- > >debugger_process == p) { > > + mutex_lock(&target->mutex); > > + kfd_dbg_trap_disable(target); > > + mutex_unlock(&target->mutex); > > + if (atomic_read(&p- > >debugged_process_count) == 0) > > + break; > > + } > > + } > > + > > + srcu_read_unlock(&kfd_processes_srcu, idx); > > + } > > + > > kfd_process_dequeue_from_all_devices(p); > > pqm_uninit(&p->pqm); 
> > > > @@ -1200,11 +1210,14 @@ static const struct mmu_notifier_ops > kfd_process_mmu_notifier_ops = { > > .free_notifier = kfd_process_free_notifier, > > }; > > > > -static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file > *filep) > > +int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) > > { > > unsigned long offset; > > int i; > > > > + if (p->has_cwsr) > > + return 0; > > + > > for (i = 0; i < p->n_pdds; i++) { > > struct kfd_dev *dev = p->pdds[i]->dev; > > struct qcm_process_device *qpd = &p->pdds[i]->qpd; > > @@ -1233,6 +1246,8 @@ static int kfd_process_init_cwsr_apu(struct > kfd_process *p, struct file *filep) > > qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); > > } > > > > + p->has_cwsr = true; > > + > > return 0; > > } > > > > @@ -1375,6 +1390,10 @@ static struct kfd_process *create_process(const > struct task_struct *thread) > > if (err) > > goto err_event_init; > > process->is_32bit_user_mode = in_compat_syscall(); > > + process->debug_trap_enabled = false; > > + process->debugger_process = NULL; > > + process->exception_enable_mask = 0; > > + atomic_set(&process->debugged_process_count, 0); > > > > process->pasid = kfd_pasid_alloc(); > > if (process->pasid == 0) { > > @@ -1422,6 +1441,8 @@ static struct kfd_process *create_process(const > struct task_struct *thread) > > kfd_unref_process(process); > > get_task_struct(process->lead_thread); > > > > + INIT_WORK(&process->debug_event_workarea, > debug_event_write_work_handler); > > + > > return process; > > > > err_register_notifier: > > @@ -1908,8 +1929,10 @@ static void restore_process_worker(struct > work_struct *work) > > */ > > > > p->last_restore_timestamp = get_jiffies_64(); > > - ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p- > >kgd_process_info, > > - &p->ef); > > + /* VMs may not have been acquired yet during debugging. 
*/ > > + if (p->kgd_process_info) > > + ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p- > >kgd_process_info, > > + &p->ef); > > if (ret) { > > pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d > ms\n", > > p->pasid, PROCESS_BACK_OFF_TIME_MS);