[Public] > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> > Sent: Monday, March 20, 2023 5:50 PM > To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd- > gfx@xxxxxxxxxxxxxxxxxxxxx; dri-devel@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH 15/32] drm/amdkfd: prepare trap workaround for gfx11 > > > On 2023-01-25 14:53, Jonathan Kim wrote: > > Due to a HW bug, waves in only half the shader arrays can enter trap. > > > > When starting a debug session, relocate all waves to the first shader > > array of each shader engine and mask off the 2nd shader array as > > unavailable. > > > > When ending a debug session, re-enable the 2nd shader array per > > shader engine. > > > > User CU masking per queue cannot be guaranteed to remain functional > > if requested during debugging (e.g. user cu mask requests only 2nd shader > > array as an available resource leading to zero HW resources available) > > nor can runtime be alerted of any of these changes during execution. > > > > Make user CU masking and debugging mutually exclusive with respect to > > availability. > > > > If the debugger tries to attach to a process with a user cu masked > > queue, return the runtime status as enabled but busy. > > > > If the debugger tries to attach and fails to reallocate queue waves to > > the first shader array of each shader engine, return the runtime status > > as enabled but with an error. > > > > In addition, like any other multi-process debug supported devices, > > disable trap temporary setup per-process to avoid performance impact > from > > setup overhead. 
> > > > Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 + > > drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 7 +- > > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 - > > drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 64 > +++++++++++++++++++ > > drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 3 +- > > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 7 ++ > > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 3 +- > > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 3 +- > > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 42 ++++++++---- > > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 3 +- > > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 3 +- > > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- > > .../amd/amdkfd/kfd_process_queue_manager.c | 9 ++- > > 13 files changed, 124 insertions(+), 29 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > > index d20df0cf0d88..b5f5eed2b5ef 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > > @@ -219,6 +219,8 @@ struct mes_add_queue_input { > > uint32_t gws_size; > > uint64_t tba_addr; > > uint64_t tma_addr; > > + uint32_t trap_en; > > + uint32_t skip_process_ctx_clear; > > uint32_t is_kfd_process; > > uint32_t is_aql_queue; > > uint32_t queue_size; > > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > > index fbacdc42efac..38c7a0cbf264 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > > @@ -197,17 +197,14 @@ static int mes_v11_0_add_hw_queue(struct > amdgpu_mes *mes, > > mes_add_queue_pkt.gws_size = input->gws_size; > > mes_add_queue_pkt.trap_handler_addr = input->tba_addr; > > mes_add_queue_pkt.tma_addr = input->tma_addr; > > + mes_add_queue_pkt.trap_en = input->trap_en; > > + mes_add_queue_pkt.skip_process_ctx_clear = input- > >skip_process_ctx_clear; > > 
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; > > > > /* For KFD, gds_size is re-used for queue size (needed in MES for AQL > queues) */ > > mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; > > mes_add_queue_pkt.gds_size = input->queue_size; > > > > - if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= > 4) && > > - (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) > && > > - (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3)))) > > - mes_add_queue_pkt.trap_en = 1; > > - > > /* For KFD, gds_size is re-used for queue size (needed in MES for AQL > queues) */ > > mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; > > mes_add_queue_pkt.gds_size = input->queue_size; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > index ee05c2e54ef6..f5f639de28f0 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > @@ -530,8 +530,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, > struct kfd_process *p, > > goto out; > > } > > > > - minfo.update_flag = UPDATE_FLAG_CU_MASK; > > - > > mutex_lock(&p->mutex); > > > > retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > > index f6ea6db266b4..6e99a0160275 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > > @@ -37,6 +37,70 @@ void debug_event_write_work_handler(struct > work_struct *work) > > kernel_write(process->dbg_ev_file, &write_data, 1, &pos); > > } > > > > +static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable) > > +{ > > + struct mqd_update_info minfo = {0}; > > + int err; > > + > > + if (!q || (!q->properties.is_dbg_wa && !enable)) > > Should this condition be: > > if (!q || q->properties.is_dbg_wa != enable) The latter part should probably be q->properties.is_dbg_wa == enable. 
q->properties.is_dbg_wa != enable would always skip a request to change the queue's current workaround state. I think we can just drop the latter half of this test condition as a redundant queue workaround update is harmless. It's a static call from a process wide call and the process wide call is static itself and only gets called twice, once on attach and once on detach. Thanks, Jon > > > > + return 0; > > + > > + if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || > > + KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0)) > > Indentation. It would be more readable if the KFD_GC_VERSIONs were > aligned. > > > > + return 0; > > + > > + if (enable && q->properties.is_user_cu_masked) > > + return -EBUSY; > > + > > + minfo.update_flag = enable ? UPDATE_FLAG_DBG_WA_ENABLE : > UPDATE_FLAG_DBG_WA_DISABLE; > > + > > + q->properties.is_dbg_wa = enable; > > + err = q->device->dqm->ops.update_queue(q->device->dqm, q, > &minfo); > > + if (err) > > + q->properties.is_dbg_wa = false; > > + > > + return err; > > +} > > + > > +static int kfd_dbg_set_workaround(struct kfd_process *target, bool > enable) > > +{ > > + struct process_queue_manager *pqm = &target->pqm; > > + struct process_queue_node *pqn; > > + int r = 0; > > + > > + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { > > + r = kfd_dbg_set_queue_workaround(pqn->q, enable); > > + if (enable && r) > > + goto unwind; > > + } > > + > > + return 0; > > + > > +unwind: > > + list_for_each_entry(pqn, &pqm->queues, process_queue_list) > > + kfd_dbg_set_queue_workaround(pqn->q, false); > > + > > + if (enable) { > > + target->runtime_info.runtime_state = r == -EBUSY ? > > + DEBUG_RUNTIME_STATE_ENABLED_BUSY : > > + DEBUG_RUNTIME_STATE_ENABLED_ERROR; > > + } > > Braces are not needed here. 
> > > > + > > + return r; > > +} > > + > > +static int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd) > > +{ > > + uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd- > >spi_dbg_launch_mode; > > + uint32_t flags = pdd->process->dbg_flags; > > + > > + if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) > > + return 0; > > + > > + return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd- > >proc_ctx_gpu_addr, spi_dbg_cntl, > > + pdd->watch_points, flags); > > +} > > + > > You're adding some unused static functions here. This will cause compile > warnings until the patch that starts using them. You could avoid this by > reordering this and the next patch and moving the function calls into > this patch. That would also make it more obvious where the workaround > plugs into the debug code. > > Regards, > Felix > > > > int kfd_dbg_trap_disable(struct kfd_process *target) > > { > > if (!target->debug_trap_enabled) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > > index 53c5a3e55bd2..0c09f1729325 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > > @@ -35,7 +35,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, > uint32_t fd, > > > > static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_dev *dev) > > { > > - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2); > > + return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || > > + KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0); > > } > > > > void debug_event_write_work_handler(struct work_struct *work); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > index 2517716d7cbc..be1985b87ea7 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > @@ -214,6 +214,10 @@ static int add_queue_mes(struct > device_queue_manager *dqm, struct queue *q, > > 
queue_input.paging = false; > > queue_input.tba_addr = qpd->tba_addr; > > queue_input.tma_addr = qpd->tma_addr; > > + queue_input.trap_en = KFD_GC_VERSION(q->device) < > IP_VERSION(11, 0, 0) || > > + KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, > 0) || > > + q->properties.is_dbg_wa; > > + queue_input.skip_process_ctx_clear = qpd->pqm->process- > >debug_trap_enabled; > > > > queue_type = convert_to_mes_queue_type(q->properties.type); > > if (queue_type < 0) { > > @@ -1679,6 +1683,9 @@ static int create_queue_cpsch(struct > device_queue_manager *dqm, struct queue *q, > > * updates the is_evicted flag but is a no-op otherwise. > > */ > > q->properties.is_evicted = !!qpd->evicted; > > + q->properties.is_dbg_wa = qpd->pqm->process- > >debug_trap_enabled && > > + KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) > && > > + KFD_GC_VERSION(q->device) < IP_VERSION(12, 0, 0); > > > > if (qd) > > mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q- > >mqd_mem_obj, &q->gart_mqd_addr, > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c > > index 4889865c725c..c2a7226fc588 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c > > @@ -48,8 +48,7 @@ static void update_cu_mask(struct mqd_manager > *mm, void *mqd, > > struct cik_mqd *m; > > uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ > > > > - if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) || > > - !minfo->cu_mask.ptr) > > + if (!minfo || !minfo->cu_mask.ptr) > > return; > > > > mqd_symmetrically_map_cu_mask(mm, > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > > index cb484ace17de..8248e77751e7 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > > @@ -48,8 +48,7 @@ static void update_cu_mask(struct mqd_manager > *mm, void *mqd, > > struct 
v10_compute_mqd *m; > > uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ > > > > - if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) || > > - !minfo->cu_mask.ptr) > > + if (!minfo || !minfo->cu_mask.ptr) > > return; > > > > mqd_symmetrically_map_cu_mask(mm, > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > > index ac7c8fc83c94..18ab613e787c 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > > @@ -46,15 +46,33 @@ static void update_cu_mask(struct mqd_manager > *mm, void *mqd, > > { > > struct v11_compute_mqd *m; > > uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; > > + bool has_wa_flag = minfo && (minfo->update_flag & > (UPDATE_FLAG_DBG_WA_ENABLE | > > + UPDATE_FLAG_DBG_WA_DISABLE)); > > > > - if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) || > > - !minfo->cu_mask.ptr) > > + if (!minfo || !(has_wa_flag || minfo->cu_mask.ptr)) > > return; > > > > + m = get_mqd(mqd); > > + > > + if (has_wa_flag) { > > + uint32_t wa_mask = minfo->update_flag == > UPDATE_FLAG_DBG_WA_ENABLE ? 
> > + 0xffff : 0xffffffff; > > + > > + m->compute_static_thread_mgmt_se0 = wa_mask; > > + m->compute_static_thread_mgmt_se1 = wa_mask; > > + m->compute_static_thread_mgmt_se2 = wa_mask; > > + m->compute_static_thread_mgmt_se3 = wa_mask; > > + m->compute_static_thread_mgmt_se4 = wa_mask; > > + m->compute_static_thread_mgmt_se5 = wa_mask; > > + m->compute_static_thread_mgmt_se6 = wa_mask; > > + m->compute_static_thread_mgmt_se7 = wa_mask; > > + > > + return; > > + } > > + > > mqd_symmetrically_map_cu_mask(mm, > > minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); > > > > - m = get_mqd(mqd); > > m->compute_static_thread_mgmt_se0 = se_mask[0]; > > m->compute_static_thread_mgmt_se1 = se_mask[1]; > > m->compute_static_thread_mgmt_se2 = se_mask[2]; > > @@ -109,6 +127,7 @@ static void init_mqd(struct mqd_manager *mm, > void **mqd, > > uint64_t addr; > > struct v11_compute_mqd *m; > > int size; > > + uint32_t wa_mask = q->is_dbg_wa ? 0xffff : 0xffffffff; > > > > m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; > > addr = mqd_mem_obj->gpu_addr; > > @@ -122,14 +141,15 @@ static void init_mqd(struct mqd_manager *mm, > void **mqd, > > > > m->header = 0xC0310800; > > m->compute_pipelinestat_enable = 1; > > - m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; > > - m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; > > - m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; > > - m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; > > - m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF; > > - m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF; > > - m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF; > > - m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF; > > + > > + m->compute_static_thread_mgmt_se0 = wa_mask; > > + m->compute_static_thread_mgmt_se1 = wa_mask; > > + m->compute_static_thread_mgmt_se2 = wa_mask; > > + m->compute_static_thread_mgmt_se3 = wa_mask; > > + m->compute_static_thread_mgmt_se4 = wa_mask; > > + m->compute_static_thread_mgmt_se5 = wa_mask; > > + m->compute_static_thread_mgmt_se6 = 
wa_mask; > > + m->compute_static_thread_mgmt_se7 = wa_mask; > > > > m->cp_hqd_persistent_state = > CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | > > 0x55 << > CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > > index 86f1cf090246..50da16dd4c96 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > > @@ -49,8 +49,7 @@ static void update_cu_mask(struct mqd_manager > *mm, void *mqd, > > struct v9_mqd *m; > > uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; > > > > - if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) || > > - !minfo->cu_mask.ptr) > > + if (!minfo || !minfo->cu_mask.ptr) > > return; > > > > mqd_symmetrically_map_cu_mask(mm, > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > > index 530ba6f5b57e..58b40bff3e0c 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > > @@ -51,8 +51,7 @@ static void update_cu_mask(struct mqd_manager > *mm, void *mqd, > > struct vi_mqd *m; > > uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ > > > > - if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) || > > - !minfo->cu_mask.ptr) > > + if (!minfo || !minfo->cu_mask.ptr) > > return; > > > > mqd_symmetrically_map_cu_mask(mm, > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > index 8f1e2f9023db..75521d96e937 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > @@ -479,6 +479,8 @@ struct queue_properties { > > bool is_evicted; > > bool is_active; > > bool is_gws; > > + bool is_dbg_wa; > > + bool is_user_cu_masked; > > /* Not relevant for user mode queues in cp scheduling */ > > unsigned int vmid; > > /* Relevant only for sdma queues*/ > > @@ -501,7 
+503,8 @@ struct queue_properties { > > !(q).is_evicted) > > > > enum mqd_update_flag { > > - UPDATE_FLAG_CU_MASK = 0, > > + UPDATE_FLAG_DBG_WA_ENABLE = 1, > > + UPDATE_FLAG_DBG_WA_DISABLE = 2, > > }; > > > > struct mqd_update_info { > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > > index 5137476ec18e..d8f032214481 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > > @@ -498,8 +498,12 @@ int pqm_update_mqd(struct > process_queue_manager *pqm, > > return -EFAULT; > > } > > > > + /* CUs are masked for debugger requirements so deny user mask */ > > + if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr) > > + return -EBUSY; > > + > > /* ASICs that have WGPs must enforce pairwise enabled mask > checks. */ > > - if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && > minfo->cu_mask.ptr && > > + if (minfo && minfo->cu_mask.ptr && > > KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, > 0, 0)) { > > int i; > > > > @@ -518,6 +522,9 @@ int pqm_update_mqd(struct > process_queue_manager *pqm, > > if (retval != 0) > > return retval; > > > > + if (minfo && minfo->cu_mask.ptr) > > + pqn->q->properties.is_user_cu_masked = true; > > + > > return 0; > > } > >