On Tue, Nov 28, 2017 at 1:29 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > Allow HWS to execute multiple processes on the hardware > concurrently. The number of concurrent processes is limited by > the number of VMIDs allocated to the HWS. > > A module parameter can be used for limiting this further or turn > it off altogether (mainly for debugging purposes). > > Signed-off-by: Yong Zhao <yong.zhao at amd.com> > Signed-off-by: Jay Cornwall <Jay.Cornwall at amd.com> > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 +++++++++ > drivers/gpu/drm/amd/amdkfd/kfd_module.c | 5 +++++ > drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 30 +++++++++++++++++++++++-- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++++++++ > 4 files changed, 53 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > index 4f05eac..a8fa33a 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > @@ -238,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, > kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd > - kfd->vm_info.first_vmid_kfd + 1; > > + /* Verify module parameters regarding mapped process number*/ > + if ((hws_max_conc_proc < 0) > + || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { > + dev_err(kfd_device, > + "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", > + hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, > + kfd->vm_info.vmid_num_kfd); > + kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; > + } else > + kfd->max_proc_per_quantum = hws_max_conc_proc; > + > /* calculate max size of mqds needed for queues */ > size = max_num_of_queues_per_device * > kfd->device_info->mqd_size_aligned; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c > index ee8adf6..4e060c8 100644 > --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_module.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c > @@ -50,6 +50,11 @@ module_param(sched_policy, int, 0444); > MODULE_PARM_DESC(sched_policy, > "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); > > +int hws_max_conc_proc = 8; > +module_param(hws_max_conc_proc, int, 0444); > +MODULE_PARM_DESC(hws_max_conc_proc, > + "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); > + > int cwsr_enable = 1; > module_param(cwsr_enable, int, 0444); > MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > index 69c147a..0b7092e 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > @@ -57,13 +57,24 @@ static void pm_calc_rlib_size(struct packet_manager *pm, > { > unsigned int process_count, queue_count; > unsigned int map_queue_size; > + unsigned int max_proc_per_quantum = 1; > + struct kfd_dev *dev = pm->dqm->dev; > > process_count = pm->dqm->processes_count; > queue_count = pm->dqm->queue_count; > > - /* check if there is over subscription*/ > + /* check if there is over subscription > + * Note: the arbitration between the number of VMIDs and > + * hws_max_conc_proc has been done in > + * kgd2kfd_device_init(). 
> + */ > *over_subscription = false; > - if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { > + > + if (dev->max_proc_per_quantum > 1) > + max_proc_per_quantum = dev->max_proc_per_quantum; > + > + if ((process_count > max_proc_per_quantum) || > + queue_count > get_queues_num(pm->dqm)) { > *over_subscription = true; > pr_debug("Over subscribed runlist\n"); > } > @@ -116,10 +127,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, > uint64_t ib, size_t ib_size_in_dwords, bool chain) > { > struct pm4_mes_runlist *packet; > + int concurrent_proc_cnt = 0; > + struct kfd_dev *kfd = pm->dqm->dev; > > if (WARN_ON(!ib)) > return -EFAULT; > > + /* Determine the number of processes to map together to HW: > + * it can not exceed the number of VMIDs available to the > + * scheduler, and it is determined by the smaller of the number > + * of processes in the runlist and kfd module parameter > + * hws_max_conc_proc. > + * Note: the arbitration between the number of VMIDs and > + * hws_max_conc_proc has been done in > + * kgd2kfd_device_init(). > + */ > + concurrent_proc_cnt = min(pm->dqm->processes_count, > + kfd->max_proc_per_quantum); > + > packet = (struct pm4_mes_runlist *)buffer; > > memset(buffer, 0, sizeof(struct pm4_mes_runlist)); > @@ -130,6 +155,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, > packet->bitfields4.chain = chain ? 
1 : 0; > packet->bitfields4.offload_polling = 0; > packet->bitfields4.valid = 1; > + packet->bitfields4.process_cnt = concurrent_proc_cnt; > packet->ordinal2 = lower_32_bits(ib); > packet->bitfields3.ib_base_hi = upper_32_bits(ib); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index a668764..1edab21 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -88,6 +88,12 @@ extern int max_num_of_queues_per_device; > /* Kernel module parameter to specify the scheduling policy */ > extern int sched_policy; > > +/* > + * Kernel module parameter to specify the maximum process > + * number per HW scheduler > + */ > +extern int hws_max_conc_proc; > + > extern int cwsr_enable; > > /* > @@ -214,6 +220,9 @@ struct kfd_dev { > /* Debug manager */ > struct kfd_dbgmgr *dbgmgr; > > + /* Maximum process number mapped to HW scheduler */ > + unsigned int max_proc_per_quantum; > + > /* CWSR */ > bool cwsr_enabled; > const void *cwsr_isa; > -- > 2.7.4 > This patch is: Acked-by: Oded Gabbay <oded.gabbay at gmail.com>