On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > From: Moses Reuben <moses.reuben at amd.com> > > Signed-off-by: Moses Reuben <moses.reuben at amd.com> > Signed-off-by: Ben Goz <ben.goz at amd.com> > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 44 ++++++++++++++++++++++ > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++ > .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++ > .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 + > .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 2 + > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + > include/uapi/linux/kfd_ioctl.h | 19 +++++++++- > 7 files changed, 73 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index 7d78119..6be1bba 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -848,6 +848,47 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, > > return err; > } > +static int kfd_ioctl_alloc_scratch_memory(struct file *filep, > + struct kfd_process *p, void *data) > +{ > + struct kfd_ioctl_alloc_memory_of_scratch_args *args = data; > + struct kfd_process_device *pdd; > + struct kfd_dev *dev; > + long err; > + > + if (args->size == 0) > + return -EINVAL; > + > + dev = kfd_device_by_id(args->gpu_id); > + if (!dev) > + return -EINVAL; > + > + mutex_lock(&p->mutex); > + > + pdd = kfd_bind_process_to_device(dev, p); > + if (IS_ERR(pdd)) { > + err = PTR_ERR(pdd); > + goto bind_process_to_device_fail; > + } > + > + pdd->qpd.sh_hidden_private_base = args->va_addr; > + > + mutex_unlock(&p->mutex); > + > + if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) { > + err = dev->kfd2kgd->alloc_memory_of_scratch( > + dev->kgd, args->va_addr, pdd->qpd.vmid); > + if (err) > + goto alloc_memory_of_scratch_failed; > + } > + > + return 0; > + > 
+bind_process_to_device_fail: > + mutex_unlock(&p->mutex); > +alloc_memory_of_scratch_failed: > + return -EFAULT; This should be "return err" to reflect the actual error code. > +} > > #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ > [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ > @@ -902,6 +943,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { > > AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, > kfd_ioctl_dbg_wave_control, 0), > + > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH, > + kfd_ioctl_alloc_scratch_memory, 0), > }; > > #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > index e790e7f..33c43b7 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > @@ -304,6 +304,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, > > kfd->dbgmgr = NULL; > > + /* Initialize scratch memory access */ > + kfd->kfd2kgd->write_config_static_mem(kfd->kgd, true, 1, 3, 0); > + > kfd->init_complete = true; > dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, > kfd->pdev->device); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 3891fe5..7bd8c39f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -268,6 +268,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, > pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", > q->pipe, q->queue); > > + dqm->dev->kfd2kgd->alloc_memory_of_scratch( > + dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); > + > retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, > q->process->mm); > if (retval) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c > index 
fadc56a..72c3cba 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c > @@ -24,6 +24,7 @@ > #include "kfd_device_queue_manager.h" > #include "cik_regs.h" > #include "oss/oss_2_4_sh_mask.h" > +#include "gca/gfx_7_2_sh_mask.h" > > static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, > struct qcm_process_device *qpd, > @@ -123,6 +124,7 @@ static int register_process_cik(struct device_queue_manager *dqm, > } else { > temp = get_sh_mem_bases_nybble_64(pdd); > qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); > + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; > } > > pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c > index 15e81ae..40e9ddd 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c > @@ -135,6 +135,8 @@ static int register_process_vi(struct device_queue_manager *dqm, > qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); > qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << > SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; > + qpd->sh_mem_config |= 1 << > + SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; > } > > pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 30ce92c..b397ec7 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -432,6 +432,7 @@ struct qcm_process_device { > uint32_t gds_size; > uint32_t num_gws; > uint32_t num_oac; > + uint32_t sh_hidden_private_base; > }; > > /* Data that is per-process-per device. 
*/ > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h > index d683342..3dd86e1 100644 > --- a/include/uapi/linux/kfd_ioctl.h > +++ b/include/uapi/linux/kfd_ioctl.h > @@ -232,6 +232,13 @@ struct kfd_ioctl_wait_events_args { > uint32_t wait_result; /* from KFD */ > }; > > +struct kfd_ioctl_alloc_memory_of_scratch_args { > + uint64_t va_addr; /* to KFD */ > + uint64_t size; /* to KFD */ > + uint32_t gpu_id; /* to KFD */ > + uint32_t pad; > +}; > + > #define AMDKFD_IOCTL_BASE 'K' > #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) > #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) > @@ -286,7 +293,17 @@ struct kfd_ioctl_wait_events_args { > #define AMDKFD_IOC_DBG_WAVE_CONTROL \ > AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) > > +/* TODO: > + * - AMDKFD_IOC_ALLOC_MEMORY_OF_GPU > + * - AMDKFD_IOC_FREE_MEMORY_OF_GPU > + * - AMDKFD_IOC_MAP_MEMORY_TO_GPU > + * - AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU > + */ > + > +#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH \ > + AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args) > + > #define AMDKFD_COMMAND_START 0x01 > -#define AMDKFD_COMMAND_END 0x11 > +#define AMDKFD_COMMAND_END 0x16 You create a hole here, between 0x11 and 0x16. This would make the sanity check in kfd_ioctl() useless. Also, why not add a generic IOCTL for allocating GPU memory and pass a parameter indicating whether it's scratch or something else, similar to amdgpu's "AMDGPU_GEM_DOMAIN_*" defines? > > #endif > -- > 2.7.4 >