On Wed, Apr 11, 2018 at 12:33 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > From: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com> > > Use bit-rotate for better clarity and remove _MASK from the #defines as > these represent mmap types. > > Centralize all the parsing of the mmap offset in kfd_mmap and add device > parameter to doorbell and reserved_mem map functions. > > Encode gpu_id into upper bits of vm_pgoff. This frees up the lower bits > for encoding the doorbell ID on Vega10. > > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com> > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 35 ++++++++++++++++++---------- > drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 9 ++------ > drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 38 ++++++++++++++++++++++++------- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++---- > 5 files changed, 59 insertions(+), 33 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index b5e5f0e..f6b35f4 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -292,7 +292,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, > > > /* Return gpu_id as doorbell offset for mmap usage */ > - args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id); > + args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; > + args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); > args->doorbell_offset <<= PAGE_SHIFT; > > mutex_unlock(&p->mutex); > @@ -1645,23 +1646,33 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) > static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) > { > struct kfd_process *process; > + struct kfd_dev *dev = NULL; > + unsigned long vm_pgoff; > + unsigned int gpu_id; > > process = kfd_get_process(current); > 
if (IS_ERR(process)) > return PTR_ERR(process); > > - if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) == > - KFD_MMAP_DOORBELL_MASK) { > - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK; > - return kfd_doorbell_mmap(process, vma); > - } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) == > - KFD_MMAP_EVENTS_MASK) { > - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; > + vm_pgoff = vma->vm_pgoff; > + vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff); > + gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff); > + if (gpu_id) > + dev = kfd_device_by_id(gpu_id); > + > + switch (vm_pgoff & KFD_MMAP_TYPE_MASK) { > + case KFD_MMAP_TYPE_DOORBELL: > + if (!dev) > + return -ENODEV; > + return kfd_doorbell_mmap(dev, process, vma); > + > + case KFD_MMAP_TYPE_EVENTS: > return kfd_event_mmap(process, vma); > - } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == > - KFD_MMAP_RESERVED_MEM_MASK) { > - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; > - return kfd_reserved_mem_mmap(process, vma); > + > + case KFD_MMAP_TYPE_RESERVED_MEM: > + if (!dev) > + return -ENODEV; > + return kfd_reserved_mem_mmap(dev, process, vma); > } > > return -EFAULT; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > index 4840314..efc59de 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > @@ -126,15 +126,10 @@ void kfd_doorbell_fini(struct kfd_dev *kfd) > iounmap(kfd->doorbell_kernel_ptr); > } > > -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) > +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, > + struct vm_area_struct *vma) > { > phys_addr_t address; > - struct kfd_dev *dev; > - > - /* Find kfd device according to gpu id */ > - dev = kfd_device_by_id(vma->vm_pgoff); > - if (!dev) > - return -EINVAL; > > /* > * For simplicitly we only allow mapping of the entire doorbell > diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > index 4890a90..bccf2f7 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > @@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, > case KFD_EVENT_TYPE_DEBUG: > ret = create_signal_event(devkfd, p, ev); > if (!ret) { > - *event_page_offset = KFD_MMAP_EVENTS_MASK; > + *event_page_offset = KFD_MMAP_TYPE_EVENTS; > *event_page_offset <<= PAGE_SHIFT; > *event_slot_index = ev->event_id; > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index d9c0fe12..2d575c0 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -41,9 +41,33 @@ > > #define KFD_SYSFS_FILE_MODE 0444 > > -#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull > -#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull > -#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull > +/* GPU ID hash width in bits */ > +#define KFD_GPU_ID_HASH_WIDTH 16 > + > +/* Use upper bits of mmap offset to store KFD driver specific information. > + * BITS[63:62] - Encode MMAP type > + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to > + * BITS[45:0] - MMAP offset value > + * > + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these > + * defines are w.r.t to PAGE_SIZE > + */ > +#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) > +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) > +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) > +#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) > +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) Doesn't this new definition break the existing user-space library (kfd thunk)? If that is the case, we have a problem here. 
Oded > + > +#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) > +#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ > + << KFD_MMAP_GPU_ID_SHIFT) > +#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\ > + & KFD_MMAP_GPU_ID_MASK) > +#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ > + >> KFD_MMAP_GPU_ID_SHIFT) > + > +#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT) > +#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK) > > /* > * When working with cp scheduler we should assign the HIQ manually or via > @@ -55,9 +79,6 @@ > #define KFD_CIK_HIQ_PIPE 4 > #define KFD_CIK_HIQ_QUEUE 0 > > -/* GPU ID hash width in bits */ > -#define KFD_GPU_ID_HASH_WIDTH 16 > - > /* Macro for allocating structures */ > #define kfd_alloc_struct(ptr_to_struct) \ > ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) > @@ -698,7 +719,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, > struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, > struct kfd_process *p); > > -int kfd_reserved_mem_mmap(struct kfd_process *process, > +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, > struct vm_area_struct *vma); > > /* KFD process API for creating and translating handles */ > @@ -728,7 +749,8 @@ void kfd_pasid_free(unsigned int pasid); > /* Doorbells */ > int kfd_doorbell_init(struct kfd_dev *kfd); > void kfd_doorbell_fini(struct kfd_dev *kfd); > -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); > +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, > + struct vm_area_struct *vma); > void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, > unsigned int *doorbell_off); > void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 2791e72..131fe2a 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -451,7 +451,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) > if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) > continue; > > - offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; > + offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) > + << PAGE_SHIFT; > qpd->tba_addr = (int64_t)vm_mmap(filep, 0, > KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, > MAP_SHARED, offset); > @@ -989,15 +990,12 @@ int kfd_resume_all_processes(void) > return ret; > } > > -int kfd_reserved_mem_mmap(struct kfd_process *process, > +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, > struct vm_area_struct *vma) > { > - struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); > struct kfd_process_device *pdd; > struct qcm_process_device *qpd; > > - if (!dev) > - return -EINVAL; > if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { > pr_err("Incorrect CWSR mapping size.\n"); > return -EINVAL; > -- > 2.7.4 >