On 21/07/14 02:02, Jerome Glisse wrote: > On Thu, Jul 17, 2014 at 04:29:18PM +0300, Oded Gabbay wrote: >> From: Andrew Lewycky <Andrew.Lewycky@xxxxxxx> >> >> This patch adds the process module and 4 helper modules: >> >> - kfd_process, which handles processes which open /dev/kfd >> - kfd_doorbell, which provides helper functions for doorbell allocation, release and mapping to userspace >> - kfd_pasid, which provides helper functions for pasid allocation and release >> - kfd_vidmem, which provides helper functions for allocation and release of memory from the gfx driver >> - kfd_aperture, which provides helper functions for managing the LDS, Local GPU memory and Scratch memory apertures of the process >> >> This patch only contains the basic kfd_process module, which doesn't contain the reference to the queue scheduler. This was done to allow easier code review. >> >> Also, this patch doesn't contain the calls to the IOMMU driver for binding the pasid to the device. Again, this was done to allow easier code review >> >> The kfd_process object is created when a process opens /dev/kfd and is closed when the mm_struct of that process is torn down. > > So valid arguments were made to have one file per device, and because this is not > a common hsa architecture I am rather reluctant to add the /dev/kfd directory just > for a temporary solution until people inside the HSA foundation get their act > together and work on a common API. > > So I would rather have all of the temporary kfd solution inside the radeon driver under the > drm folder. I think we have enough ioctls left to accommodate you. 
> >> >> Signed-off-by: Andrew Lewycky <Andrew.Lewycky@xxxxxxx> >> Signed-off-by: Oded Gabbay <oded.gabbay@xxxxxxx> >> --- >> drivers/gpu/drm/radeon/amdkfd/Makefile | 4 +- >> drivers/gpu/drm/radeon/amdkfd/kfd_aperture.c | 123 +++++++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_chardev.c | 36 ++- >> drivers/gpu/drm/radeon/amdkfd/kfd_device.c | 2 + >> drivers/gpu/drm/radeon/amdkfd/kfd_doorbell.c | 264 +++++++++++++++++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_module.c | 22 ++ >> drivers/gpu/drm/radeon/amdkfd/kfd_pasid.c | 97 +++++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_priv.h | 148 +++++++++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_process.c | 374 +++++++++++++++++++++++++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_vidmem.c | 96 +++++++ >> 10 files changed, 1163 insertions(+), 3 deletions(-) >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/kfd_aperture.c >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/kfd_doorbell.c >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/kfd_pasid.c >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/kfd_process.c >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/kfd_vidmem.c >> >> diff --git a/drivers/gpu/drm/radeon/amdkfd/Makefile b/drivers/gpu/drm/radeon/amdkfd/Makefile >> index 08ecfcd..daf75a8 100644 >> --- a/drivers/gpu/drm/radeon/amdkfd/Makefile >> +++ b/drivers/gpu/drm/radeon/amdkfd/Makefile >> @@ -4,6 +4,8 @@ >> >> ccflags-y := -Iinclude/drm >> >> -amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o >> +amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ >> + kfd_pasid.o kfd_doorbell.o kfd_vidmem.o kfd_aperture.o \ >> + kfd_process.o >> >> obj-$(CONFIG_HSA_RADEON) += amdkfd.o >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_aperture.c b/drivers/gpu/drm/radeon/amdkfd/kfd_aperture.c >> new file mode 100644 >> index 0000000..0468114 >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_aperture.c >> @@ -0,0 +1,123 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. 
>> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. 
>> + * >> + */ >> + >> +#include <linux/device.h> >> +#include <linux/export.h> >> +#include <linux/err.h> >> +#include <linux/fs.h> >> +#include <linux/sched.h> >> +#include <linux/slab.h> >> +#include <linux/uaccess.h> >> +#include <linux/compat.h> >> +#include <uapi/linux/kfd_ioctl.h> >> +#include <linux/time.h> >> +#include "kfd_priv.h" >> +#include <linux/mm.h> >> +#include <uapi/asm-generic/mman-common.h> >> +#include <asm/processor.h> >> + >> + >> +#define MAKE_GPUVM_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x1000000000000) >> +#define MAKE_GPUVM_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFF0000000000) | 0xFFFFFFFFFF) >> +#define MAKE_SCRATCH_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x100000000) >> +#define MAKE_SCRATCH_APP_LIMIT(base) (((uint64_t)base & 0xFFFFFFFF00000000) | 0xFFFFFFFF) >> +#define MAKE_LDS_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x0) >> +#define MAKE_LDS_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFFFF00000000) | 0xFFFFFFFF) >> + >> +#define HSA_32BIT_LDS_APP_SIZE 0x10000 >> +#define HSA_32BIT_LDS_APP_ALIGNMENT 0x10000 >> + >> +static unsigned long kfd_reserve_aperture(struct kfd_process *process, unsigned long len, unsigned long alignment) >> +{ >> + >> + unsigned long addr = 0; >> + unsigned long start_address; >> + >> + /* >> + * Go bottom up and find the first available aligned address. >> + * We may narrow space to scan by getting mmap range limits. >> + */ >> + for (start_address = alignment; start_address < (TASK_SIZE - alignment); start_address += alignment) { >> + addr = vm_mmap(NULL, start_address, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0); > > So this forcing aperture into address space process is not really > welcome. Userspace have no idea this will happen and valid existing > program may already staticly allocate those address through mmap > either after or before they might trigger this code. > > As i said in the general answer, i think best here is to use the > kernel reserved area to map this. 
You can work around the gate > page if the gate page matters to you. We talked about it in another thread, but to sum it up, I removed the support for LDS aperture in 32 bit mode (which is the mode that uses the above function). > > This of course begs the question of what happens if the gpu tries to access > inside the kernel region? Does the iommu respect the system flag > of the page table? Or does it just happily allow the gpu to access > the whole kernel area? > > I guess I should go dive into the iommuv2 datasheet to find out. > >> + if (!IS_ERR_VALUE(addr)) { >> + if (addr == start_address) >> + return addr; >> + vm_munmap(addr, len); >> + } >> + } >> + return 0; >> + >> +} >> + >> +int kfd_init_apertures(struct kfd_process *process) >> +{ >> + uint8_t id = 0; >> + struct kfd_dev *dev; >> + struct kfd_process_device *pdd; >> + >> + mutex_lock(&process->mutex); >> + >> + /*Iterating over all devices*/ >> + while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) { >> + >> + pdd = kfd_get_process_device_data(dev, process); >> + >> + /*for 64 bit process aperture will be statically reserved in the non canonical process address space > > What does non-canonical process address space mean? Is this x86-64 terminology > or something else? This is the x86_64 terminology. In v3 I will add a detailed explanation on this subject. > >> + *for 32 bit process the aperture will be reserved in the process address space >> + */ >> + if (process->is_32bit_user_mode) { >> + /*try to reserve aperture. 
continue on failure, just put the aperture size to be 0*/ >> + pdd->lds_base = kfd_reserve_aperture( >> + process, >> + HSA_32BIT_LDS_APP_SIZE, >> + HSA_32BIT_LDS_APP_ALIGNMENT); >> + >> + if (pdd->lds_base) >> + pdd->lds_limit = pdd->lds_base + HSA_32BIT_LDS_APP_SIZE - 1; >> + else >> + pdd->lds_limit = 0; >> + >> + /*GPUVM and Scratch apertures are not supported*/ >> + pdd->gpuvm_base = pdd->gpuvm_limit = pdd->scratch_base = pdd->scratch_limit = 0; >> + } else { >> + /*node id couldn't be 0 - the three MSB bits of aperture shoudn't be 0*/ >> + pdd->lds_base = MAKE_LDS_APP_BASE(id + 1); >> + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); >> + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); >> + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base); >> + pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1); >> + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); >> + } >> + >> + dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX", >> + id, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit); > > Break this debug output into several debug message. Not all of us have 30" > monitor. 
Done in v3 > >> + >> + id++; >> + } >> + >> + mutex_unlock(&process->mutex); >> + >> + return 0; >> +} >> + >> + >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_chardev.c b/drivers/gpu/drm/radeon/amdkfd/kfd_chardev.c >> index b98bcb7..d6580a6 100644 >> --- a/drivers/gpu/drm/radeon/amdkfd/kfd_chardev.c >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_chardev.c >> @@ -38,6 +38,7 @@ >> >> static long kfd_ioctl(struct file *, unsigned int, unsigned long); >> static int kfd_open(struct inode *, struct file *); >> +static int kfd_mmap(struct file *, struct vm_area_struct *); >> >> static const char kfd_dev_name[] = "kfd"; >> >> @@ -46,6 +47,7 @@ static const struct file_operations kfd_fops = { >> .unlocked_ioctl = kfd_ioctl, >> .compat_ioctl = kfd_ioctl, >> .open = kfd_open, >> + .mmap = kfd_mmap, >> }; >> >> static int kfd_char_dev_major = -1; >> @@ -96,9 +98,22 @@ struct device *kfd_chardev(void) >> >> static int kfd_open(struct inode *inode, struct file *filep) >> { >> + struct kfd_process *process; >> + >> if (iminor(inode) != 0) >> return -ENODEV; >> >> + process = kfd_create_process(current); >> + if (IS_ERR(process)) >> + return PTR_ERR(process); >> + >> + process->is_32bit_user_mode = is_compat_task(); >> + >> + dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", >> + process->pasid, process->is_32bit_user_mode); >> + >> + kfd_init_apertures(process); >> + >> return 0; >> } >> >> @@ -152,8 +167,9 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) >> "ioctl cmd 0x%x (#%d), arg 0x%lx\n", >> cmd, _IOC_NR(cmd), arg); >> >> - /* TODO: add function that retrieves process */ >> - process = NULL; >> + process = kfd_get_process(current); >> + if (IS_ERR(process)) >> + return PTR_ERR(process); >> >> switch (cmd) { >> case KFD_IOC_CREATE_QUEUE: >> @@ -201,3 +217,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) >> >> return err; >> } >> + >> +static int >> +kfd_mmap(struct file *filp, struct 
vm_area_struct *vma) >> +{ >> + unsigned long pgoff = vma->vm_pgoff; >> + struct kfd_process *process; >> + >> + process = kfd_get_process(current); >> + if (IS_ERR(process)) >> + return PTR_ERR(process); >> + >> + if (pgoff >= KFD_MMAP_DOORBELL_START && pgoff < KFD_MMAP_DOORBELL_END) >> + return kfd_doorbell_mmap(process, vma); >> + >> + return -EINVAL; >> +} >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_device.c b/drivers/gpu/drm/radeon/amdkfd/kfd_device.c >> index 4138694..f6a7cf7 100644 >> --- a/drivers/gpu/drm/radeon/amdkfd/kfd_device.c >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_device.c >> @@ -100,6 +100,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, >> { >> kfd->shared_resources = *gpu_resources; >> >> + kfd_doorbell_init(kfd); >> + >> if (kfd_topology_add_device(kfd) != 0) >> return false; >> >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/radeon/amdkfd/kfd_doorbell.c >> new file mode 100644 >> index 0000000..972eaea >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_doorbell.c >> @@ -0,0 +1,264 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + */ >> + >> +#include "kfd_priv.h" >> +#include <linux/mm.h> >> +#include <linux/mman.h> >> +#include <linux/slab.h> >> + >> +/* >> + * This extension supports a kernel level doorbells management for >> + * the kernel queues. >> + * Basically the last doorbells page is devoted to kernel queues >> + * and that's assures that any user process won't get access to the >> + * kernel doorbells page >> + */ >> +static DEFINE_MUTEX(doorbell_mutex); >> +static unsigned long doorbell_available_index[DIV_ROUND_UP(MAX_PROCESS_QUEUES, BITS_PER_LONG)] = { 0 }; >> +#define KERNEL_DOORBELL_PASID 1 >> + >> +/* >> + * Each device exposes a doorbell aperture, a PCI MMIO aperture that >> + * receives 32-bit writes that are passed to queues as wptr values. >> + * The doorbells are intended to be written by applications as part >> + * of queueing work on user-mode queues. >> + * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks. >> + * We map the doorbell address space into user-mode when a process creates >> + * its first queue on each device. >> + * Although the mapping is done by KFD, it is equivalent to an mmap of >> + * the /dev/kfd with the particular device encoded in the mmap offset. >> + * There will be other uses for mmap of /dev/kfd, so only a range of >> + * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells. >> + */ > > Mapping should not be done by the driver instead you should provide the > offset to userspace and have userspace call mmap with proper argument. > I do not think having device driver doing mmap in the back of an ioctl > would be a welcome idea. > Done in v3 >> + >> +/* # of doorbell bytes allocated for each process. 
*/ >> +static inline size_t doorbell_process_allocation(void) >> +{ >> + return roundup(sizeof(doorbell_t) * MAX_PROCESS_QUEUES, PAGE_SIZE); >> +} > > This whole doorbell situation needs some cleanup instead of passing every > things as byte and byte offset you should rather pass everything as pfn and > pgoffset so it is clear that a doorbell is on page granularity and you will > not have to clutter all kind of align and round up accross code. Just cleaner > and safer. > Done in v3 >> + >> +/* Doorbell calculations for device init. */ >> +void kfd_doorbell_init(struct kfd_dev *kfd) >> +{ >> + size_t doorbell_start_offset; >> + size_t doorbell_aperture_size; >> + size_t doorbell_process_limit; >> + >> + /* >> + * We start with calculations in bytes because the input data might >> + * only be byte-aligned. >> + * Only after we have done the rounding can we assume any alignment. >> + */ >> + >> + doorbell_start_offset = roundup(kfd->shared_resources.doorbell_start_offset, >> + doorbell_process_allocation()); >> + doorbell_aperture_size = rounddown(kfd->shared_resources.doorbell_aperture_size, >> + doorbell_process_allocation()); >> + >> + if (doorbell_aperture_size > doorbell_start_offset) >> + doorbell_process_limit = >> + (doorbell_aperture_size - doorbell_start_offset) / doorbell_process_allocation(); >> + else >> + doorbell_process_limit = 0; >> + >> + kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + doorbell_start_offset; >> + kfd->doorbell_id_offset = doorbell_start_offset / sizeof(doorbell_t); >> + kfd->doorbell_process_limit = doorbell_process_limit - 1; >> + >> + kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, doorbell_process_allocation()); >> + BUG_ON(!kfd->doorbell_kernel_ptr); >> + >> + pr_debug("kfd: doorbell initialization\n" >> + " doorbell base == 0x%08lX\n" >> + " doorbell_id_offset == 0x%08lu\n" >> + " doorbell_process_limit == 0x%08lu\n" >> + " doorbell_kernel_offset == 0x%08lX\n" >> + " doorbell aperture size == 
0x%08lX\n" >> + " doorbell kernel address == 0x%08lX\n", >> + (uintptr_t)kfd->doorbell_base, >> + kfd->doorbell_id_offset, >> + doorbell_process_limit, >> + (uintptr_t)kfd->doorbell_base, >> + kfd->shared_resources.doorbell_aperture_size, >> + (uintptr_t)kfd->doorbell_kernel_ptr); > > Kind of ugly, will break some of the kernel log manager, you need to do one > pr_debug call per line. > Done in v3 >> + >> +} >> + >> +/* >> + * This is the /dev/kfd mmap (for doorbell) implementation. >> + * We intend that this is only called through map_doorbells, not through >> + * user-mode mmap of /dev/kfd >> + */ >> +int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) >> +{ >> + unsigned int device_index; >> + struct kfd_dev *dev; >> + phys_addr_t start; >> + >> + BUG_ON(vma->vm_pgoff < KFD_MMAP_DOORBELL_START || vma->vm_pgoff >= KFD_MMAP_DOORBELL_END); >> + >> + /* For simplicitly we only allow mapping of the entire doorbell allocation of a single device & process. */ >> + if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) >> + return -EINVAL; >> + >> + /* device_index must be GPU ID!! 
*/ >> + device_index = vma->vm_pgoff - KFD_MMAP_DOORBELL_START; >> + >> + dev = kfd_device_by_id(device_index); >> + if (dev == NULL) >> + return -EINVAL; >> + >> + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; >> + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); >> + >> + start = dev->doorbell_base + process->pasid * doorbell_process_allocation(); >> + >> + pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n" >> + " target user address == 0x%016llX\n" >> + " physical address == 0x%016llX\n" >> + " vm_flags == 0x%08lX\n" >> + " size == 0x%08lX\n", >> + (long long unsigned int) vma->vm_start, start, vma->vm_flags, >> + doorbell_process_allocation()); >> + >> + return io_remap_pfn_range(vma, >> + vma->vm_start, >> + start >> PAGE_SHIFT, >> + doorbell_process_allocation(), >> + vma->vm_page_prot); >> +} >> + >> +/* >> + * Map the doorbells for a single process & device. >> + * This will indirectly call kfd_doorbell_mmap. >> + * This assumes that the process mutex is being held. >> + */ >> +static int map_doorbells(struct file *devkfd, struct kfd_process *process, >> + struct kfd_dev *dev) >> +{ >> + struct kfd_process_device *pdd = kfd_get_process_device_data(dev, process); >> + >> + if (pdd == NULL) >> + return -ENOMEM; >> + >> + if (pdd->doorbell_mapping == NULL) { >> + unsigned long offset = (KFD_MMAP_DOORBELL_START + dev->id) << PAGE_SHIFT; >> + doorbell_t __user *doorbell_mapping; >> + >> + doorbell_mapping = (doorbell_t __user *)vm_mmap(devkfd, 0, doorbell_process_allocation(), PROT_WRITE, >> + MAP_SHARED, offset); > > Like said above have the userspace do that. Do not do it inside > the kernel. 
> Done in v3 >> + if (IS_ERR(doorbell_mapping)) >> + return PTR_ERR(doorbell_mapping); >> + >> + pdd->doorbell_mapping = doorbell_mapping; >> + } >> + >> + return 0; >> +} >> + >> +/* get kernel iomem pointer for a doorbell */ >> +u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off) >> +{ >> + u32 inx; >> + >> + BUG_ON(!kfd || !doorbell_off); >> + >> + mutex_lock(&doorbell_mutex); >> + inx = find_first_zero_bit(doorbell_available_index, MAX_PROCESS_QUEUES); >> + __set_bit(inx, doorbell_available_index); >> + mutex_unlock(&doorbell_mutex); >> + >> + if (inx >= MAX_PROCESS_QUEUES) >> + return NULL; >> + >> + /* caluculating the kernel doorbell offset using "faked" kernel pasid that allocated for kernel queues only */ >> + *doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation()/sizeof(doorbell_t)) + inx; >> + >> + pr_debug("kfd: get kernel queue doorbell\n" >> + " doorbell offset == 0x%08d\n" >> + " kernel address == 0x%08lX\n", >> + *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); >> + >> + return kfd->doorbell_kernel_ptr + inx; >> +} >> + >> +void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) >> +{ >> + unsigned int inx; >> + >> + BUG_ON(!kfd || !db_addr); >> + >> + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); >> + >> + mutex_lock(&doorbell_mutex); >> + __clear_bit(inx, doorbell_available_index); >> + mutex_unlock(&doorbell_mutex); >> +} >> + >> +inline void write_kernel_doorbell(u32 __iomem *db, u32 value) >> +{ >> + if (db) { >> + writel(value, db); >> + pr_debug("writing %d to doorbell address 0x%p\n", value, db); >> + } >> +} >> + >> +/* >> + * Get the user-mode address of a doorbell. >> + * Assumes that the process mutex is being held. 
>> + */ >> +doorbell_t __user *kfd_get_doorbell(struct file *devkfd, >> + struct kfd_process *process, >> + struct kfd_dev *dev, >> + unsigned int doorbell_index) >> +{ >> + struct kfd_process_device *pdd; >> + int err; >> + >> + BUG_ON(doorbell_index > MAX_DOORBELL_INDEX); >> + >> + err = map_doorbells(devkfd, process, dev); >> + if (err) >> + return ERR_PTR(err); >> + >> + pdd = kfd_get_process_device_data(dev, process); >> + BUG_ON(pdd == NULL); /* map_doorbells would have failed otherwise */ >> + >> + pr_debug("doorbell value on creation 0x%x\n", pdd->doorbell_mapping[doorbell_index]); >> + >> + return &pdd->doorbell_mapping[doorbell_index]; >> +} >> + >> +/* >> + * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 >> + * to doorbells with the process's doorbell page >> + */ >> +unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, struct kfd_process *process, unsigned int queue_id) >> +{ >> + /* >> + * doorbell_id_offset accounts for doorbells taken by KGD. >> + * pasid * doorbell_process_allocation/sizeof(doorbell_t) adjusts >> + * to the process's doorbells >> + */ >> + return kfd->doorbell_id_offset + process->pasid * (doorbell_process_allocation()/sizeof(doorbell_t)) + queue_id; >> +} >> + >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_module.c b/drivers/gpu/drm/radeon/amdkfd/kfd_module.c >> index c51f981..dc08f51 100644 >> --- a/drivers/gpu/drm/radeon/amdkfd/kfd_module.c >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_module.c >> @@ -65,14 +65,30 @@ void kgd2kfd_exit(void) >> { >> } >> >> +extern int kfd_process_exit(struct notifier_block *nb, >> + unsigned long action, void *data); >> + >> +static struct notifier_block kfd_mmput_nb = { >> + .notifier_call = kfd_process_exit, >> + .priority = 3, >> +}; >> + >> static int __init kfd_module_init(void) >> { >> int err; >> >> + err = kfd_pasid_init(); >> + if (err < 0) >> + goto err_pasid; >> + >> err = kfd_chardev_init(); >> if (err < 0) >> goto err_ioctl; >> >> + err = 
mmput_register_notifier(&kfd_mmput_nb); >> + if (err) >> + goto err_mmu_notifier; >> + >> err = kfd_topology_init(); >> if (err < 0) >> goto err_topology; >> @@ -82,15 +98,21 @@ static int __init kfd_module_init(void) >> return 0; >> >> err_topology: >> + mmput_unregister_notifier(&kfd_mmput_nb); >> +err_mmu_notifier: >> kfd_chardev_exit(); >> err_ioctl: >> + kfd_pasid_exit(); >> +err_pasid: >> return err; >> } >> >> static void __exit kfd_module_exit(void) >> { >> kfd_topology_shutdown(); >> + mmput_unregister_notifier(&kfd_mmput_nb); >> kfd_chardev_exit(); >> + kfd_pasid_exit(); >> dev_info(kfd_device, "Removed module\n"); >> } >> >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_pasid.c b/drivers/gpu/drm/radeon/amdkfd/kfd_pasid.c >> new file mode 100644 >> index 0000000..0b594e4 >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_pasid.c >> @@ -0,0 +1,97 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + */ >> + >> +#include <linux/slab.h> >> +#include <linux/types.h> >> +#include "kfd_priv.h" >> + >> +#define INITIAL_PASID_LIMIT (1<<20) >> + >> +static unsigned long *pasid_bitmap; >> +static pasid_t pasid_limit; >> +static DEFINE_MUTEX(pasid_mutex); >> + >> +int kfd_pasid_init(void) >> +{ >> + pasid_limit = INITIAL_PASID_LIMIT; >> + >> + pasid_bitmap = kzalloc(DIV_ROUND_UP(INITIAL_PASID_LIMIT, BITS_PER_BYTE), GFP_KERNEL); >> + if (!pasid_bitmap) >> + return -ENOMEM; >> + >> + set_bit(0, pasid_bitmap); /* PASID 0 is reserved. */ >> + >> + return 0; >> +} >> + >> +void kfd_pasid_exit(void) >> +{ >> + kfree(pasid_bitmap); >> +} >> + >> +bool kfd_set_pasid_limit(pasid_t new_limit) >> +{ >> + if (new_limit < pasid_limit) { >> + bool ok; >> + >> + mutex_lock(&pasid_mutex); >> + >> + /* ensure that no pasids >= new_limit are in-use */ >> + ok = (find_next_bit(pasid_bitmap, pasid_limit, new_limit) == pasid_limit); >> + if (ok) >> + pasid_limit = new_limit; >> + >> + mutex_unlock(&pasid_mutex); >> + >> + return ok; >> + } >> + >> + return true; >> +} >> + >> +inline pasid_t kfd_get_pasid_limit(void) >> +{ >> + return pasid_limit; >> +} >> + >> +pasid_t kfd_pasid_alloc(void) >> +{ >> + pasid_t found; >> + >> + mutex_lock(&pasid_mutex); >> + >> + found = find_first_zero_bit(pasid_bitmap, pasid_limit); >> + if (found == pasid_limit) >> + found = 0; >> + else >> + set_bit(found, pasid_bitmap); >> + >> + mutex_unlock(&pasid_mutex); >> + >> + return found; >> +} >> + >> +void kfd_pasid_free(pasid_t pasid) >> +{ >> + BUG_ON(pasid == 0 || pasid >= pasid_limit); >> + clear_bit(pasid, pasid_bitmap); >> +} >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h >> index b391e24..af5a5e4 100644 >> --- a/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h >> @@ -32,14 +32,39 @@ >> #include <linux/spinlock.h> >> #include "../radeon_kfd.h" >> >> +/* >> + * Per-process limit. Each process can only >> + * create MAX_PROCESS_QUEUES across all devices >> + */ >> +#define MAX_PROCESS_QUEUES 1024 >> + >> +#define MAX_DOORBELL_INDEX MAX_PROCESS_QUEUES >> #define KFD_SYSFS_FILE_MODE 0444 >> >> +/* >> + * We multiplex different sorts of mmap-able memory onto /dev/kfd. >> + * We figure out what type of memory the caller wanted by comparing >> + * the mmap page offset to known ranges. >> + */ >> +#define KFD_MMAP_DOORBELL_START (((1ULL << 32)*1) >> PAGE_SHIFT) >> +#define KFD_MMAP_DOORBELL_END (((1ULL << 32)*2) >> PAGE_SHIFT) >> + >> /* GPU ID hash width in bits */ >> #define KFD_GPU_ID_HASH_WIDTH 16 >> >> /* Macro for allocating structures */ >> #define kfd_alloc_struct(ptr_to_struct) ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) >> >> +/* >> + * Large enough to hold the maximum usable pasid + 1. >> + * It must also be able to store the number of doorbells >> + * reported by a KFD device. >> + */ >> +typedef unsigned int pasid_t; >> + >> +/* Type that represents a HW doorbell slot. */ >> +typedef u32 doorbell_t; >> + >> struct kfd_device_info { >> const struct kfd_scheduler_class *scheduler_class; >> unsigned int max_pasid_bits; >> @@ -56,6 +81,17 @@ struct kfd_dev { >> >> unsigned int id; /* topology stub index */ >> >> + phys_addr_t doorbell_base; /* Start of actual doorbells used by >> + * KFD. It is aligned for mapping >> + * into user mode >> + */ >> + size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell >> + * to HW doorbell, GFX reserved some >> + * at the start) >> + */ >> + size_t doorbell_process_limit; /* Number of processes we have doorbell space for. 
*/ >> + u32 __iomem *doorbell_kernel_ptr; /* this is a pointer for a doorbells page used by kernel queue */ >> + >> struct kgd2kfd_shared_resources shared_resources; >> }; >> >> @@ -68,15 +104,124 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd); >> >> extern const struct kfd2kgd_calls *kfd2kgd; >> >> +/* Dummy struct just to make kfd_mem_obj* a unique pointer type. */ >> +struct kfd_mem_obj_s; >> +typedef struct kfd_mem_obj_s *kfd_mem_obj; > > IIRC the rule is no more typedef in kernel. Or maybe i just dreamt > that rule. > Removed all typedefs in v3 >> + >> +enum kfd_mempool { >> + KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, >> + KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, >> + KFD_MEMPOOL_FRAMEBUFFER = 3, >> +}; >> + >> + >> +int kfd_vidmem_alloc(struct kfd_dev *kfd, size_t size, size_t alignment, >> + enum kfd_mempool pool, kfd_mem_obj *mem_obj); >> +void kfd_vidmem_free(struct kfd_dev *kfd, kfd_mem_obj mem_obj); >> +int kfd_vidmem_gpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, uint64_t *vmid0_address); >> +void kfd_vidmem_ungpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj); >> +int kfd_vidmem_kmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, void **ptr); >> +void kfd_vidmem_unkmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj); >> +int kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj, void **ptr, >> + uint64_t *vmid0_address, size_t size); >> +void kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj); >> /* Character device interface */ >> int kfd_chardev_init(void); >> void kfd_chardev_exit(void); >> struct device *kfd_chardev(void); >> >> + >> +/* Data that is per-process-per device. */ >> +struct kfd_process_device { >> + /* >> + * List of all per-device data for a process. >> + * Starts from kfd_process.per_device_data. >> + */ >> + struct list_head per_device_list; >> + >> + /* The device that owns this data. */ >> + struct kfd_dev *dev; >> + >> + /* The user-mode address of the doorbell mapping for this device. 
*/ >> + doorbell_t __user *doorbell_mapping; >> + >> + /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ >> + bool bound; > > Best to put the boolean at the end of the structure ... > Done in v3 >> + >> + /*Apertures*/ >> + uint64_t lds_base; >> + uint64_t lds_limit; >> + uint64_t gpuvm_base; >> + uint64_t gpuvm_limit; >> + uint64_t scratch_base; >> + uint64_t scratch_limit; >> +}; >> + >> /* Process data */ >> struct kfd_process { >> + struct list_head processes_list; >> + >> + struct mm_struct *mm; >> + >> + struct mutex mutex; >> + >> + /* >> + * In any process, the thread that started main() is the lead >> + * thread and outlives the rest. >> + * It is here because amd_iommu_bind_pasid wants a task_struct. >> + */ >> + struct task_struct *lead_thread; >> + >> + pasid_t pasid; >> + >> + /* >> + * List of kfd_process_device structures, >> + * one for each device the process is using. >> + */ >> + struct list_head per_device_data; >> + >> + /* The process's queues. */ >> + size_t queue_array_size; >> + >> + /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. 
*/ >> + struct kfd_queue **queues; >> + >> + unsigned long allocated_queue_bitmap[DIV_ROUND_UP(MAX_PROCESS_QUEUES, BITS_PER_LONG)]; >> + >> + /*Is the user space process 32 bit?*/ >> + bool is_32bit_user_mode; >> }; >> >> +struct kfd_process *kfd_create_process(const struct task_struct *); >> +struct kfd_process *kfd_get_process(const struct task_struct *); >> + >> +struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, >> + struct kfd_process *p); >> + >> +/* PASIDs */ >> +int kfd_pasid_init(void); >> +void kfd_pasid_exit(void); >> +bool kfd_set_pasid_limit(pasid_t new_limit); >> +pasid_t kfd_get_pasid_limit(void); >> +pasid_t kfd_pasid_alloc(void); >> +void kfd_pasid_free(pasid_t pasid); >> + >> +/* Doorbells */ >> +void kfd_doorbell_init(struct kfd_dev *kfd); >> +int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); >> +doorbell_t __user *kfd_get_doorbell(struct file *devkfd, >> + struct kfd_process *process, >> + struct kfd_dev *dev, >> + unsigned int doorbell_index); >> +u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, >> + unsigned int *doorbell_off); >> +void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); >> +u32 read_kernel_doorbell(u32 __iomem *db); >> +void write_kernel_doorbell(u32 __iomem *db, u32 value); >> +unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, >> + struct kfd_process *process, >> + unsigned int queue_id); >> + >> extern struct device *kfd_device; >> >> /* Topology */ >> @@ -95,4 +240,7 @@ void kgd2kfd_interrupt(struct kfd_dev *dev, const void *ih_ring_entry); >> void kgd2kfd_suspend(struct kfd_dev *dev); >> int kgd2kfd_resume(struct kfd_dev *dev); >> >> +/* amdkfd Apertures */ >> +int kfd_init_apertures(struct kfd_process *process); >> + >> #endif >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_process.c b/drivers/gpu/drm/radeon/amdkfd/kfd_process.c >> new file mode 100644 >> index 0000000..5efbce0 >> --- /dev/null >> +++ 
b/drivers/gpu/drm/radeon/amdkfd/kfd_process.c >> @@ -0,0 +1,374 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + */ >> + >> +#include <linux/mutex.h> >> +#include <linux/log2.h> >> +#include <linux/sched.h> >> +#include <linux/slab.h> >> +#include <linux/notifier.h> >> +struct mm_struct; >> + >> +#include "kfd_priv.h" >> + >> +/* >> + * Initial size for the array of queues. >> + * The allocated size is doubled each time >> + * it is exceeded up to MAX_PROCESS_QUEUES. 
>> + */ >> +#define INITIAL_QUEUE_ARRAY_SIZE 16 >> + >> +/* List of struct kfd_process */ >> +static struct list_head kfd_processes_list = LIST_HEAD_INIT(kfd_processes_list); >> + >> +static DEFINE_MUTEX(kfd_processes_mutex); >> + >> +static struct kfd_process *create_process(const struct task_struct *thread); >> + >> +struct kfd_process *kfd_create_process(const struct task_struct *thread) >> +{ >> + struct kfd_process *process; >> + >> + if (thread->mm == NULL) >> + return ERR_PTR(-EINVAL); >> + >> + /* Only the pthreads threading model is supported. */ >> + if (thread->group_leader->mm != thread->mm) >> + return ERR_PTR(-EINVAL); >> + >> + /* >> + * take kfd processes mutex before starting of process creation >> + * so there won't be a case where two threads of the same process >> + * create two kfd_process structures >> + */ >> + mutex_lock(&kfd_processes_mutex); >> + >> + /* A prior open of /dev/kfd could have already created the process. */ >> + process = thread->mm->kfd_process; >> + if (process) >> + pr_debug("kfd: process already found\n"); >> + >> + if (!process) >> + process = create_process(thread); >> + >> + mutex_unlock(&kfd_processes_mutex); >> + >> + return process; >> +} >> + >> +struct kfd_process *kfd_get_process(const struct task_struct *thread) >> +{ >> + struct kfd_process *process; >> + >> + if (thread->mm == NULL) >> + return ERR_PTR(-EINVAL); >> + >> + /* Only the pthreads threading model is supported. 
*/ >> + if (thread->group_leader->mm != thread->mm) >> + return ERR_PTR(-EINVAL); >> + >> + process = thread->mm->kfd_process; >> + >> + return process; >> +} >> + >> +static void free_process(struct kfd_process *p) >> +{ >> + struct kfd_process_device *pdd, *temp; >> + >> + BUG_ON(p == NULL); >> + >> + list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) { >> + list_del(&pdd->per_device_list); >> + kfree(pdd); >> + } >> + >> + kfd_pasid_free(p->pasid); >> + >> + mutex_destroy(&p->mutex); >> + >> + kfree(p->queues); >> + >> + list_del(&p->processes_list); >> + >> + kfree(p); >> +} >> + >> +int kfd_process_exit(struct notifier_block *nb, >> + unsigned long action, void *data) >> +{ >> + struct mm_struct *mm = data; >> + struct kfd_process *p; >> + >> + mutex_lock(&kfd_processes_mutex); >> + >> + p = mm->kfd_process; >> + if (p) { >> + free_process(p); >> + mm->kfd_process = NULL; >> + } >> + >> + mutex_unlock(&kfd_processes_mutex); >> + >> + return 0; >> +} >> + >> +static struct kfd_process *create_process(const struct task_struct *thread) >> +{ >> + struct kfd_process *process; >> + int err = -ENOMEM; >> + >> + process = kzalloc(sizeof(*process), GFP_KERNEL); >> + >> + if (!process) >> + goto err_alloc_process; >> + >> + process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE, sizeof(process->queues[0]), GFP_KERNEL); >> + if (!process->queues) >> + goto err_alloc_queues; >> + >> + process->pasid = kfd_pasid_alloc(); >> + if (process->pasid == 0) >> + goto err_alloc_pasid; >> + >> + mutex_init(&process->mutex); >> + >> + process->mm = thread->mm; >> + thread->mm->kfd_process = process; >> + list_add_tail(&process->processes_list, &kfd_processes_list); >> + >> + process->lead_thread = thread->group_leader; >> + >> + process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE; >> + >> + INIT_LIST_HEAD(&process->per_device_data); >> + >> + return process; >> + >> +err_alloc_pasid: >> + kfree(process->queues); >> +err_alloc_queues: >> + kfree(process); 
>> +err_alloc_process: >> + return ERR_PTR(err); >> +} >> + >> +struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, >> + struct kfd_process *p) >> +{ >> + struct kfd_process_device *pdd; >> + >> + list_for_each_entry(pdd, &p->per_device_data, per_device_list) >> + if (pdd->dev == dev) >> + return pdd; >> + >> + pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); >> + if (pdd != NULL) { >> + pdd->dev = dev; >> + list_add(&pdd->per_device_list, &p->per_device_data); >> + } >> + >> + return pdd; >> +} >> + >> +/* >> + * Direct the IOMMU to bind the process (specifically the pasid->mm) to the device. >> + * Unbinding occurs when the process dies or the device is removed. >> + * >> + * Assumes that the process lock is held. >> + */ >> +struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, >> + struct kfd_process *p) >> +{ >> + struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p); >> + >> + if (pdd == NULL) >> + return ERR_PTR(-ENOMEM); >> + >> + if (pdd->bound) >> + return pdd; >> + >> + pdd->bound = true; >> + >> + return pdd; >> +} >> + >> +void kfd_unbind_process_from_device(struct kfd_dev *dev, pasid_t pasid) >> +{ >> + struct kfd_process *p; >> + struct kfd_process_device *pdd; >> + >> + BUG_ON(dev == NULL); >> + >> + mutex_lock(&kfd_processes_mutex); >> + >> + list_for_each_entry(p, &kfd_processes_list, processes_list) >> + if (p->pasid == pasid) >> + break; >> + >> + mutex_unlock(&kfd_processes_mutex); >> + >> + BUG_ON(p->pasid != pasid); >> + >> + pdd = kfd_get_process_device_data(dev, p); >> + >> + BUG_ON(pdd == NULL); >> + >> + mutex_lock(&p->mutex); >> + >> + /* >> + * Just mark pdd as unbound, because we still need it to call >> + * amd_iommu_unbind_pasid() when the process exits. >> + * We don't call amd_iommu_unbind_pasid() here >> + * because the IOMMU called us.
>> + */ >> + pdd->bound = false; >> + >> + mutex_unlock(&p->mutex); >> +} >> + >> +/* >> + * Ensure that the process's queue array is large enough to hold >> + * the queue at queue_id. >> + * Assumes that the process lock is held. >> + */ >> +static bool ensure_queue_array_size(struct kfd_process *p, unsigned int queue_id) >> +{ >> + size_t desired_size; >> + struct kfd_queue **new_queues; >> + >> + compiletime_assert(INITIAL_QUEUE_ARRAY_SIZE > 0, "INITIAL_QUEUE_ARRAY_SIZE must not be 0"); >> + compiletime_assert(INITIAL_QUEUE_ARRAY_SIZE <= MAX_PROCESS_QUEUES, >> + "INITIAL_QUEUE_ARRAY_SIZE must be less than MAX_PROCESS_QUEUES"); >> + /* Ensure that doubling the current size won't ever overflow. */ >> + compiletime_assert(MAX_PROCESS_QUEUES < SIZE_MAX / 2, "MAX_PROCESS_QUEUES must be less than SIZE_MAX/2"); >> + >> + /* >> + * These & queue_id < MAX_PROCESS_QUEUES guarantee that >> + * the desired_size calculation will end up <= MAX_PROCESS_QUEUES >> + */ >> + compiletime_assert(is_power_of_2(INITIAL_QUEUE_ARRAY_SIZE), "INITIAL_QUEUE_ARRAY_SIZE must be power of 2."); >> + compiletime_assert(MAX_PROCESS_QUEUES % INITIAL_QUEUE_ARRAY_SIZE == 0, >> + "MAX_PROCESS_QUEUES must be multiple of INITIAL_QUEUE_ARRAY_SIZE."); >> + compiletime_assert(is_power_of_2(MAX_PROCESS_QUEUES / INITIAL_QUEUE_ARRAY_SIZE), >> + "MAX_PROCESS_QUEUES must be a power-of-2 multiple of INITIAL_QUEUE_ARRAY_SIZE."); >> + >> + if (queue_id < p->queue_array_size) >> + return true; >> + >> + if (queue_id >= MAX_PROCESS_QUEUES) >> + return false; >> + >> + desired_size = p->queue_array_size; >> + while (desired_size <= queue_id) >> + desired_size *= 2; >> + >> + BUG_ON(desired_size < queue_id || desired_size > MAX_PROCESS_QUEUES); >> + BUG_ON(desired_size % INITIAL_QUEUE_ARRAY_SIZE != 0 || !is_power_of_2(desired_size / INITIAL_QUEUE_ARRAY_SIZE)); >> + >> + new_queues = kmalloc_array(desired_size, sizeof(p->queues[0]), GFP_KERNEL); >> + if (!new_queues) >> + return false; >> + >> + memcpy(new_queues, 
p->queues, p->queue_array_size * sizeof(p->queues[0])); >> + >> + kfree(p->queues); >> + p->queues = new_queues; >> + p->queue_array_size = desired_size; >> + >> + return true; >> +} >> + >> +/* Assumes that the process lock is held. */ >> +bool kfd_allocate_queue_id(struct kfd_process *p, unsigned int *queue_id) >> +{ >> + unsigned int qid = find_first_zero_bit(p->allocated_queue_bitmap, MAX_PROCESS_QUEUES); >> + >> + if (qid >= MAX_PROCESS_QUEUES) >> + return false; >> + >> + if (!ensure_queue_array_size(p, qid)) >> + return false; >> + >> + __set_bit(qid, p->allocated_queue_bitmap); >> + >> + p->queues[qid] = NULL; >> + *queue_id = qid; >> + >> + return true; >> +} >> + >> +/* >> + * Install a queue into a previously-allocated queue id. >> + * Assumes that the process lock is held. >> + */ >> +void kfd_install_queue(struct kfd_process *p, unsigned int queue_id, struct kfd_queue *queue) >> +{ >> + /* Have to call allocate_queue_id before install_queue. */ >> + BUG_ON(queue_id >= p->queue_array_size); >> + BUG_ON(queue == NULL); >> + >> + p->queues[queue_id] = queue; >> +} >> + >> +/* >> + * Remove a queue from the open queue list and deallocate the queue id. >> + * This can be called whether or not a queue was installed. >> + * Assumes that the process lock is held. >> + */ >> +void kfd_remove_queue(struct kfd_process *p, unsigned int queue_id) >> +{ >> + BUG_ON(!test_bit(queue_id, p->allocated_queue_bitmap)); >> + BUG_ON(queue_id >= p->queue_array_size); >> + >> + __clear_bit(queue_id, p->allocated_queue_bitmap); >> +} >> + >> +/* Assumes that the process lock is held. */ >> +struct kfd_queue *kfd_get_queue(struct kfd_process *p, unsigned int queue_id) >> +{ >> + /* >> + * test_bit because the contents of unallocated >> + * queue slots are undefined. >> + * Otherwise ensure_queue_array_size would have to clear new entries and >> + * remove_queue would have to NULL removed queues. 
>> + */ >> + return (queue_id < p->queue_array_size && >> + test_bit(queue_id, p->allocated_queue_bitmap)) ? >> + p->queues[queue_id] : NULL; >> +} >> + >> +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) >> +{ >> + return list_first_entry(&p->per_device_data, struct kfd_process_device, per_device_list); >> +} >> + >> +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd) >> +{ >> + if (list_is_last(&pdd->per_device_list, &p->per_device_data)) >> + return NULL; >> + return list_next_entry(pdd, per_device_list); >> +} >> + >> +bool kfd_has_process_device_data(struct kfd_process *p) >> +{ >> + return !(list_empty(&p->per_device_data)); >> +} >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_vidmem.c b/drivers/gpu/drm/radeon/amdkfd/kfd_vidmem.c >> new file mode 100644 >> index 0000000..a2c4d30 >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_vidmem.c >> @@ -0,0 +1,96 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + */ >> + >> +#include "kfd_priv.h" >> + >> +int kfd_vidmem_alloc(struct kfd_dev *kfd, size_t size, size_t alignment, >> + enum kfd_mempool pool, kfd_mem_obj *mem_obj) >> +{ >> + return kfd2kgd->allocate_mem(kfd->kgd, >> + size, >> + alignment, >> + (enum kgd_memory_pool)pool, >> + (struct kgd_mem **)mem_obj); >> +} >> + >> +void kfd_vidmem_free(struct kfd_dev *kfd, kfd_mem_obj mem_obj) >> +{ >> + kfd2kgd->free_mem(kfd->kgd, (struct kgd_mem *)mem_obj); >> +} >> + >> +int kfd_vidmem_gpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, >> + uint64_t *vmid0_address) >> +{ >> + return kfd2kgd->gpumap_mem(kfd->kgd, >> + (struct kgd_mem *)mem_obj, >> + vmid0_address); > > As discussed previously this will not fly, pinning gpu memory is a big NACK. 
> >> +} >> + >> +void kfd_vidmem_ungpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj) >> +{ >> + kfd2kgd->ungpumap_mem(kfd->kgd, (struct kgd_mem *)mem_obj); >> +} >> + >> +int kfd_vidmem_kmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, void **ptr) >> +{ >> + return kfd2kgd->kmap_mem(kfd->kgd, (struct kgd_mem *)mem_obj, ptr); >> +} >> + >> +void kfd_vidmem_unkmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj) >> +{ >> + kfd2kgd->unkmap_mem(kfd->kgd, (struct kgd_mem *)mem_obj); >> +} >> + >> +int kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj, >> + void **ptr, uint64_t *vmid0_address, size_t size) >> +{ >> + int retval; >> + >> + retval = kfd_vidmem_alloc(kfd, size, PAGE_SIZE, >> + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, mem_obj); >> + if (retval != 0) >> + goto fail_vidmem_alloc; >> + >> + retval = kfd_vidmem_kmap(kfd, *mem_obj, ptr); >> + if (retval != 0) >> + goto fail_vidmem_kmap; >> + >> + retval = kfd_vidmem_gpumap(kfd, *mem_obj, vmid0_address); >> + if (retval != 0) >> + goto fail_vidmem_gpumap; >> + >> + return 0; >> + >> +fail_vidmem_gpumap: >> + kfd_vidmem_unkmap(kfd, *mem_obj); >> +fail_vidmem_kmap: >> + kfd_vidmem_free(kfd, *mem_obj); >> +fail_vidmem_alloc: >> + return retval; >> +} >> + >> +void kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj) >> +{ >> + kfd_vidmem_ungpumap(kfd, mem_obj); >> + kfd_vidmem_unkmap(kfd, mem_obj); >> + kfd_vidmem_free(kfd, mem_obj); >> +} >> -- >> 1.9.1 >> _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel