Thank you for the review, Andrzej. I'll update the patch to v2 shortly; it
should cover all your comments.

FYI, I'll be on holiday until September 16, so I might not be able to
respond in the next few days.

Regards,
Szymon

On 03.09.2019 15:19, Andrzej Hajda wrote:
> +CC: $(./script/get_maintainers.pl drivers/gpu/drm/vgem/vgem_drv.c)
>
>
> On 20.08.2019 08:58, Szymon Andrzejuk wrote:
>> Page fault handler inside vgem driver now preallocates pages in advance
>> when fault occurs for the first time. Pages can be allocated in
>> direction of increasing/decreasing addresses, depending on memory access
>> profile. In case of random access no preallocation occurs.
>>
>> Synthetic benchmark showed over 8x bandwidth increase when copying data
>> from mmapped vgem buffer with memcpy and ~160 times when accessing mapped
>> buffer sequentially. Compiled with gcc 8.2.0 with -O2 flag.
>> Unigine Heaven running on custom virgl vtest virtual GPU with vgem buffers
>> sees ~17% FPS increase.
>>
>> This performance increase only occurs when accessing vgem buffer mapped
>> using DRM_IOCTL_MODE_MAP_DUMB ioctl. When accessing buffer imported
>> from prime fd the vgem page fault handler is not invoked. It's advised
>> to use vector streaming copy instructions and avoid sequential accesses
>> in this case. Streaming copy brings the performance to be on par with
>> similar buffer allocated with memfd_create(2) syscall.
>>
>> Signed-off-by: Szymon Andrzejuk <s.andrzejuk@xxxxxxxxxxx>
>> ---
>>  drivers/gpu/drm/vgem/vgem_drv.c | 177 ++++++++++++++++++++++++++------
>>  1 file changed, 143 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
>> index 11a8f99ba18c..739ba841e89c 100644
>> --- a/drivers/gpu/drm/vgem/vgem_drv.c
>> +++ b/drivers/gpu/drm/vgem/vgem_drv.c
>> @@ -34,6 +34,7 @@
>>  #include <linux/ramfs.h>
>>  #include <linux/shmem_fs.h>
>>  #include <linux/dma-buf.h>
>> +#include <linux/pfn_t.h>
>>  #include "vgem_drv.h"
>>
>>  #define DRIVER_NAME "vgem"
>> @@ -50,8 +51,21 @@ static struct vgem_device {
>>  static void vgem_gem_free_object(struct drm_gem_object *obj)
>>  {
>>          struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
>> +        int i;
>> +
>> +        mutex_lock(&vgem_obj->pages_lock);
>> +        if (vgem_obj->pages) {
>> +                int num_pages = obj->size >> PAGE_SHIFT;
>> +
>> +                for (i = 0; i < num_pages; i++) {
>> +                        if (vgem_obj->pages[i])
>> +                                put_page(vgem_obj->pages[i]);
>> +                }
>> +                kvfree(vgem_obj->pages);
>> +                vgem_obj->pages = NULL;
>> +        }
>> +        mutex_unlock(&vgem_obj->pages_lock);
>>
>> -        kvfree(vgem_obj->pages);
>>          mutex_destroy(&vgem_obj->pages_lock);
>>
>>          if (obj->import_attach)
>> @@ -61,6 +75,72 @@ static void vgem_gem_free_object(struct drm_gem_object *obj)
>>          kfree(vgem_obj);
>>  }
>>
>> +static int __vgem_alloc_page(struct page *page, struct vm_area_struct *vma,
>> +                             unsigned long vaddr, int page_num)
>
> unused page_num
>
>
>> +{
>> +        unsigned long pfn;
>> +        int insert_ret;
>> +
>> +        pfn = page_to_pfn(page);
>> +        insert_ret = vmf_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV));
>> +
>> +        if (insert_ret & VM_FAULT_ERROR)
>> +                return VM_FAULT_ERROR;
>
> Is it OK to return the mask instead of insert_ret?
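
For illustration only, a minimal sketch of what propagating the concrete
code from vmf_insert_mixed(), rather than the VM_FAULT_ERROR mask, could
look like; the exact shape is an assumption, not necessarily what v2 will
do:

        vm_fault_t insert_ret;

        insert_ret = vmf_insert_mixed(vma, vaddr,
                                      __pfn_to_pfn_t(page_to_pfn(page), PFN_DEV));
        if (insert_ret & VM_FAULT_ERROR)
                return insert_ret;      /* e.g. VM_FAULT_OOM or VM_FAULT_SIGBUS */

        return 0;

The callers would then need to carry a vm_fault_t (rather than an int)
upwards so the specific code is not lost on the way back to the fault
handler.
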
>
>
>> +
>> +        return 0;
>> +}
>> +
>> +static int __vgem_read_mapping_page(struct drm_vgem_gem_object *obj,
>> +                                    int page_num, struct page **page)
>> +{
>> +        int ret;
>> +        struct page *mapped_page;
>> +
>> +        mapped_page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
>> +                                              page_num);
>> +        if (IS_ERR(page)) {
>> +                switch (PTR_ERR(page)) {
>> +                case -ENOSPC:
>> +                case -ENOMEM:
>> +                        ret = VM_FAULT_OOM;
>> +                        break;
>> +                case -EBUSY:
>> +                        ret = VM_FAULT_RETRY;
>> +                        break;
>> +                case -EFAULT:
>> +                case -EINVAL:
>> +                        ret = VM_FAULT_SIGBUS;
>> +                        break;
>> +                default:
>> +                        WARN_ON(PTR_ERR(page));
>> +                        ret = VM_FAULT_SIGBUS;
>> +                        break;
>> +                }
>> +
>> +                return ret;
>> +        }
>> +
>> +        *page = mapped_page;
>> +        return 0;
>> +}
>> +
>> +static int __vgem_prepare_single_page(struct drm_vgem_gem_object *obj,
>> +                                      struct vm_area_struct *vma,
>> +                                      int page_num, unsigned long vaddr)
>> +{
>> +        int ret;
>> +
>> +        ret = __vgem_read_mapping_page(obj, page_num, &obj->pages[page_num]);
>> +        if (ret)
>> +                return ret;
>> +
>> +        ret = __vgem_alloc_page(obj->pages[page_num], vma, vaddr, page_num);
>> +        if (ret)
>> +                return ret;
>
> One can use return __vgem_alloc_page(...), up to you.
>
>
>> +
>> +        return 0;
>> +}
>> +
>>  static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>  {
>>          struct vm_area_struct *vma = vmf->vma;
>> @@ -70,6 +150,8 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>          vm_fault_t ret = VM_FAULT_SIGBUS;
>>          loff_t num_pages;
>>          pgoff_t page_offset;
>> +        int page_num, page_prep_ret;
>> +        const int PREFAULT_PAGES = 8;
>>          page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT;
>>
>>          num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
>> @@ -77,41 +159,60 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>          if (page_offset >= num_pages)
>>                  return VM_FAULT_SIGBUS;
>>
>> +        ret = VM_FAULT_NOPAGE;
>> +
>>          mutex_lock(&obj->pages_lock);
>> -        if (obj->pages) {
>> -                get_page(obj->pages[page_offset]);
>> -                vmf->page = obj->pages[page_offset];
>> -                ret = 0;
>> -        }
>> -        mutex_unlock(&obj->pages_lock);
>> -        if (ret) {
>> -                struct page *page;
>> -
>> -                page = shmem_read_mapping_page(
>> -                                        file_inode(obj->base.filp)->i_mapping,
>> -                                        page_offset);
>> -                if (!IS_ERR(page)) {
>> -                        vmf->page = page;
>> -                        ret = 0;
>> -                } else switch (PTR_ERR(page)) {
>> -                        case -ENOSPC:
>> -                        case -ENOMEM:
>> -                                ret = VM_FAULT_OOM;
>> -                                break;
>> -                        case -EBUSY:
>> -                                ret = VM_FAULT_RETRY;
>> -                                break;
>> -                        case -EFAULT:
>> -                        case -EINVAL:
>> -                                ret = VM_FAULT_SIGBUS;
>> -                                break;
>> -                        default:
>> -                                WARN_ON(PTR_ERR(page));
>> -                                ret = VM_FAULT_SIGBUS;
>> -                                break;
>> +
>> +        if (num_pages > 1) {
>> +                bool forward = true;
>> +                bool random = false;
>> +
>> +                // Determine prefaulting direction. If adjacent pages are both
>> +                // allocated/not allocated then we have random access.
>> +                // Always try to prefault on first and last page.
>> +                if (page_offset != 0 && page_offset != num_pages - 1) {
>> +                        struct page *next, *prev;
>> +
>> +                        next = obj->pages[page_offset + 1];
>> +                        prev = obj->pages[page_offset - 1];
>> +                        if (!((uintptr_t)next ^ (uintptr_t)prev))
>> +                                random = true;
>> +                        else if (!prev)
>> +                                forward = false;
>> +                } else {
>> +                        forward = (page_offset == 0);
>>                  }
>
> Quite complicated, maybe sth like this:
>
>         bool next, prev;
>
>         next = obj->pages[page_offset + 1];
>         prev = obj->pages[page_offset - 1];
>         if (prev == next)
>                 random = true;
>
>
>>
>> +                if (!random) {
>> +                        for (page_num = page_offset;
>> +                             forward ? page_num < page_offset + PREFAULT_PAGES && page_num < num_pages :
>> +                                       page_offset - page_num < PREFAULT_PAGES && page_num >= 0;
>> +                             forward ? page_num++ : page_num--) {
>
> Again complicated, try pre-calculate boundaries and:
>
>         start_page = ...;
>         end_page = ...;
>         step = forward ? 1 : -1;
>
>         for (page_num = start_page; page_num < end_page; page_num += step)
>                 ...
>
>
>> +                                if (!obj->pages[page_num]) {
>> +                                        page_prep_ret = __vgem_prepare_single_page(obj, vma, page_num, vaddr);
>> +                                        if (page_prep_ret) {
>> +                                                ret = page_prep_ret;
>> +                                                break;
>> +                                        }
>> +                                } else {
>> +                                        // random access, exit loop
>> +                                        break;
>> +                                }
>> +
>> +                                vaddr = vaddr + (forward ? 1 : -1) * PAGE_SIZE;
>
>         vaddr += step * PAGE_SIZE;
>
>
> Regards
>
> Andrzej
>
>
>> +                        }
>> +                } else {
>> +                        page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr);
>> +                        if (page_prep_ret)
>> +                                ret = page_prep_ret;
>> +                }
>> +        } else {
>> +                page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr);
>> +                if (page_prep_ret)
>> +                        ret = page_prep_ret;
>>          }
>> +
>> +        mutex_unlock(&obj->pages_lock);
>>          return ret;
>>  }
>>
>> @@ -182,7 +283,7 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
>>                                                unsigned long size)
>>  {
>>          struct drm_vgem_gem_object *obj;
>> -        int ret;
>> +        int ret, num_pages;
>>
>>          obj = __vgem_gem_create(dev, size);
>>          if (IS_ERR(obj))
>> @@ -193,6 +294,13 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
>>          if (ret)
>>                  return ERR_PTR(ret);
>>
>> +        mutex_lock(&obj->pages_lock);
>> +
>> +        num_pages = obj->base.size >> PAGE_SHIFT;
>> +        obj->pages = kvcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
>> +
>> +        mutex_unlock(&obj->pages_lock);
>> +
>>          return &obj->base;
>>  }
>>
>> @@ -262,7 +370,8 @@ static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
>>          /* Keep the WC mmaping set by drm_gem_mmap() but our pages
>>           * are ordinary and not special.
>>           */
>> -        vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP;
>> +        vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP;
>> +
>>          return 0;
>>  }
>>
>>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@xxxxxxxxxxxxxxxxxxxxx
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
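
For reference, one possible shape of the prefault loop with pre-calculated
boundaries, along the lines suggested above for vgem_gem_fault(). The
"last" variable, the min_t()/max_t() clamping and the signed page index
are assumptions made for illustration, not code taken from the patch:

        int step = forward ? 1 : -1;
        int last;       /* one past the final page to visit, in walk direction */

        if (forward)
                last = min_t(int, page_offset + PREFAULT_PAGES, num_pages);
        else
                last = max_t(int, (int)page_offset - PREFAULT_PAGES, -1);

        for (page_num = page_offset; page_num != last; page_num += step) {
                if (obj->pages[page_num])
                        break;  /* already populated: treat as random access */

                page_prep_ret = __vgem_prepare_single_page(obj, vma, page_num, vaddr);
                if (page_prep_ret) {
                        ret = page_prep_ret;
                        break;
                }

                vaddr += step * PAGE_SIZE;
        }

A single step value then drives both the page index and the vaddr update,
which removes the per-iteration ternaries from the loop header and body.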