Hi Thomas,

A few ramblings below - just my thoughts while trying to understand the
code, especially since I have looked at it before.

	Sam

On Mon, Apr 25, 2022 at 01:27:50PM +0200, Thomas Zimmermann wrote:
> Store the per-page state for fbdev's deferred I/O in struct
> fb_deferred_io_pageref. Maintain a list of pagerefs for the pages
> that have to be written back to video memory. Update all affected
> drivers.
> 
> As with pages before, fbdev acquires a pageref when an mmaped page
> of the framebuffer is being written to. It holds the pageref in a
> list of all currently written pagerefs until it flushes the written
> pages to video memory. Writeback occurs periodically. After writeback
> fbdev releases all pagerefs and builds up a new dirty list until the
> next writeback occurs.
> 
> Using pagerefs has a number of benefits.
> 
> For pages of the framebuffer, the deferred I/O code used struct
> page.lru as an entry into the list of dirty pages. The lru field is
> owned by the page cache, which makes deferred I/O incompatible with
> some memory pages (e.g., most notably DRM's GEM SHMEM allocator).
> struct fb_deferred_io_pageref now provides an entry into a list of
> dirty framebuffer pages, freeing lru for use with the page cache.
> 
> Drivers also assumed that struct page.index is the page offset into
> the framebuffer. This is not true for DRM buffers, which are located
> at various offset within a mapped area. struct fb_deferred_io_pageref
> explicitly stores an offset into the framebuffer. struct page.index
> is now only the page offset into the mapped area.
> 
> These changes will allow DRM to use fbdev deferred I/O without an
> intermediate shadow buffer.
> 
> v2:
> 	* minor fixes in commit message
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@xxxxxxx>
> Reviewed-by: Javier Martinez Canillas <javierm@xxxxxxxxxx>
> ---
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> index 02a4a4fa3da3..db02e17e12b6 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> @@ -322,12 +322,13 @@ static void vmw_deferred_io(struct fb_info *info,
>  	struct vmw_fb_par *par = info->par;
>  	unsigned long start, end, min, max;
>  	unsigned long flags;
> -	struct page *page;
> +	struct fb_deferred_io_pageref *pageref;
>  	int y1, y2;
> 
>  	min = ULONG_MAX;
>  	max = 0;
> -	list_for_each_entry(page, pagelist, lru) {
> +	list_for_each_entry(pageref, pagelist, list) {
> +		struct page *page = pageref->page;
>  		start = page->index << PAGE_SHIFT;
>  		end = start + PAGE_SIZE - 1;
>  		min = min(min, start);

The same change is made in all deferred_io drivers, like the one above.
We now traverse pageref->list, where pagelist points to the head of the
list. It took me a while to understand that pagelist points to a list
of fb_deferred_io_pageref and not a list of pages. It would have helped
me had this been renamed: s/pagelist/pagereflist/. If you see no point
in this then just ignore my comment; I have not stared at kernel code
for a while and am thus easy to confuse.
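
With such a rename the driver loop above would read something like this
(untested sketch, the pagereflist name is only my suggestion):

	static void vmw_deferred_io(struct fb_info *info,
				    struct list_head *pagereflist)
	{
		struct fb_deferred_io_pageref *pageref;
		...
		list_for_each_entry(pageref, pagereflist, list) {
			struct page *page = pageref->page;
			...
		}
		...
	}

Then the prototype alone already tells the reader that the callback is
handed a list of pagerefs, not a list of pages.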

> diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
> index 6924d489a289..a03b9c64fc61 100644
> --- a/drivers/video/fbdev/core/fb_defio.c
> +++ b/drivers/video/fbdev/core/fb_defio.c
> @@ -36,6 +36,60 @@ static struct page *fb_deferred_io_page(struct fb_info *info, unsigned long offs
>  	return page;
>  }
> 
> +static struct fb_deferred_io_pageref *fb_deferred_io_pageref_get(struct fb_info *info,
> +								  unsigned long offset,
> +								  struct page *page)
> +{
> +	struct fb_deferred_io *fbdefio = info->fbdefio;
> +	struct list_head *pos = &fbdefio->pagelist;
> +	unsigned long pgoff = offset >> PAGE_SHIFT;
> +	struct fb_deferred_io_pageref *pageref, *cur;
> +
> +	if (WARN_ON_ONCE(pgoff >= info->npagerefs))
> +		return NULL; /* incorrect allocation size */
> +
> +	/* 1:1 mapping between pageref and page offset */
> +	pageref = &info->pagerefs[pgoff];
> +
> +	/*
> +	 * This check is to catch the case where a new process could start
> +	 * writing to the same page through a new PTE. This new access
> +	 * can cause a call to .page_mkwrite even if the original process'
> +	 * PTE is marked writable.
> +	 */
> +	if (!list_empty(&pageref->list))
> +		goto pageref_already_added;
> +
> +	pageref->page = page;
> +	pageref->offset = pgoff << PAGE_SHIFT;
> +
> +	if (unlikely(fbdefio->sort_pagelist)) {
> +		/*
> +		 * We loop through the list of pagerefs before adding, in
> +		 * order to keep the pagerefs sorted. This has significant
> +		 * overhead of O(n^2) with n being the number of written
> +		 * pages. If possible, drivers should try to work with
> +		 * unsorted page lists instead.
> +		 */
> +		list_for_each_entry(cur, &info->fbdefio->pagelist, list) {
> +			if (cur > pageref)
> +				break;
> +		}
> +		pos = &cur->list;
> +	}
> +
> +	list_add_tail(&pageref->list, pos);
> +
> +pageref_already_added:
> +	return pageref;
> +}
> +
> +static void fb_deferred_io_pageref_put(struct fb_deferred_io_pageref *pageref,
> +				       struct fb_info *info)
> +{
> +	list_del_init(&pageref->list);
> +}
> +
>  /* this is to find and return the vmalloc-ed fb pages */
>  static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
>  {
> @@ -59,7 +113,7 @@ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
>  		printk(KERN_ERR "no mapping available\n");
> 
>  	BUG_ON(!page->mapping);
> -	page->index = vmf->pgoff;
> +	page->index = vmf->pgoff; /* for page_mkclean() */
> 
>  	vmf->page = page;
>  	return 0;
> @@ -95,7 +149,11 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
>  	struct page *page = vmf->page;
>  	struct fb_info *info = vmf->vma->vm_private_data;
>  	struct fb_deferred_io *fbdefio = info->fbdefio;
> -	struct list_head *pos = &fbdefio->pagelist;
> +	struct fb_deferred_io_pageref *pageref;
> +	unsigned long offset;
> +	vm_fault_t ret;
> +
> +	offset = (vmf->address - vmf->vma->vm_start);
> 
>  	/* this is a callback we get when userspace first tries to
>  	write to the page. we schedule a workqueue. that workqueue
> @@ -112,6 +170,12 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
>  	if (fbdefio->first_io && list_empty(&fbdefio->pagelist))
>  		fbdefio->first_io(info);
> 
> +	pageref = fb_deferred_io_pageref_get(info, offset, page);

Compared to the old code we now do all the sorting and stuff without
the page locked, which seems like a big change.
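
To spell out what I mean - simplified, and in both the old and the new
code this all still runs under mutex_lock(&fbdefio->lock):

	/* old: lock the page first, then check and queue it */
	lock_page(page);
	if (list_empty(&page->lru)) {
		/* optional sorted-insert search sets pos here */
		list_add_tail(&page->lru, pos);
	}

	/* new: check, sort and queue the pageref first, lock the page last */
	pageref = fb_deferred_io_pageref_get(info, offset, page);
	/* ^ does the list_empty() check, the optional sort and the
	 * list_add_tail(), all without the page lock */
	lock_page(pageref->page);

Maybe that is perfectly fine, I just wanted to point out that the list
manipulation no longer happens with the page locked.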

> +	if (WARN_ON_ONCE(!pageref)) {
> +		ret = VM_FAULT_OOM;
> +		goto err_mutex_unlock;
> +	}
> +
>  	/*
>  	 * We want the page to remain locked from ->page_mkwrite until
>  	 * the PTE is marked dirty to avoid page_mkclean() being called
> @@ -120,47 +184,17 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
>  	 * Do this by locking the page here and informing the caller
>  	 * about it with VM_FAULT_LOCKED.
>  	 */
> -	lock_page(page);
> -
> -	/*
> -	 * This check is to catch the case where a new process could start
> -	 * writing to the same page through a new PTE. This new access
> -	 * can cause a call to .page_mkwrite even if the original process'
> -	 * PTE is marked writable.
> -	 *
> -	 * TODO: The lru field is owned by the page cache; hence the name.
> -	 *       We dequeue in fb_deferred_io_work() after flushing the
> -	 *       page's content into video memory. Instead of lru, fbdefio
> -	 *       should have it's own field.
> -	 */
> -	if (!list_empty(&page->lru))
> -		goto page_already_added;
> -
> -	if (unlikely(fbdefio->sort_pagelist)) {
> -		/*
> -		 * We loop through the pagelist before adding in order to
> -		 * keep the pagelist sorted. This has significant overhead
> -		 * of O(n^2) with n being the number of written pages. If
> -		 * possible, drivers should try to work with unsorted page
> -		 * lists instead.
> -		 */
> -		struct page *cur;
> -
> -		list_for_each_entry(cur, &fbdefio->pagelist, lru) {
> -			if (cur->index > page->index)
> -				break;
> -		}
> -		pos = &cur->lru;
> -	}
> -
> -	list_add_tail(&page->lru, pos);
> +	lock_page(pageref->page);
> 
> -page_already_added:
>  	mutex_unlock(&fbdefio->lock);
> 
>  	/* come back after delay to process the deferred IO */
>  	schedule_delayed_work(&info->deferred_work, fbdefio->delay);
>  	return VM_FAULT_LOCKED;
> +
> +err_mutex_unlock:
> +	mutex_unlock(&fbdefio->lock);
> +	return ret;
>  }
> 
>  static const struct vm_operations_struct fb_deferred_io_vm_ops = {
> @@ -186,15 +220,14 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_mmap);
>  /* workqueue callback */
>  static void fb_deferred_io_work(struct work_struct *work)
>  {
> -	struct fb_info *info = container_of(work, struct fb_info,
> -					    deferred_work.work);
> -	struct list_head *node, *next;
> -	struct page *cur;
> +	struct fb_info *info = container_of(work, struct fb_info, deferred_work.work);
> +	struct fb_deferred_io_pageref *pageref, *next;
>  	struct fb_deferred_io *fbdefio = info->fbdefio;
> 
>  	/* here we mkclean the pages, then do all deferred IO */
>  	mutex_lock(&fbdefio->lock);
> -	list_for_each_entry(cur, &fbdefio->pagelist, lru) {
> +	list_for_each_entry(pageref, &fbdefio->pagelist, list) {
> +		struct page *cur = pageref->page;
>  		lock_page(cur);
>  		page_mkclean(cur);
>  		unlock_page(cur);
> @@ -204,30 +237,48 @@ static void fb_deferred_io_work(struct work_struct *work)
>  	fbdefio->deferred_io(info, &fbdefio->pagelist);
> 
>  	/* clear the list */
> -	list_for_each_safe(node, next, &fbdefio->pagelist) {
> -		list_del_init(node);
> -	}
> +	list_for_each_entry_safe(pageref, next, &fbdefio->pagelist, list)
> +		fb_deferred_io_pageref_put(pageref, info);
> +
>  	mutex_unlock(&fbdefio->lock);
>  }
> 
> -void fb_deferred_io_init(struct fb_info *info)
> +int fb_deferred_io_init(struct fb_info *info)
>  {
>  	struct fb_deferred_io *fbdefio = info->fbdefio;
> -	struct page *page;
> -	unsigned int i;
> +	struct fb_deferred_io_pageref *pagerefs;
> +	unsigned long npagerefs, i;
> +	int ret;
> 
>  	BUG_ON(!fbdefio);
> +
> +	if (WARN_ON(!info->fix.smem_len))
> +		return -EINVAL;
> +
>  	mutex_init(&fbdefio->lock);
>  	INIT_DELAYED_WORK(&info->deferred_work, fb_deferred_io_work);
>  	INIT_LIST_HEAD(&fbdefio->pagelist);
>  	if (fbdefio->delay == 0) /* set a default of 1 s */
>  		fbdefio->delay = HZ;
> 
> -	/* initialize all the page lists one time */
> -	for (i = 0; i < info->fix.smem_len; i += PAGE_SIZE) {
> -		page = fb_deferred_io_page(info, i);
> -		INIT_LIST_HEAD(&page->lru);
> +	npagerefs = DIV_ROUND_UP(info->fix.smem_len, PAGE_SIZE);
> +
> +	/* alloc a page ref for each page of the display memory */
> +	pagerefs = kvcalloc(npagerefs, sizeof(*pagerefs), GFP_KERNEL);
> +	if (!pagerefs) {
> +		ret = -ENOMEM;
> +		goto err;
>  	}
> +	for (i = 0; i < npagerefs; ++i)
> +		INIT_LIST_HEAD(&pagerefs[i].list);
> +	info->npagerefs = npagerefs;
> +	info->pagerefs = pagerefs;
> +
> +	return 0;
> +
> +err:
> +	mutex_destroy(&fbdefio->lock);
> +	return ret;
>  }
>  EXPORT_SYMBOL_GPL(fb_deferred_io_init);
> 
> @@ -254,6 +305,7 @@ void fb_deferred_io_cleanup(struct fb_info *info)
>  		page->mapping = NULL;
>  	}
> 
> +	kvfree(info->pagerefs);
>  	mutex_destroy(&fbdefio->lock);
>  }
>  EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup);
> diff --git a/include/linux/fb.h b/include/linux/fb.h
> index f95da1af9ff6..a332590c0fae 100644
> --- a/include/linux/fb.h
> +++ b/include/linux/fb.h
> @@ -201,6 +201,13 @@ struct fb_pixmap {
>  };
> 
>  #ifdef CONFIG_FB_DEFERRED_IO
> +struct fb_deferred_io_pageref {
> +	struct page *page;
> +	unsigned long offset;
> +	/* private */
> +	struct list_head list;

This is the entry in the list of pages - so this could be named
pagelist... (see also the sketch below the patch)

> +};
> +
>  struct fb_deferred_io {
>  	/* delay between mkwrite and deferred handler */
>  	unsigned long delay;
> @@ -468,6 +475,8 @@ struct fb_info {
>  #endif
>  #ifdef CONFIG_FB_DEFERRED_IO
>  	struct delayed_work deferred_work;
> +	unsigned long npagerefs;
> +	struct fb_deferred_io_pageref *pagerefs;
>  	struct fb_deferred_io *fbdefio;
>  #endif
> 
> @@ -661,7 +670,7 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch,
> 
>  /* drivers/video/fb_defio.c */
>  int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
> -extern void fb_deferred_io_init(struct fb_info *info);
> +extern int fb_deferred_io_init(struct fb_info *info);
>  extern void fb_deferred_io_open(struct fb_info *info,
>  				struct inode *inode,
>  				struct file *file);
> -- 
> 2.36.0
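
To put the naming nit in one place, this is roughly what I had in mind
for the fb.h side (again only a sketch, the names are just suggestions
and not part of your patch):

	struct fb_deferred_io_pageref {
		struct page *page;
		unsigned long offset;
		/* private */
		struct list_head list;		/* entry in fbdefio->pagereflist */
	};

	struct fb_deferred_io {
		/* delay between mkwrite and deferred handler */
		unsigned long delay;
		...
		struct list_head pagereflist;	/* was: pagelist - holds pagerefs, not pages */
		...
	};

With that, list_for_each_entry(pageref, &fbdefio->pagereflist, list)
can hardly be mistaken for a walk over struct page anymore.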