Hi Thomas,

A few ramblings below - just my thoughts while trying to understand the
code, especially since I have looked at it before.

	Sam

On Mon, Apr 25, 2022 at 01:27:50PM +0200, Thomas Zimmermann wrote:
> Store the per-page state for fbdev's deferred I/O in struct
> fb_deferred_io_pageref. Maintain a list of pagerefs for the pages
> that have to be written back to video memory. Update all affected
> drivers.
> 
> As with pages before, fbdev acquires a pageref when an mmaped page
> of the framebuffer is being written to. It holds the pageref in a
> list of all currently written pagerefs until it flushes the written
> pages to video memory. Writeback occurs periodically. After writeback
> fbdev releases all pagerefs and builds up a new dirty list until the
> next writeback occurs.
> 
> Using pagerefs has a number of benefits.
> 
> For pages of the framebuffer, the deferred I/O code used struct
> page.lru as an entry into the list of dirty pages. The lru field is
> owned by the page cache, which makes deferred I/O incompatible with
> some memory pages (e.g., most notably DRM's GEM SHMEM allocator).
> struct fb_deferred_io_pageref now provides an entry into a list of
> dirty framebuffer pages, freeing lru for use with the page cache.
> 
> Drivers also assumed that struct page.index is the page offset into
> the framebuffer. This is not true for DRM buffers, which are located
> at various offset within a mapped area. struct fb_deferred_io_pageref
> explicitly stores an offset into the framebuffer. struct page.index
> is now only the page offset into the mapped area.
> 
> These changes will allow DRM to use fbdev deferred I/O without an
> intermediate shadow buffer.
> 
> v2:
> 	* minor fixes in commit message
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@xxxxxxx>
> Reviewed-by: Javier Martinez Canillas <javierm@xxxxxxxxxx>
> ---
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> index 02a4a4fa3da3..db02e17e12b6 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> @@ -322,12 +322,13 @@ static void vmw_deferred_io(struct fb_info *info,
>  	struct vmw_fb_par *par = info->par;
>  	unsigned long start, end, min, max;
>  	unsigned long flags;
> -	struct page *page;
> +	struct fb_deferred_io_pageref *pageref;
>  	int y1, y2;
> 
>  	min = ULONG_MAX;
>  	max = 0;
> -	list_for_each_entry(page, pagelist, lru) {
> +	list_for_each_entry(pageref, pagelist, list) {
> +		struct page *page = pageref->page;
>  		start = page->index << PAGE_SHIFT;
>  		end = start + PAGE_SIZE - 1;
>  		min = min(min, start);

The same change is made in all deferred_io drivers, like the one above.
We now traverse pageref->list, where pagelist points to the head of the
list. It took me a while to understand that pagelist points to a list
of fb_deferred_io_pageref and not a list of pages. It would have helped
me had this been renamed: s/pagelist/pagereflist/. If you see no point
in this then just ignore my comment; I have not stared at kernel code
for a while and am thus easy to confuse.
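
With such a rename the driver loop above would read something like this
(untested sketch, the pagereflist name is only my suggestion):

	static void vmw_deferred_io(struct fb_info *info,
				    struct list_head *pagereflist)
	{
		struct fb_deferred_io_pageref *pageref;
		...
		list_for_each_entry(pageref, pagereflist, list) {
			struct page *page = pageref->page;
			...
		}
		...
	}

Then the prototype alone already tells the reader that the callback is
handed a list of pagerefs, not a list of pages.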

> diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
> index 6924d489a289..a03b9c64fc61 100644
> --- a/drivers/video/fbdev/core/fb_defio.c
> +++ b/drivers/video/fbdev/core/fb_defio.c
> @@ -36,6 +36,60 @@ static struct page *fb_deferred_io_page(struct fb_info *info, unsigned long offs
>  	return page;
>  }
> 
> +static struct fb_deferred_io_pageref *fb_deferred_io_pageref_get(struct fb_info *info,
> +								  unsigned long offset,
> +								  struct page *page)
> +{
> +	struct fb_deferred_io *fbdefio = info->fbdefio;
> +	struct list_head *pos = &fbdefio->pagelist;
> +	unsigned long pgoff = offset >> PAGE_SHIFT;
> +	struct fb_deferred_io_pageref *pageref, *cur;
> +
> +	if (WARN_ON_ONCE(pgoff >= info->npagerefs))
> +		return NULL; /* incorrect allocation size */
> +
> +	/* 1:1 mapping between pageref and page offset */
> +	pageref = &info->pagerefs[pgoff];
> +
> +	/*
> +	 * This check is to catch the case where a new process could start
> +	 * writing to the same page through a new PTE. This new access
> +	 * can cause a call to .page_mkwrite even if the original process'
> +	 * PTE is marked writable.
> +	 */
> +	if (!list_empty(&pageref->list))
> +		goto pageref_already_added;
> +
> +	pageref->page = page;
> +	pageref->offset = pgoff << PAGE_SHIFT;
> +
> +	if (unlikely(fbdefio->sort_pagelist)) {
> +		/*
> +		 * We loop through the list of pagerefs before adding, in
> +		 * order to keep the pagerefs sorted. This has significant
> +		 * overhead of O(n^2) with n being the number of written
> +		 * pages. If possible, drivers should try to work with
> +		 * unsorted page lists instead.
> +		 */
> +		list_for_each_entry(cur, &info->fbdefio->pagelist, list) {
> +			if (cur > pageref)
> +				break;
> +		}
> +		pos = &cur->list;
> +	}
> +
> +	list_add_tail(&pageref->list, pos);
> +
> +pageref_already_added:
> +	return pageref;
> +}
> +
> +static void fb_deferred_io_pageref_put(struct fb_deferred_io_pageref *pageref,
> +				       struct fb_info *info)
> +{
> +	list_del_init(&pageref->list);
> +}
> +
>  /* this is to find and return the vmalloc-ed fb pages */
>  static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
>  {
> @@ -59,7 +113,7 @@ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
>  		printk(KERN_ERR "no mapping available\n");
> 
>  	BUG_ON(!page->mapping);
> -	page->index = vmf->pgoff;
> +	page->index = vmf->pgoff; /* for page_mkclean() */
> 
>  	vmf->page = page;
>  	return 0;
> @@ -95,7 +149,11 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
>  	struct page *page = vmf->page;
>  	struct fb_info *info = vmf->vma->vm_private_data;
>  	struct fb_deferred_io *fbdefio = info->fbdefio;
> -	struct list_head *pos = &fbdefio->pagelist;
> +	struct fb_deferred_io_pageref *pageref;
> +	unsigned long offset;
> +	vm_fault_t ret;
> +
> +	offset = (vmf->address - vmf->vma->vm_start);
> 
>  	/* this is a callback we get when userspace first tries to
>  	write to the page. we schedule a workqueue. that workqueue
> @@ -112,6 +170,12 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
>  	if (fbdefio->first_io && list_empty(&fbdefio->pagelist))
>  		fbdefio->first_io(info);
> 
> +	pageref = fb_deferred_io_pageref_get(info, offset, page);

Compared to the old code we now do all the sorting and stuff without
the page locked, which seems like a big change.
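
To spell out what I mean - simplified, and in both the old and the new
code this all still runs under mutex_lock(&fbdefio->lock):

	/* old: lock the page first, then check and queue it */
	lock_page(page);
	if (list_empty(&page->lru)) {
		/* optional sorted-insert search sets pos here */
		list_add_tail(&page->lru, pos);
	}

	/* new: check, sort and queue the pageref first, lock the page last */
	pageref = fb_deferred_io_pageref_get(info, offset, page);
	/* ^ does the list_empty() check, the optional sort and the
	 * list_add_tail(), all without the page lock */
	lock_page(pageref->page);

Maybe that is perfectly fine, I just wanted to point out that the list
manipulation no longer happens with the page locked.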

> +	if (WARN_ON_ONCE(!pageref)) {
> +		ret = VM_FAULT_OOM;
> +		goto err_mutex_unlock;
> +	}
> +
>  	/*
>  	 * We want the page to remain locked from ->page_mkwrite until
>  	 * the PTE is marked dirty to avoid page_mkclean() being called
> @@ -120,47 +184,17 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
>  	 * Do this by locking the page here and informing the caller
>  	 * about it with VM_FAULT_LOCKED.
>  	 */
> -	lock_page(page);
> -
> -	/*
> -	 * This check is to catch the case where a new process could start
> -	 * writing to the same page through a new PTE. This new access
> -	 * can cause a call to .page_mkwrite even if the original process'
> -	 * PTE is marked writable.
> -	 *
> -	 * TODO: The lru field is owned by the page cache; hence the name.
> -	 *       We dequeue in fb_deferred_io_work() after flushing the
> -	 *       page's content into video memory. Instead of lru, fbdefio
> -	 *       should have it's own field.
> -	 */
> -	if (!list_empty(&page->lru))
> -		goto page_already_added;
> -
> -	if (unlikely(fbdefio->sort_pagelist)) {
> -		/*
> -		 * We loop through the pagelist before adding in order to
> -		 * keep the pagelist sorted. This has significant overhead
> -		 * of O(n^2) with n being the number of written pages. If
> -		 * possible, drivers should try to work with unsorted page
> -		 * lists instead.
> -		 */
> -		struct page *cur;
> -
> -		list_for_each_entry(cur, &fbdefio->pagelist, lru) {
> -			if (cur->index > page->index)
> -				break;
> -		}
> -		pos = &cur->lru;
> -	}
> -
> -	list_add_tail(&page->lru, pos);
> +	lock_page(pageref->page);
> 
> -page_already_added:
>  	mutex_unlock(&fbdefio->lock);
> 
>  	/* come back after delay to process the deferred IO */
>  	schedule_delayed_work(&info->deferred_work, fbdefio->delay);
>  	return VM_FAULT_LOCKED;
> +
> +err_mutex_unlock:
> +	mutex_unlock(&fbdefio->lock);
> +	return ret;
>  }
> 
>  static const struct vm_operations_struct fb_deferred_io_vm_ops = {
> @@ -186,15 +220,14 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_mmap);
>  /* workqueue callback */
>  static void fb_deferred_io_work(struct work_struct *work)
>  {
> -	struct fb_info *info = container_of(work, struct fb_info,
> -					    deferred_work.work);
> -	struct list_head *node, *next;
> -	struct page *cur;
> +	struct fb_info *info = container_of(work, struct fb_info, deferred_work.work);
> +	struct fb_deferred_io_pageref *pageref, *next;
>  	struct fb_deferred_io *fbdefio = info->fbdefio;
> 
>  	/* here we mkclean the pages, then do all deferred IO */
>  	mutex_lock(&fbdefio->lock);
> -	list_for_each_entry(cur, &fbdefio->pagelist, lru) {
> +	list_for_each_entry(pageref, &fbdefio->pagelist, list) {
> +		struct page *cur = pageref->page;
>  		lock_page(cur);
>  		page_mkclean(cur);
>  		unlock_page(cur);
> @@ -204,30 +237,48 @@ static void fb_deferred_io_work(struct work_struct *work)
>  	fbdefio->deferred_io(info, &fbdefio->pagelist);
> 
>  	/* clear the list */
> -	list_for_each_safe(node, next, &fbdefio->pagelist) {
> -		list_del_init(node);
> -	}
> +	list_for_each_entry_safe(pageref, next, &fbdefio->pagelist, list)
> +		fb_deferred_io_pageref_put(pageref, info);
> +
>  	mutex_unlock(&fbdefio->lock);
>  }
> 
> -void fb_deferred_io_init(struct fb_info *info)
> +int fb_deferred_io_init(struct fb_info *info)
>  {
>  	struct fb_deferred_io *fbdefio = info->fbdefio;
> -	struct page *page;
> -	unsigned int i;
> +	struct fb_deferred_io_pageref *pagerefs;
> +	unsigned long npagerefs, i;
> +	int ret;
> 
>  	BUG_ON(!fbdefio);
> +
> +	if (WARN_ON(!info->fix.smem_len))
> +		return -EINVAL;
> +
>  	mutex_init(&fbdefio->lock);
>  	INIT_DELAYED_WORK(&info->deferred_work, fb_deferred_io_work);
>  	INIT_LIST_HEAD(&fbdefio->pagelist);
>  	if (fbdefio->delay == 0) /* set a default of 1 s */
>  		fbdefio->delay = HZ;
> 
> -	/* initialize all the page lists one time */
> -	for (i = 0; i < info->fix.smem_len; i += PAGE_SIZE) {
> -		page = fb_deferred_io_page(info, i);
> -		INIT_LIST_HEAD(&page->lru);
> +	npagerefs = DIV_ROUND_UP(info->fix.smem_len, PAGE_SIZE);
> +
> +	/* alloc a page ref for each page of the display memory */
> +	pagerefs = kvcalloc(npagerefs, sizeof(*pagerefs), GFP_KERNEL);
> +	if (!pagerefs) {
> +		ret = -ENOMEM;
> +		goto err;
>  	}
> +	for (i = 0; i < npagerefs; ++i)
> +		INIT_LIST_HEAD(&pagerefs[i].list);
> +	info->npagerefs = npagerefs;
> +	info->pagerefs = pagerefs;
> +
> +	return 0;
> +
> +err:
> +	mutex_destroy(&fbdefio->lock);
> +	return ret;
>  }
>  EXPORT_SYMBOL_GPL(fb_deferred_io_init);
> 
> @@ -254,6 +305,7 @@ void fb_deferred_io_cleanup(struct fb_info *info)
>  		page->mapping = NULL;
>  	}
> 
> +	kvfree(info->pagerefs);
>  	mutex_destroy(&fbdefio->lock);
>  }
>  EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup);
> diff --git a/include/linux/fb.h b/include/linux/fb.h
> index f95da1af9ff6..a332590c0fae 100644
> --- a/include/linux/fb.h
> +++ b/include/linux/fb.h
> @@ -201,6 +201,13 @@ struct fb_pixmap {
>  };
> 
>  #ifdef CONFIG_FB_DEFERRED_IO
> +struct fb_deferred_io_pageref {
> +	struct page *page;
> +	unsigned long offset;
> +	/* private */
> +	struct list_head list;

This is the entry in the list of pages - so this could be named
pagelist... (see also the sketch below the patch)

> +};
> +
>  struct fb_deferred_io {
>  	/* delay between mkwrite and deferred handler */
>  	unsigned long delay;
> @@ -468,6 +475,8 @@ struct fb_info {
>  #endif
>  #ifdef CONFIG_FB_DEFERRED_IO
>  	struct delayed_work deferred_work;
> +	unsigned long npagerefs;
> +	struct fb_deferred_io_pageref *pagerefs;
>  	struct fb_deferred_io *fbdefio;
>  #endif
> 
> @@ -661,7 +670,7 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch,
> 
>  /* drivers/video/fb_defio.c */
>  int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
> -extern void fb_deferred_io_init(struct fb_info *info);
> +extern int fb_deferred_io_init(struct fb_info *info);
>  extern void fb_deferred_io_open(struct fb_info *info,
>  				struct inode *inode,
>  				struct file *file);
> -- 
> 2.36.0
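
To put the naming nit in one place, this is roughly what I had in mind
for the fb.h side (again only a sketch, the names are just suggestions
and not part of your patch):

	struct fb_deferred_io_pageref {
		struct page *page;
		unsigned long offset;
		/* private */
		struct list_head list;		/* entry in fbdefio->pagereflist */
	};

	struct fb_deferred_io {
		/* delay between mkwrite and deferred handler */
		unsigned long delay;
		...
		struct list_head pagereflist;	/* was: pagelist - holds pagerefs, not pages */
		...
	};

With that, list_for_each_entry(pageref, &fbdefio->pagereflist, list)
can hardly be mistaken for a walk over struct page anymore.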