Re: [PATCH V3 1/1] ublk: add io_uring based userspace block driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2022/6/29 00:08, Ming Lei wrote:

[...]

> +#define UBLK_MAX_PIN_PAGES	32
> +
> +static inline void ublk_release_pages(struct ublk_queue *ubq, struct page **pages,
> +		int nr_pages)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr_pages; i++)
> +		put_page(pages[i]);
> +}
> +
> +static inline int ublk_pin_user_pages(struct ublk_queue *ubq, u64 start_vm,
> +		unsigned int nr_pages, unsigned int gup_flags,
> +		struct page **pages)
> +{
> +	return get_user_pages_fast(start_vm, nr_pages, gup_flags, pages);
> +}

> +
> +static inline unsigned ublk_copy_bv(struct bio_vec *bv, void **bv_addr,
> +		void *pg_addr, unsigned int *pg_off,
> +		unsigned int *pg_len, bool to_bv)
> +{
> +	unsigned len = min_t(unsigned, bv->bv_len, *pg_len);
> +
> +	if (*bv_addr == NULL)
> +		*bv_addr = kmap_local_page(bv->bv_page);
> +
> +	if (to_bv)
> +		memcpy(*bv_addr + bv->bv_offset, pg_addr + *pg_off, len);
> +	else
> +		memcpy(pg_addr + *pg_off, *bv_addr + bv->bv_offset, len);
> +
> +	bv->bv_offset += len;
> +	bv->bv_len -= len;
> +	*pg_off += len;
> +	*pg_len -= len;
> +
> +	if (!bv->bv_len) {
> +		kunmap_local(*bv_addr);
> +		*bv_addr = NULL;
> +	}
> +
> +	return len;
> +}
> +
> +/* copy rq pages to ublksrv vm address pointed by io->addr */
> +static int ublk_copy_pages(struct ublk_queue *ubq, struct request *rq, bool to_rq,
> +		unsigned int max_bytes)
> +{
> +	unsigned int gup_flags = to_rq ? 0 : FOLL_WRITE;
> +	struct ublk_io *io = &ubq->ios[rq->tag];
> +	struct page *pgs[UBLK_MAX_PIN_PAGES];
> +	struct req_iterator req_iter;
> +	struct bio_vec bv;
> +	const unsigned int rq_bytes = min(blk_rq_bytes(rq), max_bytes);
> +	unsigned long start = io->addr, left = rq_bytes;
> +	unsigned int idx = 0, pg_len = 0, pg_off = 0;
> +	int nr_pin = 0;
> +	void *pg_addr = NULL;
> +	struct page *curr = NULL;
> +
> +	rq_for_each_segment(bv, rq, req_iter) {
> +		unsigned len, bv_off = bv.bv_offset, bv_len = bv.bv_len;
> +		void *bv_addr = NULL;
> +
> +refill:
> +		if (pg_len == 0) {
> +			unsigned int off = 0;
> +
> +			if (pg_addr) {
> +				kunmap_local(pg_addr);
> +				if (!to_rq)
> +					set_page_dirty_lock(curr);
> +				pg_addr = NULL;
> +			}
> +
> +			/* refill pages */
> +			if (idx >= nr_pin) {
> +				unsigned int max_pages;
> +
> +				ublk_release_pages(ubq, pgs, nr_pin);
> +
> +				off = start & (PAGE_SIZE - 1);
> +				max_pages = min_t(unsigned, (off + left +
> +						PAGE_SIZE - 1) >> PAGE_SHIFT,
> +						UBLK_MAX_PIN_PAGES);
> +				nr_pin = ublk_pin_user_pages(ubq, start,
> +						max_pages, gup_flags, pgs);
> +				if (nr_pin < 0)
> +					goto exit;
> +				idx = 0;
> +			}
> +			pg_off = off;
> +			pg_len = min(PAGE_SIZE - off, left);
> +			off = 0;
> +			curr = pgs[idx++];
> +			pg_addr = kmap_local_page(curr);
> +		}
> +
> +		len = ublk_copy_bv(&bv, &bv_addr, pg_addr, &pg_off, &pg_len,
> +				to_rq);
> +		/* either one of the two has been consumed */
> +		WARN_ON_ONCE(bv.bv_len && pg_len);
> +		start += len;
> +		left -= len;
> +
> +		/* overflow */
> +		WARN_ON_ONCE(left > rq_bytes);
> +		WARN_ON_ONCE(bv.bv_len > bv_len);
> +		if (bv.bv_len)
> +			goto refill;
> +
> +		bv.bv_len = bv_len;
> +		bv.bv_offset = bv_off;
> +	}
> +	if (pg_addr) {
> +		kunmap_local(pg_addr);
> +		if (!to_rq)
> +			set_page_dirty_lock(curr);
> +	}
> +	ublk_release_pages(ubq, pgs, nr_pin);
> +
> +exit:
> +	return rq_bytes - left;
> +}
> +

Hi Ming, 

I note that you pin the user buffer's pages, memcpy() and release them immediately.

1) I think maybe copy_page_from_iter() is another choice for copying user buffer to biovecs
   since copy_page_from_iter() do not pin pages(But it may raise page fault).

2) Or will you design some mechanism such as LRU to manage these pinned pages? 
   For example pin those pages frequently required for a long time and release
   those pages not used for a long time.
   I remember you have talked about this LRU on pinned pages?

Which one do you think is better? copy_page_from_iter() or pin pages with LRU?
Maybe it depends on the user's workload?

Regards,
Zhang



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux