Re: [PATCH v4 4/4] mm: vmalloc: convert vread() to vread_iter()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Mar 22, 2023 at 08:55:23PM +0800, Baoquan He wrote:
> On 03/21/23 at 08:54pm, Lorenzo Stoakes wrote:
> ......
> > Additionally, we must account for the fact that at any point a copy may
> > fail; if this happens, we exit indicating fewer bytes retrieved than
> > expected.
> ......
> > -static int aligned_vread(char *buf, char *addr, unsigned long count)
> > +/*
> > + * small helper routine, copy contents to iter from addr.
> > + * If the page is not present, fill zero.
> > + *
> > + * Returns the number of copied bytes.
> > + */
> > +static size_t aligned_vread_iter(struct iov_iter *iter,
> > +				 const char *addr, size_t count)
> >  {
> > -	struct page *p;
> > -	int copied = 0;
> > +	size_t remains = count;
> > +	struct page *page;
> >
> > -	while (count) {
> > +	while (remains > 0) {
> >  		unsigned long offset, length;
> > +		size_t copied = 0;
> >
> >  		offset = offset_in_page(addr);
> >  		length = PAGE_SIZE - offset;
> > -		if (length > count)
> > -			length = count;
> > -		p = vmalloc_to_page(addr);
> > +		if (length > remains)
> > +			length = remains;
> > +		page = vmalloc_to_page(addr);
> >  		/*
> > -		 * To do safe access to this _mapped_ area, we need
> > -		 * lock. But adding lock here means that we need to add
> > -		 * overhead of vmalloc()/vfree() calls for this _debug_
> > -		 * interface, rarely used. Instead of that, we'll use
> > -		 * kmap() and get small overhead in this access function.
> > +		 * To do safe access to this _mapped_ area, we need lock. But
> > +		 * adding lock here means that we need to add overhead of
> > +		 * vmalloc()/vfree() calls for this _debug_ interface, rarely
> > +		 * used. Instead of that, we'll use a local mapping via
> > +		 * copy_page_to_iter_atomic() and accept a small overhead in
> > +		 * this access function.
> >  		 */
> > -		if (p) {
> > -			/* We can expect USER0 is not used -- see vread() */
> > -			void *map = kmap_atomic(p);
> > -			memcpy(buf, map + offset, length);
> > -			kunmap_atomic(map);
> > -		} else
> > -			memset(buf, 0, length);
> > +		if (page)
> > +			copied = copy_page_to_iter_atomic(page, offset, length,
> > +							  iter);
> > +
>
> If we have decided to quit at any failing copy point, indicating fewer
> bytes retrieved than expected, I am wondering why we don't quit here if
> copy_page_to_iter_atomic() failed?
>
> The original zero-filling is for unmapped vmalloc areas, not for
> areas where the read failed. Not sure if I got your point.
>

Great spot, I will move this zeroing to an else branch.

> > +		/* Zero anything we were unable to copy. */
> > +		copied += zero_iter(iter, length - copied);
> > +
> > +		addr += copied;
> > +		remains -= copied;
> >
> > -		addr += length;
> > -		buf += length;
> > -		copied += length;
> > -		count -= length;
> > +		if (copied != length)
> > +			break;
> >  	}
> > -	return copied;
> > +
> > +	return count - remains;
> >  }
> >
> > -static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
> > +/*
> > + * Read from a vm_map_ram region of memory.
> > + *
> > + * Returns the number of copied bytes.
> > + */
> > +static size_t vmap_ram_vread_iter(struct iov_iter *iter, const char *addr,
> > +				  size_t count, unsigned long flags)
> >  {
> >  	char *start;
> >  	struct vmap_block *vb;
> >  	unsigned long offset;
> > -	unsigned int rs, re, n;
> > +	unsigned int rs, re;
> > +	size_t remains, n;
> >
> >  	/*
> >  	 * If it's area created by vm_map_ram() interface directly, but
> >  	 * not further subdividing and delegating management to vmap_block,
> >  	 * handle it here.
> >  	 */
> > -	if (!(flags & VMAP_BLOCK)) {
> > -		aligned_vread(buf, addr, count);
> > -		return;
> > -	}
> > +	if (!(flags & VMAP_BLOCK))
> > +		return aligned_vread_iter(iter, addr, count);
> >
> >  	/*
> >  	 * Area is split into regions and tracked with vmap_block, read out
> > @@ -3505,50 +3537,65 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags
> >  	 */
> >  	vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
> >  	if (!vb)
> > -		goto finished;
> > +		goto finished_zero;
> >
> >  	spin_lock(&vb->lock);
> >  	if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
> >  		spin_unlock(&vb->lock);
> > -		goto finished;
> > +		goto finished_zero;
> >  	}
> > +
> > +	remains = count;
> >  	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> > -		if (!count)
> > -			break;
> > +		size_t copied;
> > +
> > +		if (remains == 0)
> > +			goto finished;
> > +
> >  		start = vmap_block_vaddr(vb->va->va_start, rs);
> > -		while (addr < start) {
> > -			if (count == 0)
> > -				goto unlock;
> > -			*buf = '\0';
> > -			buf++;
> > -			addr++;
> > -			count--;
> > +
> > +		if (addr < start) {
> > +			size_t to_zero = min_t(size_t, start - addr, remains);
> > +			size_t zeroed = zero_iter(iter, to_zero);
> > +
> > +			addr += zeroed;
> > +			remains -= zeroed;
> > +
> > +			if (remains == 0 || zeroed != to_zero)
> > +				goto finished;
> >  		}
> > +
> >  		/*it could start reading from the middle of used region*/
> >  		offset = offset_in_page(addr);
> >  		n = ((re - rs + 1) << PAGE_SHIFT) - offset;
> > -		if (n > count)
> > -			n = count;
> > -		aligned_vread(buf, start+offset, n);
> > +		if (n > remains)
> > +			n = remains;
> > +
> > +		copied = aligned_vread_iter(iter, start + offset, n);
> >
> > -		buf += n;
> > -		addr += n;
> > -		count -= n;
> > +		addr += copied;
> > +		remains -= copied;
> > +
> > +		if (copied != n)
> > +			goto finished;
> >  	}
> > -unlock:
> > +
> >  	spin_unlock(&vb->lock);
> >
> > -finished:
> > +finished_zero:
> >  	/* zero-fill the left dirty or free regions */
> > -	if (count)
> > -		memset(buf, 0, count);
> > +	return count - remains + zero_iter(iter, remains);
>
> When it jumps to finished_zero, the local variable 'remains' is still 0, so
> we do nothing here but return count. That doesn't look right. You may need
> to move the 'remains = count' assignment up, ahead of those jumps.
>

Another great spot, will fix.

> > +finished:
> > +	/* We couldn't copy/zero everything */
> > +	spin_unlock(&vb->lock);
> > +	return count - remains;
> >  }
> >
> >  /**
> > - * vread() - read vmalloc area in a safe way.
> > - * @buf:     buffer for reading data
> > - * @addr:    vm address.
> > - * @count:   number of bytes to be read.
> > + * vread_iter() - read vmalloc area in a safe way to an iterator.
> > + * @iter:         the iterator to which data should be written.
> > + * @addr:         vm address.
> > + * @count:        number of bytes to be read.
> >   *
> >   * This function checks that addr is a valid vmalloc'ed area, and
> >   * copy data from that area to a given buffer. If the given memory range
> > @@ -3568,13 +3615,12 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags
> >   * (same number as @count) or %0 if [addr...addr+count) doesn't
> >   * include any intersection with valid vmalloc area
> >   */
> > -long vread(char *buf, char *addr, unsigned long count)
> > +long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
> >  {
> >  	struct vmap_area *va;
> >  	struct vm_struct *vm;
> > -	char *vaddr, *buf_start = buf;
> > -	unsigned long buflen = count;
> > -	unsigned long n, size, flags;
> > +	char *vaddr;
> > +	size_t n, size, flags, remains;
> >
> >  	addr = kasan_reset_tag(addr);
> >
> > @@ -3582,18 +3628,22 @@ long vread(char *buf, char *addr, unsigned long count)
> >  	if ((unsigned long) addr + count < count)
> >  		count = -(unsigned long) addr;
> >
> > +	remains = count;
> > +
> >  	spin_lock(&vmap_area_lock);
> >  	va = find_vmap_area_exceed_addr((unsigned long)addr);
> >  	if (!va)
> > -		goto finished;
> > +		goto finished_zero;
> >
> >  	/* no intersects with alive vmap_area */
> > -	if ((unsigned long)addr + count <= va->va_start)
> > -		goto finished;
> > +	if ((unsigned long)addr + remains <= va->va_start)
> > +		goto finished_zero;
> >
> >  	list_for_each_entry_from(va, &vmap_area_list, list) {
> > -		if (!count)
> > -			break;
> > +		size_t copied;
> > +
> > +		if (remains == 0)
> > +			goto finished;
> >
> >  		vm = va->vm;
> >  		flags = va->flags & VMAP_FLAGS_MASK;
> > @@ -3608,6 +3658,7 @@ long vread(char *buf, char *addr, unsigned long count)
> >
> >  		if (vm && (vm->flags & VM_UNINITIALIZED))
> >  			continue;
> > +
> >  		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
> >  		smp_rmb();
> >
> > @@ -3616,38 +3667,45 @@ long vread(char *buf, char *addr, unsigned long count)
> >
> >  		if (addr >= vaddr + size)
> >  			continue;
> > -		while (addr < vaddr) {
> > -			if (count == 0)
> > +
> > +		if (addr < vaddr) {
> > +			size_t to_zero = min_t(size_t, vaddr - addr, remains);
> > +			size_t zeroed = zero_iter(iter, to_zero);
> > +
> > +			addr += zeroed;
> > +			remains -= zeroed;
> > +
> > +			if (remains == 0 || zeroed != to_zero)
> >  				goto finished;
> > -			*buf = '\0';
> > -			buf++;
> > -			addr++;
> > -			count--;
> >  		}
> > +
> >  		n = vaddr + size - addr;
> > -		if (n > count)
> > -			n = count;
> > +		if (n > remains)
> > +			n = remains;
> >
> >  		if (flags & VMAP_RAM)
> > -			vmap_ram_vread(buf, addr, n, flags);
> > +			copied = vmap_ram_vread_iter(iter, addr, n, flags);
> >  		else if (!(vm->flags & VM_IOREMAP))
> > -			aligned_vread(buf, addr, n);
> > +			copied = aligned_vread_iter(iter, addr, n);
> >  		else /* IOREMAP area is treated as memory hole */
> > -			memset(buf, 0, n);
> > -		buf += n;
> > -		addr += n;
> > -		count -= n;
> > +			copied = zero_iter(iter, n);
> > +
> > +		addr += copied;
> > +		remains -= copied;
> > +
> > +		if (copied != n)
> > +			goto finished;
> >  	}
> > -finished:
> > -	spin_unlock(&vmap_area_lock);
> >
> > -	if (buf == buf_start)
> > -		return 0;
> > +finished_zero:
> > +	spin_unlock(&vmap_area_lock);
> >  	/* zero-fill memory holes */
> > -	if (buf != buf_start + buflen)
> > -		memset(buf, 0, buflen - (buf - buf_start));
> > +	return count - remains + zero_iter(iter, remains);
> > +finished:
> > +	/* Nothing remains, or We couldn't copy/zero everything. */
>                               ~~~ typo, s/We/we/
> > +	spin_unlock(&vmap_area_lock);
> >
> > -	return buflen;
> > +	return count - remains;
> >  }
> >
> >  /**
> > --
> > 2.39.2
> >
>



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux