Re: [RFC PATCH v10 44/48] ceph: plumb in decryption during sync reads

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2022-01-19 at 13:18 +0800, Xiubo Li wrote:
> On 1/12/22 3:16 AM, Jeff Layton wrote:
> > Note that the crypto block may be smaller than a page, but the reverse
> > cannot be true.
> > 
> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > ---
> >   fs/ceph/file.c | 94 ++++++++++++++++++++++++++++++++++++--------------
> >   1 file changed, 69 insertions(+), 25 deletions(-)
> > 
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index 41766b2012e9..b4f2fcd33837 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -926,9 +926,17 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> >   		bool more;
> >   		int idx;
> >   		size_t left;
> > +		u64 read_off = off;
> > +		u64 read_len = len;
> > +
> > +		/* determine new offset/length if encrypted */
> > +		fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
> > +
> > +		dout("sync_read orig %llu~%llu reading %llu~%llu",
> > +		     off, len, read_off, read_len);
> >   
> >   		req = ceph_osdc_new_request(osdc, &ci->i_layout,
> > -					ci->i_vino, off, &len, 0, 1,
> > +					ci->i_vino, read_off, &read_len, 0, 1,
> >   					CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
> >   					NULL, ci->i_truncate_seq,
> >   					ci->i_truncate_size, false);
> > @@ -937,10 +945,13 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> >   			break;
> >   		}
> >   
> > +		/* adjust len downward if the request truncated the len */
> > +		if (off + len > read_off + read_len)
> > +			len = read_off + read_len - off;
> >   		more = len < iov_iter_count(to);
> >   
> > -		num_pages = calc_pages_for(off, len);
> > -		page_off = off & ~PAGE_MASK;
> > +		num_pages = calc_pages_for(read_off, read_len);
> > +		page_off = offset_in_page(off);
> >   		pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
> >   		if (IS_ERR(pages)) {
> >   			ceph_osdc_put_request(req);
> > @@ -948,7 +959,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> >   			break;
> >   		}
> >   
> > -		osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
> > +		osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
> > +						 offset_in_page(read_off),
> >   						 false, false);
> >   		ret = ceph_osdc_start_request(osdc, req, false);
> >   		if (!ret)
> > @@ -957,23 +969,50 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> >   		ceph_update_read_metrics(&fsc->mdsc->metric,
> >   					 req->r_start_latency,
> >   					 req->r_end_latency,
> > -					 len, ret);
> > +					 read_len, ret);
> >   
> >   		if (ret > 0)
> >   			objver = req->r_version;
> >   		ceph_osdc_put_request(req);
> > -
> >   		i_size = i_size_read(inode);
> >   		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
> >   		     off, len, ret, i_size, (more ? " MORE" : ""));
> >   
> > -		if (ret == -ENOENT)
> > +		if (ret == -ENOENT) {
> > +			/* No object? Then this is a hole */
> >   			ret = 0;
> > +		} else if (ret > 0 && IS_ENCRYPTED(inode)) {
> > +			int fret;
> > +
> > +			fret = ceph_fscrypt_decrypt_pages(inode, pages, read_off, ret);
> > +			if (fret < 0) {
> > +				ceph_release_page_vector(pages, num_pages);
> > +				ret = fret;
> > +				break;
> > +			}
> > +
> > +			dout("sync_read decrypted fret %d\n", fret);
> > +
> > +			/* account for any partial block at the beginning */
> > +			fret -= (off - read_off);
> > +
> > +			/*
> > +			 * Short read after big offset adjustment?
> > +			 * Nothing is usable, just call it a zero
> > +			 * len read.
> > +			 */
> > +			fret = max(fret, 0);
> > +
> > +			/* account for partial block at the end */
> > +			ret = min_t(ssize_t, fret, len);
> > +		}
> > +
> > +		/* Short read but not EOF? Zero out the remainder. */
> >   		if (ret >= 0 && ret < len && (off + ret < i_size)) {
> >   			int zlen = min(len - ret, i_size - off - ret);
> >   			int zoff = page_off + ret;
> >   			dout("sync_read zero gap %llu~%llu\n",
> > -                             off + ret, off + ret + zlen);
> > +			     off + ret, off + ret + zlen);
> >   			ceph_zero_page_vector_range(zoff, zlen, pages);
> >   			ret += zlen;
> >   		}
> > @@ -981,15 +1020,15 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> >   		idx = 0;
> >   		left = ret > 0 ? ret : 0;
> >   		while (left > 0) {
> > -			size_t len, copied;
> > -			page_off = off & ~PAGE_MASK;
> > -			len = min_t(size_t, left, PAGE_SIZE - page_off);
> > +			size_t plen, copied;
> > +			plen = min_t(size_t, left, PAGE_SIZE - page_off);
> >   			SetPageUptodate(pages[idx]);
> >   			copied = copy_page_to_iter(pages[idx++],
> > -						   page_off, len, to);
> > +						   page_off, plen, to);
> >   			off += copied;
> >   			left -= copied;
> > -			if (copied < len) {
> > +			page_off = 0;
> > +			if (copied < plen) {
> >   				ret = -EFAULT;
> >   				break;
> >   			}
> > @@ -1006,20 +1045,21 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> >   			break;
> >   	}
> >   
> > -	if (off > *ki_pos) {
> > -		if (off >= i_size) {
> > -			*retry_op = CHECK_EOF;
> > -			ret = i_size - *ki_pos;
> > -			*ki_pos = i_size;
> > -		} else {
> > -			ret = off - *ki_pos;
> > -			*ki_pos = off;
> > +	if (ret > 0) {
> > +		if (off > *ki_pos) {
> > +			if (off >= i_size) {
> > +				*retry_op = CHECK_EOF;
> > +				ret = i_size - *ki_pos;
> > +				*ki_pos = i_size;
> > +			} else {
> > +				ret = off - *ki_pos;
> > +				*ki_pos = off;
> > +			}
> >   		}
> > -	}
> > -
> > -	if (last_objver && ret > 0)
> > -		*last_objver = objver;
> >   
> > +		if (last_objver)
> > +			*last_objver = objver;
> > +	}
> >   	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
> >   	return ret;
> >   }
> > @@ -1532,6 +1572,9 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
> >   		last = (pos + len) != (write_pos + write_len);
> >   		rmw = first || last;
> >   
> > +		dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n",
> > +		     ci->i_vino.ino, pos, len, write_pos, write_len, rmw ? "" : "no ");
> > +
> 
> Should this move to the previous patch?
> 
> 

Yes, fixed in wip-fscrypt. Thanks!

> >   		/*
> >   		 * The data is emplaced into the page as it would be if it were in
> >   		 * an array of pagecache pages.
> > @@ -1761,6 +1804,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
> >   		ceph_clear_error_write(ci);
> >   		pos += len;
> >   		written += len;
> > +		dout("sync_write written %d\n", written);
> >   		if (pos > i_size_read(inode)) {
> >   			check_caps = ceph_inode_set_size(inode, pos);
> >   			if (check_caps)
> 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux