Note that the crypto block may be smaller than a page, but the reverse
cannot be true.
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
fs/ceph/file.c | 94 ++++++++++++++++++++++++++++++++++++--------------
1 file changed, 69 insertions(+), 25 deletions(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 41766b2012e9..b4f2fcd33837 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -926,9 +926,17 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
bool more;
int idx;
size_t left;
+ u64 read_off = off;
+ u64 read_len = len;
+
+ /* determine new offset/length if encrypted */
+ fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
+
+ dout("sync_read orig %llu~%llu reading %llu~%llu",
+ off, len, read_off, read_len);
req = ceph_osdc_new_request(osdc, &ci->i_layout,
- ci->i_vino, off, &len, 0, 1,
+ ci->i_vino, read_off, &read_len, 0, 1,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
@@ -937,10 +945,13 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
+ /* adjust len downward if the request truncated the len */
+ if (off + len > read_off + read_len)
+ len = read_off + read_len - off;
more = len < iov_iter_count(to);
- num_pages = calc_pages_for(off, len);
- page_off = off & ~PAGE_MASK;
+ num_pages = calc_pages_for(read_off, read_len);
+ page_off = offset_in_page(off);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages)) {
ceph_osdc_put_request(req);
@@ -948,7 +959,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
+ osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
+ offset_in_page(read_off),
false, false);
ret = ceph_osdc_start_request(osdc, req, false);
if (!ret)
@@ -957,23 +969,50 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
req->r_end_latency,
- len, ret);
+ read_len, ret);
if (ret > 0)
objver = req->r_version;
ceph_osdc_put_request(req);
-
i_size = i_size_read(inode);
dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
off, len, ret, i_size, (more ? " MORE" : ""));
- if (ret == -ENOENT)
+ if (ret == -ENOENT) {
+ /* No object? Then this is a hole */
ret = 0;
+ } else if (ret > 0 && IS_ENCRYPTED(inode)) {
+ int fret;
+
+ fret = ceph_fscrypt_decrypt_pages(inode, pages, read_off, ret);
+ if (fret < 0) {
+ ceph_release_page_vector(pages, num_pages);
+ ret = fret;
+ break;
+ }
+
+ dout("sync_read decrypted fret %d\n", fret);
+
+ /* account for any partial block at the beginning */
+ fret -= (off - read_off);
+
+ /*
+ * Short read after big offset adjustment?
+ * Nothing is usable, just call it a zero
+ * len read.
+ */
+ fret = max(fret, 0);
+
+ /* account for partial block at the end */
+ ret = min_t(ssize_t, fret, len);
+ }
+
+ /* Short read but not EOF? Zero out the remainder. */
if (ret >= 0 && ret < len && (off + ret < i_size)) {
int zlen = min(len - ret, i_size - off - ret);
int zoff = page_off + ret;
dout("sync_read zero gap %llu~%llu\n",
- off + ret, off + ret + zlen);
+ off + ret, off + ret + zlen);
ceph_zero_page_vector_range(zoff, zlen, pages);
ret += zlen;
}
@@ -981,15 +1020,15 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
idx = 0;
left = ret > 0 ? ret : 0;
while (left > 0) {
- size_t len, copied;
- page_off = off & ~PAGE_MASK;
- len = min_t(size_t, left, PAGE_SIZE - page_off);
+ size_t plen, copied;
+ plen = min_t(size_t, left, PAGE_SIZE - page_off);
SetPageUptodate(pages[idx]);
copied = copy_page_to_iter(pages[idx++],
- page_off, len, to);
+ page_off, plen, to);
off += copied;
left -= copied;
- if (copied < len) {
+ page_off = 0;
+ if (copied < plen) {
ret = -EFAULT;
break;
}
@@ -1006,20 +1045,21 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
- if (off > *ki_pos) {
- if (off >= i_size) {
- *retry_op = CHECK_EOF;
- ret = i_size - *ki_pos;
- *ki_pos = i_size;
- } else {
- ret = off - *ki_pos;
- *ki_pos = off;
+ if (ret > 0) {
+ if (off > *ki_pos) {
+ if (off >= i_size) {
+ *retry_op = CHECK_EOF;
+ ret = i_size - *ki_pos;
+ *ki_pos = i_size;
+ } else {
+ ret = off - *ki_pos;
+ *ki_pos = off;
+ }
}
- }
-
- if (last_objver && ret > 0)
- *last_objver = objver;
+ if (last_objver)
+ *last_objver = objver;
+ }
dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
return ret;
}
@@ -1532,6 +1572,9 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
last = (pos + len) != (write_pos + write_len);
rmw = first || last;
+ dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n",
+ ci->i_vino.ino, pos, len, write_pos, write_len, rmw ? "" : "no ");
+