This patch does three things: 1. The allocated pages are bound to the request, simplifying memory management, especially on the error path. 2. ret is checked at the earliest point instead of being carried through the loop. 3. The overflow bug (`left` wrapping around when i_size drops below off) is fixed. diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4b8d59ebda00..9522d5218c04 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1066,7 +1066,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, if (ceph_inode_is_shutdown(inode)) return -EIO; - if (!len) + if (!len || !i_size) return 0; /* * flush any page cache pages in this range. this @@ -1086,7 +1086,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, int num_pages; size_t page_off; bool more; - int idx; + int idx = 0; size_t left; struct ceph_osd_req_op *op; u64 read_off = off; @@ -1127,7 +1127,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, osd_req_op_extent_osd_data_pages(req, 0, pages, read_len, offset_in_page(read_off), - false, false); + false, true); op = &req->r_ops[0]; if (sparse) { @@ -1160,7 +1160,15 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, else if (ret == -ENOENT) ret = 0; - if (ret > 0 && IS_ENCRYPTED(inode)) { + if (ret < 0) { + ceph_osdc_put_request(req); + + if (ret == -EBLOCKLISTED) + fsc->blocklisted = true; + break; + } + + if (IS_ENCRYPTED(inode)) { int fret; fret = ceph_fscrypt_decrypt_extents(inode, pages, @@ -1186,10 +1194,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, ret = min_t(ssize_t, fret, len); } - ceph_osdc_put_request(req); - /* Short read but not EOF? Zero out the remainder. 
*/ - if (ret >= 0 && ret < len && (off + ret < i_size)) { + if (ret < len && (off + ret < i_size)) { int zlen = min(len - ret, i_size - off - ret); int zoff = page_off + ret; @@ -1199,13 +1205,11 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, ret += zlen; } - idx = 0; - if (ret <= 0) - left = 0; - else if (off + ret > i_size) - left = i_size - off; + if (off + ret > i_size) + left = (i_size > off) ? i_size - off : 0; else left = ret; + while (left > 0) { size_t plen, copied; @@ -1221,13 +1225,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, break; } } - ceph_release_page_vector(pages, num_pages); - if (ret < 0) { - if (ret == -EBLOCKLISTED) - fsc->blocklisted = true; - break; - } + ceph_osdc_put_request(req); if (off >= i_size || !more) break; On Thu, Nov 28, 2024 at 9:09 PM Alex Markuze <amarkuze@xxxxxxxxxx> wrote: > > Good catch, I'm reworking the ergonomics of this function, this ret > error code is checked and carried through the loop and checked every > other line. > > On Thu, Nov 28, 2024 at 8:53 PM Luis Henriques <luis.henriques@xxxxxxxxx> wrote: > > > > Hi! > > > > On Thu, Nov 28 2024, Alex Markuze wrote: > > > On Thu, Nov 28, 2024 at 7:43 PM Luis Henriques <luis.henriques@xxxxxxxxx> wrote: > > >> > > >> Hi Alex, > > >> > > >> [ Thank you for looking into this. ] > > >> > > >> On Wed, Nov 27 2024, Alex Markuze wrote: > > >> > > >> > Hi, Folks. > > >> > AFAIK there is no side effect that can affect MDS with this fix. > > >> > This crash happens following this patch > > >> > "1065da21e5df9d843d2c5165d5d576be000142a6" "ceph: stop copying to iter > > >> > at EOF on sync reads". > > >> > > > >> > Per your fix Luis, it seems to address only the cases when i_size goes > > >> > to zero but can happen anytime the `i_size` goes below `off`. > > >> > I propose fixing it this way: > > >> > > >> Hmm... you're probably right. I didn't see this happening, but I guess it > > >> could indeed happen. 
> > >> > > >> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > > >> > index 4b8d59ebda00..19b084212fee 100644 > > >> > --- a/fs/ceph/file.c > > >> > +++ b/fs/ceph/file.c > > >> > @@ -1066,7 +1066,7 @@ ssize_t __ceph_sync_read(struct inode *inode, > > >> > loff_t *ki_pos, > > >> > if (ceph_inode_is_shutdown(inode)) > > >> > return -EIO; > > >> > > > >> > - if (!len) > > >> > + if (!len || !i_size) > > >> > return 0; > > >> > /* > > >> > * flush any page cache pages in this range. this > > >> > @@ -1200,12 +1200,11 @@ ssize_t __ceph_sync_read(struct inode *inode, > > >> > loff_t *ki_pos, > > >> > } > > >> > > > >> > idx = 0; > > >> > - if (ret <= 0) > > >> > - left = 0; > > >> > > >> Right now I don't have any means for testing this patch. However, I don't > > >> think this is completely correct. By removing the above condition you're > > >> discarding cases where an error has occurred (i.e. where ret is negative). > > > > > > I didn't discard it though :). > > > I folded it into the `if` statement. I find the if else construct > > > overly verbose and cumbersome. > > > > > > + left = (ret > 0) ? ret : 0; > > > > > > > Right, but with your patch, if 'ret < 0', we could still hit the first > > branch instead of that one: > > > > if (off + ret > i_size) > > left = (i_size > off) ? i_size - off : 0; > > else > > left = (ret > 0) ? ret : 0; > > > > Cheers, > > -- > > Luís > >