On Wed, 2020-09-16 at 13:38 -0400, Jeff Layton wrote: > ...and reorganize the loop for better clarity. > > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> > --- > fs/ceph/addr.c | 148 ++++++++++++++++++++++--------------------------- > 1 file changed, 65 insertions(+), 83 deletions(-) > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > index a6c54cfc3fea..89c4d8a9a5eb 100644 > --- a/fs/ceph/addr.c > +++ b/fs/ceph/addr.c > @@ -1251,6 +1251,8 @@ static int context_is_writeable_or_written(struct inode *inode, > * @inode: inode associated with page > * @page: page being dirtied > * > + * We are only allowed to write into/dirty the page if the page is > + * clean, or already dirty within the same snap context. > * Returns NULL on success, negative error code on error, and a snapc ref that should be > * waited on otherwise. > */ > @@ -1307,104 +1309,84 @@ ceph_find_incompatible(struct inode *inode, struct page *page) > /* > * We are only allowed to write into/dirty the page if the page is > * clean, or already dirty within the same snap context. > - * > - * called with page locked. > - * return success with page locked, > - * or any failure (incl -EAGAIN) with page unlocked. > */ > -static int ceph_update_writeable_page(struct file *file, > - loff_t pos, unsigned len, > - struct page *page) > +static int ceph_write_begin(struct file *file, struct address_space *mapping, > + loff_t pos, unsigned len, unsigned flags, > + struct page **pagep, void **fsdata) > { > struct inode *inode = file_inode(file); > struct ceph_inode_info *ci = ceph_inode(inode); > struct ceph_snap_context *snapc; > - loff_t page_off = pos & PAGE_MASK; > + struct page *page = NULL; > + pgoff_t index = pos >> PAGE_SHIFT; > int pos_in_page = pos & ~PAGE_MASK; > - int end_in_page = pos_in_page + len; > - loff_t i_size; > - int r; > + int r = 0; > > -retry_locked: > - snapc = ceph_find_incompatible(inode, page); > - if (snapc) { > - if (IS_ERR(snapc)) { > - r = PTR_ERR(snapc); > - goto fail_unlock; > + dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); > + > + for (;;) { > + page = grab_cache_page_write_begin(mapping, index, 0); > + if (!page) { > + r = -ENOMEM; > + break; > } > - unlock_page(page); > - ceph_queue_writeback(inode); > - r = wait_event_killable(ci->i_cap_wq, > - context_is_writeable_or_written(inode, snapc)); > - ceph_put_snap_context(snapc); > - return -EAGAIN; > - } > > - if (PageUptodate(page)) { > - dout(" page %p already uptodate\n", page); > - return 0; > - } > + snapc = ceph_find_incompatible(inode, page); > + if (snapc) { > + if (IS_ERR(snapc)) { > + r = PTR_ERR(snapc); > + break; > + } > + unlock_page(page); > + ceph_queue_writeback(inode); > + r = wait_event_killable(ci->i_cap_wq, > + context_is_writeable_or_written(inode, snapc)); > + ceph_put_snap_context(snapc); > + put_page(page); > + page = NULL; Now that I look a bit more, there is probably no need to hold the page reference across this wait. I'll change that to put it after unlocking and re-test. > + if (r != 0) > + break; > + continue; > + } > > - /* full page? */ > - if (pos_in_page == 0 && len == PAGE_SIZE) > - return 0; > + if (PageUptodate(page)) { > + dout(" page %p already uptodate\n", page); > + break; > + } > > - /* past end of file? */ > - i_size = i_size_read(inode); > - > - if (page_off >= i_size || > - (pos_in_page == 0 && (pos+len) >= i_size && > - end_in_page - pos_in_page != PAGE_SIZE)) { > - dout(" zeroing %p 0 - %d and %d - %d\n", > - page, pos_in_page, end_in_page, (int)PAGE_SIZE); > - zero_user_segments(page, > - 0, pos_in_page, > - end_in_page, PAGE_SIZE); > - return 0; > - } > + /* > + * In some cases we don't need to read at all: > + * - full page write > + * - write that lies completely beyond EOF > + * - write that covers the the page from start to EOF or beyond it > + */ > + if ((pos_in_page == 0 && len == PAGE_SIZE) || > + (pos >= i_size_read(inode)) || > + (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) { > + zero_user_segments(page, 0, pos_in_page, > + pos_in_page + len, PAGE_SIZE); > + break; > + } > > - /* we need to read it. */ > - r = ceph_do_readpage(file, page); > - if (r < 0) { > - if (r == -EINPROGRESS) > - return -EAGAIN; > - goto fail_unlock; > + /* > + * We need to read it. If we get back -EINPROGRESS, then the page was > + * handed off to fscache and it will be unlocked when the read completes. > + * Refind the page in that case so we can reacquire the page lock. Otherwise > + * we got a hard error or the read was completed synchronously. > + */ > + r = ceph_do_readpage(file, page); > + if (r != -EINPROGRESS) > + break; > } > - goto retry_locked; > -fail_unlock: > - unlock_page(page); > - return r; > -} > > -/* > - * We are only allowed to write into/dirty the page if the page is > - * clean, or already dirty within the same snap context. > - */ > -static int ceph_write_begin(struct file *file, struct address_space *mapping, > - loff_t pos, unsigned len, unsigned flags, > - struct page **pagep, void **fsdata) > -{ > - struct inode *inode = file_inode(file); > - struct page *page; > - pgoff_t index = pos >> PAGE_SHIFT; > - int r; > - > - do { > - /* get a page */ > - page = grab_cache_page_write_begin(mapping, index, 0); > - if (!page) > - return -ENOMEM; > - > - dout("write_begin file %p inode %p page %p %d~%d\n", file, > - inode, page, (int)pos, (int)len); > - > - r = ceph_update_writeable_page(file, pos, len, page); > - if (r < 0) > + if (r < 0) { > + if (page) { > + unlock_page(page); > put_page(page); > - else > - *pagep = page; > - } while (r == -EAGAIN); > - > + } > + } else { > + *pagep = page; > + } > return r; > } > -- Jeff Layton <jlayton@xxxxxxxxxx>