On Wed, 2019-07-10 at 19:03 +0100, Luis Henriques wrote: > Jeff Layton <jlayton@xxxxxxxxxx> writes: > > > The current code will do the uninlining but it relies on the caller to > > set the i_inline_version appropriately afterward. There are several > > potential races here. > > > > Protect against competing uninlining attempts by having the callers > > take the i_truncate_mutex and then have them update the version > > themselves before dropping it. > > > > Other callers can then re-check the i_inline_version after acquiring the > > mutex and if it has changed to CEPH_INLINE_NONE, they can just drop it > > and do nothing. > > > > Finally since we are doing a lockless check first in all cases, just > > move that into ceph_uninline_data as well, and have the callers call > > it unconditionally. > > > > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> > > --- > > fs/ceph/addr.c | 33 ++++++++++++++++++++++++--------- > > fs/ceph/file.c | 18 ++++++------------ > > 2 files changed, 30 insertions(+), 21 deletions(-) > > > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > > index 5f1e2b6577fb..e9700c997d12 100644 > > --- a/fs/ceph/addr.c > > +++ b/fs/ceph/addr.c > > @@ -1541,11 +1541,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) > > > > ceph_block_sigs(&oldset); > > > > - if (ci->i_inline_version != CEPH_INLINE_NONE) { > > - err = ceph_uninline_data(inode, off == 0 ? page : NULL); > > - if (err < 0) > > - goto out_free; > > - } > > + err = ceph_uninline_data(inode, off == 0 ? page : NULL); > > + if (err < 0) > > + goto out_free; > > > > if (off + PAGE_SIZE <= size) > > len = PAGE_SIZE; > > @@ -1593,7 +1591,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) > > ci->i_inline_version != CEPH_INLINE_NONE) { > > int dirty; > > spin_lock(&ci->i_ceph_lock); > > - ci->i_inline_version = CEPH_INLINE_NONE; > > dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, > > &prealloc_cf); > > spin_unlock(&ci->i_ceph_lock); > > @@ -1656,6 +1653,10 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, > > } > > } > > > > +/* > > + * We borrow the i_truncate_mutex to serialize callers that may be racing to > > + * uninline the data. > > + */ > > int ceph_uninline_data(struct inode *inode, struct page *page) > > { > > struct ceph_inode_info *ci = ceph_inode(inode); > > @@ -1665,15 +1666,23 @@ int ceph_uninline_data(struct inode *inode, struct page *page) > > int err = 0; > > bool from_pagecache = false; > > > > - spin_lock(&ci->i_ceph_lock); > > - inline_version = ci->i_inline_version; > > - spin_unlock(&ci->i_ceph_lock); > > + /* Do a lockless check first -- paired with i_ceph_lock for changes */ > > + inline_version = READ_ONCE(ci->i_inline_version); > > > > dout("uninline_data %p %llx.%llx inline_version %llu\n", > > inode, ceph_vinop(inode), inline_version); > > > > if (inline_version == 1 || /* initial version, no data */ > > inline_version == CEPH_INLINE_NONE) > > + return 0; > > We may need to do the unlock_page(page) before returning. > We don't need to do that, but if the value is 1, then the existing code will reset it to CEPH_INLINE_NONE even though it doesn't inline anything. I'll fix that up for v2. Thanks for the review so far! > > + + mutex_lock(&ci->i_truncate_mutex); + + /* Double check the version > > after taking mutex */ + spin_lock(&ci->i_ceph_lock); + inline_version > > = ci->i_inline_version; + spin_unlock(&ci->i_ceph_lock); + if > > (inline_version == CEPH_INLINE_NONE) goto out; > > > > if (page) { > > @@ -1770,11 +1779,17 @@ int ceph_uninline_data(struct inode *inode, struct page *page) > > err = ceph_osdc_start_request(&fsc->client->osdc, req, false); > > if (!err) > > err = ceph_osdc_wait_request(&fsc->client->osdc, req); > > + if (!err) { > > + spin_lock(&ci->i_ceph_lock); > > + inline_version = CEPH_INLINE_NONE; > > Shouldn't this be ci->i_inline_version = CEPH_INLINE_NONE ? Or maybe > both since the dout() below uses inline_version. > > Cheers, -- Jeff Layton <jlayton@xxxxxxxxxx>