From: Niu Yawei <yawei.niu@xxxxxxxxx> In some cases, such as kernel writeback, we shouldn't ignore the layout; otherwise, it could race with an ongoing layout change. Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3160 Lustre-change: http://review.whamcloud.com/6154 Signed-off-by: Niu Yawei <yawei.niu@xxxxxxxxx> Signed-off-by: Jinshan Xiong <jinshan.xiong@xxxxxxxxx> Reviewed-by: Andreas Dilger <andreas.dilger@xxxxxxxxx> Reviewed-by: Fan Yong <fan.yong@xxxxxxxxx> Reviewed-by: Oleg Drokin <oleg.drokin@xxxxxxxxx> Signed-off-by: Peng Tao <tao.peng@xxxxxxx> Signed-off-by: Andreas Dilger <andreas.dilger@xxxxxxxxx> --- drivers/staging/lustre/lustre/llite/file.c | 6 +++--- .../staging/lustre/lustre/llite/llite_internal.h | 3 ++- drivers/staging/lustre/lustre/llite/llite_lib.c | 8 +++++--- drivers/staging/lustre/lustre/llite/rw.c | 11 +++++++++-- drivers/staging/lustre/lustre/llite/vvp_io.c | 13 ++++++++++++- 5 files changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index d423de1..8c2cf23 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -2175,7 +2175,7 @@ int ll_flush(struct file *file, fl_owner_t id) * Return how many pages have been written. 
*/ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, - enum cl_fsync_mode mode) + enum cl_fsync_mode mode, int ignore_layout) { struct cl_env_nest nest; struct lu_env *env; @@ -2197,7 +2197,7 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, io = ccc_env_thread_io(env); io->ci_obj = cl_i2info(inode)->lli_clob; - io->ci_ignore_layout = 1; + io->ci_ignore_layout = ignore_layout; /* initialize parameters for sync */ fio = &io->u.ci_fsync; @@ -2270,7 +2270,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct ll_file_data *fd = LUSTRE_FPRIVATE(file); err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, - CL_FSYNC_ALL); + CL_FSYNC_ALL, 0); if (rc == 0 && err < 0) rc = err; if (rc < 0) diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 177b4db..d8e43bb 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -465,6 +465,7 @@ struct ll_sb_info { struct lu_fid ll_root_fid; /* root object fid */ int ll_flags; + int ll_umounting:1; struct list_head ll_conn_chain; /* per-conn chain of SBs */ struct lustre_client_ocd ll_lco; @@ -1419,7 +1420,7 @@ static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode) struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt); int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, - enum cl_fsync_mode mode); + enum cl_fsync_mode mode, int ignore_layout); /** direct write pages */ struct ll_dio_pages { diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 278b97d..2a4a87d 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -724,8 +724,10 @@ void ll_kill_super(struct super_block *sb) /* we need restore s_dev from changed for clustred NFS before 
put_super * because new kernels have cached s_dev and change sb->s_dev in * put_super not affected real removing devices */ - if (sbi) + if (sbi) { sb->s_dev = sbi->ll_sdev_orig; + sbi->ll_umounting = 1; + } EXIT; } @@ -1856,7 +1858,8 @@ void ll_delete_inode(struct inode *inode) if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) /* discard all dirty pages before truncating them, required by * osc_extent implementation at LU-1030. */ - cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_DISCARD); + cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, + CL_FSYNC_DISCARD, 1); truncate_inode_pages(&inode->i_data, 0); @@ -2026,7 +2029,6 @@ void ll_umount_begin(struct super_block *sb) OBD_FREE_PTR(ioc_data); } - /* Really, we'd like to wait until there are no requests outstanding, * and then continue. For now, we just invalidate the requests, * schedule() and sleep one second if needed, and hope. diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index 0a0ac26..fac1178 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -1213,7 +1213,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) * PageWriteback or clean the page. */ result = cl_sync_file_range(inode, offset, offset + PAGE_CACHE_SIZE - 1, - CL_FSYNC_LOCAL); + CL_FSYNC_LOCAL, 1); if (result > 0) { /* actually we may have written more than one page. 
* decreasing this page because the caller will count @@ -1240,11 +1240,13 @@ out: int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; + struct ll_sb_info *sbi = ll_i2sbi(inode); loff_t start; loff_t end; enum cl_fsync_mode mode; int range_whole = 0; int result; + int ignore_layout = 0; ENTRY; if (wbc->range_cyclic) { @@ -1263,7 +1265,12 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) mode = CL_FSYNC_LOCAL; - result = cl_sync_file_range(inode, start, end, mode); + if (sbi->ll_umounting) + /* if the mountpoint is being umounted, all pages have to be + * evicted to avoid hitting LBUG when truncate_inode_pages() + * is called later on. */ + ignore_layout = 1; + result = cl_sync_file_range(inode, start, end, mode, ignore_layout); if (result > 0) { wbc->nr_to_write -= result; result = 0; diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 8504d44..eb964ac 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -1160,8 +1160,19 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, /* Enqueue layout lock and get layout version. We need to do this * even for operations requiring to open file, such as read and write, * because it might not grant layout lock in IT_OPEN. */ - if (result == 0 && !io->ci_ignore_layout) + if (result == 0 && !io->ci_ignore_layout) { result = ll_layout_refresh(inode, &cio->cui_layout_gen); + if (result == -ENOENT) + /* If the inode on MDS has been removed, but the objects + * on OSTs haven't been destroyed (async unlink), layout + * fetch will return -ENOENT, we'd ignore this error + * and continue with dirty flush. LU-3230. 
*/ + result = 0; + if (result < 0) + CERROR("%s: refresh file layout " DFID " error %d.\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(lu_object_fid(&obj->co_lu)), result); + } RETURN(result); } -- 1.7.9.5 _______________________________________________ devel mailing list devel@xxxxxxxxxxxxxxxxxxxxxx http://driverdev.linuxdriverproject.org/mailman/listinfo/devel