In order to provide atomic writes, we should consider a power failure occurring
during sync_node_pages in fsync. So, this patch sets the fsync flag only in the
last dnode block.

Signed-off-by: Jaegeuk Kim <jaegeuk@xxxxxxxxxx>
---
 fs/f2fs/f2fs.h     |   4 +-
 fs/f2fs/file.c     |  14 +++++--
 fs/f2fs/node.c     | 106 +++++++++++++++++++++++++++++++++++++++++++++++++----
 fs/f2fs/recovery.c |   9 ++---
 4 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 269abe5..ca828b0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -159,7 +159,6 @@ struct fsync_inode_entry {
 	struct inode *inode;	/* vfs inode pointer */
 	block_t blkaddr;	/* block address locating the last fsync */
 	block_t last_dentry;	/* block address locating the last dentry */
-	block_t last_inode;	/* block address locating the last inode */
 };
 
 #define nats_in_cursum(jnl)		(le16_to_cpu(jnl->n_nats))
@@ -1784,7 +1783,8 @@ void ra_node_page(struct f2fs_sb_info *, nid_t);
 struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
 struct page *get_node_page_ra(struct page *, int);
 void sync_inode_page(struct dnode_of_data *);
-int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
+int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *,
+			bool);
 int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
 bool alloc_nid(struct f2fs_sb_info *, nid_t *);
 void alloc_nid_done(struct f2fs_sb_info *, nid_t);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60fd64c..dc47d5c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -182,7 +182,8 @@ static void try_to_fix_pino(struct inode *inode)
 	}
 }
 
-int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
+						int datasync, bool atomic)
 {
 	struct inode *inode = file->f_mapping->host;
 	struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -256,7 +257,7 @@ go_write:
 		goto out;
 	}
 sync_nodes:
-	ret = fsync_node_pages(sbi, ino, &wbc);
+	ret = fsync_node_pages(sbi, ino, &wbc, atomic);
 	if (ret)
 		goto out;
 
@@ -290,6 +291,11 @@ out:
 	return ret;
 }
 
+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	return f2fs_do_sync_file(file, start, end, datasync, false);
+}
+
 static pgoff_t __get_first_dirty_index(struct address_space *mapping,
 						pgoff_t pgofs, int whence)
 {
@@ -1407,7 +1413,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
 		}
 	}
 
-	ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+	ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 err_out:
 	mnt_drop_write_file(filp);
 	return ret;
@@ -1465,7 +1471,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
 		drop_inmem_pages(inode);
 	if (f2fs_is_volatile_file(inode)) {
 		clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
-		ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 	}
 
 	mnt_drop_write_file(filp);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8a1e211..de070a5 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1222,13 +1222,81 @@ iput_out:
 	iput(inode);
 }
 
+static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+	pgoff_t index, end;
+	struct pagevec pvec;
+	struct page *last_page = NULL;
+
+	pagevec_init(&pvec, 0);
+	index = 0;
+	end = ULONG_MAX;
+
+	while (index <= end) {
+		int i, nr_pages;
+		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+					PAGECACHE_TAG_DIRTY,
+					min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			if (unlikely(f2fs_cp_error(sbi))) {
+				f2fs_put_page(last_page, 0);
+				pagevec_release(&pvec);
+				return ERR_PTR(-EIO);
+			}
+
+			if (!IS_DNODE(page) || !is_cold_node(page))
+				continue;
+			if (ino_of_node(page) != ino)
+				continue;
+
+			lock_page(page);
+
+			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+				unlock_page(page);
+				continue;
+			}
+			if (ino_of_node(page) != ino)
+				goto continue_unlock;
+
+			if (!PageDirty(page)) {
+				/* someone wrote it for us */
+				goto continue_unlock;
+			}
+
+			if (last_page)
+				f2fs_put_page(last_page, 0);
+
+			get_page(page);
+			last_page = page;
+			unlock_page(page);
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	return last_page;
+}
+
 int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
-			struct writeback_control *wbc)
+			struct writeback_control *wbc, bool atomic)
 {
 	pgoff_t index, end;
 	struct pagevec pvec;
 	int ret = 0;
+	struct page *last_page = NULL;
+	bool marked = false;
 
+	if (atomic) {
+		last_page = last_fsync_dnode(sbi, ino);
+		if (IS_ERR_OR_NULL(last_page))
+			return PTR_ERR_OR_ZERO(last_page);
+	}
+retry:
 	pagevec_init(&pvec, 0);
 	index = 0;
 	end = ULONG_MAX;
@@ -1245,6 +1313,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
 			struct page *page = pvec.pages[i];
 
 			if (unlikely(f2fs_cp_error(sbi))) {
+				f2fs_put_page(last_page, 0);
 				pagevec_release(&pvec);
 				return -EIO;
 			}
@@ -1264,33 +1333,54 @@ continue_unlock:
 			if (ino_of_node(page) != ino)
 				goto continue_unlock;
 
-			if (!PageDirty(page)) {
+			if (!PageDirty(page) && page != last_page) {
 				/* someone wrote it for us */
 				goto continue_unlock;
 			}
 
 			f2fs_wait_on_page_writeback(page, NODE, true);
 			BUG_ON(PageWriteback(page));
-			if (!clear_page_dirty_for_io(page))
-				goto continue_unlock;
 
-			set_fsync_mark(page, 1);
-			if (IS_INODE(page))
-				set_dentry_mark(page,
+			if (!atomic || page == last_page) {
+				set_fsync_mark(page, 1);
+				if (IS_INODE(page))
+					set_dentry_mark(page,
 						need_dentry_mark(sbi, ino));
+				/* may be written by other thread */
+				if (!PageDirty(page))
+					set_page_dirty(page);
+			}
+
+			if (!clear_page_dirty_for_io(page))
+				goto continue_unlock;
 
 			ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
 			if (ret) {
 				unlock_page(page);
+				f2fs_put_page(last_page, 0);
+				break;
+			}
+			if (page == last_page) {
+				f2fs_put_page(page, 0);
+				marked = true;
 				break;
 			}
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 
-		if (ret)
+		if (ret || marked)
 			break;
 	}
+	if (!ret && atomic && !marked) {
+		f2fs_msg(sbi->sb, KERN_DEBUG,
+			"Retry to write fsync mark: ino=%u, idx=%lx",
+					ino, last_page->index);
+		lock_page(last_page);
+		set_page_dirty(last_page);
+		unlock_page(last_page);
+		goto retry;
+	}
 	return ret ? -EIO: 0;
 }
 
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2c87c12..a646d3b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -257,11 +257,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
 		}
 		entry->blkaddr = blkaddr;
 
-		if (IS_INODE(page)) {
-			entry->last_inode = blkaddr;
-			if (is_dent_dnode(page))
-				entry->last_dentry = blkaddr;
-		}
+		if (IS_INODE(page) && is_dent_dnode(page))
+			entry->last_dentry = blkaddr;
 next:
 		/* check next segment */
 		blkaddr = next_blkaddr_of_node(page);
@@ -521,7 +518,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
 		 * In this case, we can lose the latest inode(x).
 		 * So, call recover_inode for the inode update.
		 */
-		if (entry->last_inode == blkaddr)
+		if (IS_INODE(page))
 			recover_inode(entry->inode, page);
 		if (entry->last_dentry == blkaddr) {
 			err = recover_dentry(entry->inode, page);
-- 
2.6.3
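
To make the intent easier to follow, here is a minimal user-space sketch of the
scheme, assuming roll-forward recovery replays dnodes up to and including the
block carrying the fsync mark. This is not kernel code: struct node_page,
last_dirty_node(), fsync_pass() and fsync_nodes_atomic() are hypothetical
stand-ins for the real page-cache walk. Only the last dirty dnode of the inode
is marked; if that block was cleaned by another writer before being written
with the mark, it is re-dirtied and the pass retried, so a power failure in the
middle of fsync never leaves an intermediate dnode looking like a valid fsync
point.

/*
 * Toy user-space model of the idea in this patch -- NOT kernel code.
 * All names below are hypothetical stand-ins for illustration only.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node_page {
	unsigned int ino;	/* owning inode number */
	bool dirty;		/* needs writeback */
	bool fsync_mark;	/* roll-forward recovery stops at this block */
};

/* Find the last dirty dnode of @ino, mirroring last_fsync_dnode(). */
static struct node_page *last_dirty_node(struct node_page *pages, size_t n,
					 unsigned int ino)
{
	struct node_page *last = NULL;

	for (size_t i = 0; i < n; i++)
		if (pages[i].ino == ino && pages[i].dirty)
			last = &pages[i];
	return last;
}

/*
 * One writeback pass: only the last dnode gets the fsync mark.
 * Returns true once the marked block has actually been written
 * (the "marked" flag in fsync_node_pages()).
 */
static bool fsync_pass(struct node_page *pages, size_t n, unsigned int ino,
		       struct node_page *last)
{
	for (size_t i = 0; i < n; i++) {
		struct node_page *p = &pages[i];

		if (p->ino != ino)
			continue;
		if (!p->dirty && p != last)
			continue;		/* someone wrote it for us */
		if (p == last) {
			p->fsync_mark = true;	/* only here, nowhere else */
			p->dirty = true;	/* may have been cleaned */
		}
		p->dirty = false;		/* "write" the block */
		printf("wrote ino=%u fsync_mark=%d\n", p->ino, p->fsync_mark);
		if (p == last)
			return true;
	}
	return false;
}

static void fsync_nodes_atomic(struct node_page *pages, size_t n,
			       unsigned int ino)
{
	struct node_page *last = last_dirty_node(pages, n, ino);

	if (!last)
		return;
	/*
	 * In the kernel a racing writer can clean the last page before this
	 * pass reaches it, so the mark never reaches storage; redirty it and
	 * retry, as the "goto retry" path in the patch does. In this
	 * single-threaded toy the first pass always succeeds.
	 */
	while (!fsync_pass(pages, n, ino, last))
		last->dirty = true;
}

int main(void)
{
	struct node_page pages[] = {
		{ .ino = 3, .dirty = true },
		{ .ino = 7, .dirty = true },	/* other inode: left alone */
		{ .ino = 3, .dirty = true },	/* last dnode: gets the mark */
	};

	fsync_nodes_atomic(pages, sizeof(pages) / sizeof(pages[0]), 3);
	return 0;
}

This is also why entry->last_inode can go away on the recovery side: once the
fsync mark is only on the last dnode, any inode block seen during roll-forward
can simply be handled with IS_INODE(page).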