With the traditional page-based writes, blocks are allocated separately for each page written to. With iomap writes, we can allocate a lot more blocks at once, with a fraction of the allocation overhead for each page. Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx> --- fs/gfs2/aops.c | 20 +++-- fs/gfs2/aops.h | 22 +++++ fs/gfs2/bmap.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/gfs2/file.c | 44 ++++++++-- 4 files changed, 325 insertions(+), 24 deletions(-) create mode 100644 fs/gfs2/aops.h diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index a1c6b5de9200..3d9633175aa8 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -22,6 +22,7 @@ #include <linux/backing-dev.h> #include <linux/uio.h> #include <trace/events/writeback.h> +#include <linux/sched/signal.h> #include "gfs2.h" #include "incore.h" @@ -36,10 +37,11 @@ #include "super.h" #include "util.h" #include "glops.h" +#include "aops.h" -static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int len) +void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int len) { struct buffer_head *head = page_buffers(page); unsigned int bsize = head->b_size; @@ -462,7 +464,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, * Returns: errno */ -static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +int stuffed_readpage(struct gfs2_inode *ip, struct page *page) { struct buffer_head *dibh; u64 dsize = i_size_read(&ip->i_inode); @@ -773,7 +775,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, * adjust_fs_space - Adjusts the free space available due to gfs2_grow * @inode: the rindex inode */ -static void adjust_fs_space(struct inode *inode) +void adjust_fs_space(struct inode *inode) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -819,11 +821,11 @@ static void adjust_fs_space(struct inode *inode) * This copies the data from the page into the inode block after * the inode data structure itself. * - * Returns: errno + * Returns: copied bytes or errno */ -static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned copied, - struct page *page) +int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned copied, + struct page *page) { struct gfs2_inode *ip = GFS2_I(inode); u64 to = pos + copied; @@ -862,7 +864,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, * The main write_end function for GFS2. We just put our locking around the VFS * provided functions. * - * Returns: errno + * Returns: copied bytes or errno */ static int gfs2_write_end(struct file *file, struct address_space *mapping, diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h new file mode 100644 index 000000000000..976bb32dd405 --- /dev/null +++ b/fs/gfs2/aops.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#ifndef __AOPS_DOT_H__ +#define __AOPS_DOT_H__ + +#include "incore.h" + +extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page); +extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned copied, + struct page *page); +extern void adjust_fs_space(struct inode *inode); +extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int len); + +#endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1ed7bc022d7b..b6ee5252c014 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -28,6 +28,7 @@ #include "trans.h" #include "dir.h" #include "util.h" +#include "aops.h" #include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k @@ -41,6 +42,8 @@ struct metapath { int mp_aheight; /* actual height (lookup height) */ }; +static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); + /** * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page * @ip: the inode @@ -375,7 +378,7 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) return mp->mp_aheight - x - 1; } -static inline void release_metapath(struct metapath *mp) +static void release_metapath(struct metapath *mp) { int i; @@ -383,6 +386,7 @@ static inline void release_metapath(struct metapath *mp) if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); + mp->mp_bh[i] = NULL; } } @@ -675,14 +679,15 @@ static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp) return holesz << inode->i_blkbits; } -static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap) +static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap, + u64 length) { struct gfs2_inode *ip = GFS2_I(inode); iomap->addr = (ip->i_no_addr << inode->i_blkbits) + sizeof(struct gfs2_dinode); iomap->offset = 0; - iomap->length = i_size_read(inode); + iomap->length = length; iomap->type = IOMAP_MAPPED; iomap->flags = IOMAP_F_DATA_INLINE; } @@ -722,10 +727,15 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, if (flags & IOMAP_REPORT) { if (pos >= i_size_read(inode)) return -ENOENT; - gfs2_stuffed_iomap(inode, iomap); + gfs2_stuffed_iomap(inode, iomap, + i_size_read(inode)); + return 0; + } + if (pos + length <= gfs2_max_stuffed_size(ip)) { + gfs2_stuffed_iomap(inode, iomap, + gfs2_max_stuffed_size(ip)); return 0; } - BUG_ON(!(flags & IOMAP_WRITE)); } lblock = pos >> inode->i_blkbits; lblock_stop = (pos + length - 1) >> inode->i_blkbits; @@ -796,9 +806,140 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, else iomap->length = size - pos; } + /* FIXME: Should we limit iomap->length to the maximum allocation size + * here according to how gfs2_iomap_alloc allocates blocks? */ goto out; } +static int gfs2_write_lock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (error) + goto out_uninit; + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, &m_ip->i_gh); + if (error) + goto out_unlock; + } + return 0; + +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + +static void gfs2_write_unlock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + gfs2_glock_dq_uninit(&m_ip->i_gh); + } + gfs2_glock_dq_uninit(&ip->i_gh); +} + +static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, loff_t length, + unsigned flags, struct iomap *iomap) +{ + struct metapath mp = { .mp_aheight = 1, }; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + bool unstuff, alloc_required; + int ret; + + ret = gfs2_write_lock(inode); + if (ret) + return ret; + + unstuff = gfs2_is_stuffed(ip) && + pos + length > gfs2_max_stuffed_size(ip); + + ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); + if (ret) + goto out_release; + + alloc_required = unstuff || iomap->type != IOMAP_MAPPED; + + if (alloc_required || gfs2_is_jdata(ip)) + gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, &ind_blocks); + + if (alloc_required) { + struct gfs2_alloc_parms ap = { .target = data_blocks + ind_blocks }; + ret = gfs2_quota_lock_check(ip, &ap); + if (ret) + goto out_release; + + ret = gfs2_inplace_reserve(ip, &ap); + if (ret) + goto out_qunlock; + } + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + if (inode == sdp->sd_rindex) + rblocks += 2 * RES_STATFS; + if (alloc_required) + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); + + ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits); + if (ret) + goto out_trans_fail; + + if (unstuff) { + ret = gfs2_unstuff_dinode(ip, NULL); + if (ret) + goto out_trans_end; + release_metapath(&mp); + ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, + flags, iomap, &mp); + if (ret) + goto out_trans_end; + } + + if (iomap->type != IOMAP_MAPPED) { + ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); + if (ret) { + gfs2_trans_end(sdp); + if (alloc_required) + gfs2_inplace_release(ip); + punch_hole(ip, iomap->offset, iomap->length); + goto out_qunlock; + } + } + release_metapath(&mp); + return 0; + +out_trans_end: + gfs2_trans_end(sdp); +out_trans_fail: + if (alloc_required) + gfs2_inplace_release(ip); +out_qunlock: + if (alloc_required) + gfs2_quota_unlock(ip); +out_release: + release_metapath(&mp); + gfs2_write_unlock(inode); + return ret; +} + static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap) { @@ -808,10 +949,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, trace_gfs2_iomap_start(ip, pos, length, flags); if (flags & IOMAP_WRITE) { - ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); - if (!ret && iomap->type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); - release_metapath(&mp); + ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); } else { ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); release_metapath(&mp); @@ -820,8 +958,115 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, return ret; } +static int +gfs2_iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, + unsigned flags, struct page **pagep, struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct page *page; + int ret; + + if (gfs2_is_stuffed(ip)) { + BUG_ON(pos + len > gfs2_max_stuffed_size(ip)); + + page = grab_cache_page_write_begin(inode->i_mapping, 0, flags); + if (!page) + return -ENOMEM; + + if (!PageUptodate(page)) { + ret = stuffed_readpage(ip, page); + if (ret) { + unlock_page(page); + put_page(page); + return ret; + } + } + *pagep = page; + return 0; + } + + return iomap_write_begin(inode, pos, len, flags, pagep, iomap); +} + +static int gfs2_iomap_write_end(struct inode *inode, loff_t pos, unsigned len, + unsigned copied, struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct buffer_head *dibh; + int ret; + + if (gfs2_is_stuffed(ip)) { + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (ret) { + unlock_page(page); + put_page(page); + return ret; + } + ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page); + brelse(dibh); + return ret; + } + + if (gfs2_is_jdata(ip)) + gfs2_page_add_databufs(ip, page, offset_in_page(pos), len); + + return iomap_write_end(inode, pos, len, copied, page); +} + +static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned flags, struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_trans *tr = current->journal_info; + + if (!(flags & IOMAP_WRITE)) + return 0; + + gfs2_ordered_add_inode(ip); + + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else { + struct buffer_head *dibh; + int ret; + + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (unlikely(ret)) + return ret; + gfs2_trans_add_meta(ip->i_gl, dibh); + brelse(dibh); + } + + if (inode == sdp->sd_rindex) { + adjust_fs_space(inode); + sdp->sd_rindex_uptodate = 0; + } + + gfs2_trans_end(sdp); + gfs2_inplace_release(ip); + + if (length != written && (iomap->flags & IOMAP_F_NEW)) { + /* Deallocate blocks that were just allocated. */ + loff_t end = round_down(pos + length, PAGE_SIZE); + pos = round_up(pos, PAGE_SIZE); + if (pos < end) { + truncate_pagecache_range(inode, pos, end - 1); + punch_hole(ip, pos, end - pos); + } + } + + if (ip->i_qadata && ip->i_qadata->qa_qd_num) + gfs2_quota_unlock(ip); + gfs2_write_unlock(inode); + return 0; +} + const struct iomap_ops gfs2_iomap_ops = { .iomap_begin = gfs2_iomap_begin, + .write_begin = gfs2_iomap_write_begin, + .write_end = gfs2_iomap_write_end, + .iomap_end = gfs2_iomap_end, }; /** diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 2edd3a9a7b79..d1b54e781577 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -26,10 +26,12 @@ #include <linux/dlm.h> #include <linux/dlm_plock.h> #include <linux/delay.h> +#include <linux/backing-dev.h> #include "gfs2.h" #include "incore.h" #include "bmap.h" +#include "aops.h" #include "dir.h" #include "glock.h" #include "glops.h" @@ -691,9 +693,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, /** * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context - * @iov: The data to write - * @nr_segs: Number of @iov segments - * @pos: The file position + * @from: The data to write * * We have to do a lock/unlock here to refresh the inode size for * O_APPEND writes, otherwise we can land up writing at the wrong @@ -705,8 +705,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct gfs2_inode *ip = GFS2_I(file_inode(file)); - int ret; + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + ssize_t ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -723,7 +724,38 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) gfs2_glock_dq_uninit(&gh); } - return generic_file_write_iter(iocb, from); + if (iocb->ki_flags & IOCB_DIRECT) + return generic_file_write_iter(iocb, from); + + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out; + + ret = file_remove_privs(file); + if (ret) + goto out; + + ret = file_update_time(file); + if (ret) + goto out; + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = inode_to_bdi(inode); + + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + + current->backing_dev_info = NULL; + +out: + inode_unlock(inode); + if (likely(ret > 0)) { + iocb->ki_pos += ret; + + /* Handle various SYNC-type writes */ + ret = generic_write_sync(iocb, ret); + } + return ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, -- 2.14.3