From: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>

IOMAP_F_COW is used to inform the dax code that it must first copy the
blocks which are not page-aligned before performing the write.

A new struct btrfs_iomap is passed from iomap_begin() to iomap_end(),
which contains all the accounting and locking information for CoW-based
writes.

When writing to a hole, iomap->cow_addr is set to zero. Would this be
better handled by a flag, or can a valid filesystem block sit at offset
zero of the device?

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
---
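Not part of the patch, just an illustration for review: a minimal,
userspace-only sketch of the alignment math that IOMAP_F_COW implies.
For a CoW write, the dax code has to fill the unaligned head and tail of
the surrounding page range (from the source extent described by
iomap->cow_addr/cow_pos, presumably zero-filled for a hole) before the
new data is copied in.  The helper name cow_edges() and the fixed 4K page
size are made up for this sketch; it is not the fs/dax.c implementation.

	/* Illustration only: which byte ranges around a write need a pre-copy. */
	#include <stdio.h>
	#include <stdint.h>

	#define EXAMPLE_PAGE_SIZE 4096ULL	/* assumed page size */

	static void cow_edges(uint64_t pos, uint64_t len)
	{
		/* Page-aligned range covering the write, as in btrfs_iomap_begin(). */
		uint64_t start = pos & ~(EXAMPLE_PAGE_SIZE - 1);
		uint64_t end = (pos + len + EXAMPLE_PAGE_SIZE - 1) & ~(EXAMPLE_PAGE_SIZE - 1);

		if (pos > start)	/* unaligned head: old data must be copied first */
			printf("copy head [%llu, %llu)\n",
			       (unsigned long long)start, (unsigned long long)pos);
		if (pos + len < end)	/* unaligned tail: old data must be copied first */
			printf("copy tail [%llu, %llu)\n",
			       (unsigned long long)(pos + len), (unsigned long long)end);
		if (pos == start && pos + len == end)
			printf("write is page-aligned, nothing to pre-copy\n");
	}

	int main(void)
	{
		cow_edges(4096, 4096);	/* aligned: no pre-copy */
		cow_edges(100, 200);	/* head [0,100), tail [300,4096) */
		cow_edges(4000, 5000);	/* head [0,4000), tail [9000,12288) */
		return 0;
	}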

 fs/btrfs/ctree.h |   6 +++
 fs/btrfs/dax.c   | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/btrfs/file.c  |   4 +-
 3 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a3543a4a063d..3bcd2a4959c1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3801,6 +3801,12 @@ int btree_readahead_hook(struct extent_buffer *eb, int err);
 #ifdef CONFIG_FS_DAX
 /* dax.c */
 ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to);
+ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from);
+#else
+static inline ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	return 0;
+}
 #endif /* CONFIG_FS_DAX */
 
 static inline int is_fstree(u64 rootid)
diff --git a/fs/btrfs/dax.c b/fs/btrfs/dax.c
index bf3d46b0acb6..49619fe3f94f 100644
--- a/fs/btrfs/dax.c
+++ b/fs/btrfs/dax.c
@@ -9,30 +9,124 @@
 #ifdef CONFIG_FS_DAX
 #include <linux/dax.h>
 #include <linux/iomap.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "btrfs_inode.h"
 
+struct btrfs_iomap {
+	u64 start;
+	u64 end;
+	int nocow;
+	struct extent_changeset *data_reserved;
+	struct extent_state *cached_state;
+};
+
 static int btrfs_iomap_begin(struct inode *inode, loff_t pos,
 		loff_t length, unsigned flags, struct iomap *iomap)
 {
 	struct extent_map *em;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, pos, length, 0);
+
+	if (flags & IOMAP_WRITE) {
+		int ret = 0, nocow;
+		struct extent_map *map = em;
+		struct btrfs_iomap *bi;
+
+		bi = kzalloc(sizeof(struct btrfs_iomap), GFP_NOFS);
+		if (!bi)
+			return -ENOMEM;
+
+		bi->start = round_down(pos, PAGE_SIZE);
+		bi->end = round_up(pos + length, PAGE_SIZE);
+
+		iomap->private = bi;
+
+		/* Wait for existing ordered extents in range to finish */
+		btrfs_wait_ordered_range(inode, bi->start, bi->end - bi->start);
+
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, bi->start, bi->end, &bi->cached_state);
+
+		ret = btrfs_delalloc_reserve_space(inode, &bi->data_reserved,
+				bi->start, bi->end - bi->start);
+		if (ret) {
+			unlock_extent_cached(&BTRFS_I(inode)->io_tree, bi->start, bi->end,
+					&bi->cached_state);
+			kfree(bi);
+			return ret;
+		}
+
+		refcount_inc(&map->refs);
+		ret = btrfs_get_extent_map_write(&em, NULL,
+				inode, bi->start, bi->end - bi->start, &nocow);
+		if (ret) {
+			unlock_extent_cached(&BTRFS_I(inode)->io_tree, bi->start, bi->end,
+					&bi->cached_state);
+			btrfs_delalloc_release_space(inode,
+					bi->data_reserved, bi->start,
+					bi->end - bi->start, true);
+			extent_changeset_free(bi->data_reserved);
+			kfree(bi);
+			return ret;
+		}
+		if (!nocow) {
+			iomap->flags |= IOMAP_F_COW;
+			if (map->block_start != EXTENT_MAP_HOLE) {
+				iomap->cow_addr = map->block_start;
+				iomap->cow_pos = map->start;
+			}
+		} else {
+			bi->nocow = 1;
+		}
+		free_extent_map(map);
+	}
+
+	iomap->offset = em->start;
+	iomap->length = em->len;
+	iomap->bdev = em->bdev;
+	iomap->dax_dev = fs_info->dax_dev;
+
 	if (em->block_start == EXTENT_MAP_HOLE) {
 		iomap->type = IOMAP_HOLE;
 		return 0;
 	}
+
 	iomap->type = IOMAP_MAPPED;
-	iomap->bdev = em->bdev;
-	iomap->dax_dev = fs_info->dax_dev;
-	iomap->offset = em->start;
-	iomap->length = em->len;
 	iomap->addr = em->block_start;
 	return 0;
 }
 
+static int btrfs_iomap_end(struct inode *inode, loff_t pos,
+		loff_t length, ssize_t written, unsigned flags,
+		struct iomap *iomap)
+{
+	struct btrfs_iomap *bi = iomap->private;
+	u64 wend;
+
+	if (!bi)
+		return 0;
+
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, bi->start, bi->end,
+			&bi->cached_state);
+
+	wend = round_up(pos + written, PAGE_SIZE);
+	if (wend < bi->end) {
+		btrfs_delalloc_release_space(inode,
+				bi->data_reserved, wend,
+				bi->end - wend, true);
+	}
+
+	btrfs_update_ordered_extent(inode, bi->start, wend - bi->start, true);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), wend - bi->start, false);
+	extent_changeset_free(bi->data_reserved);
+	kfree(bi);
+	return 0;
+}
+
 static const struct iomap_ops btrfs_iomap_ops = {
 	.iomap_begin		= btrfs_iomap_begin,
+	.iomap_end		= btrfs_iomap_end,
 };
 
 ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to)
@@ -46,4 +140,21 @@ ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to)
 
 	return ret;
 }
+
+ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *iter)
+{
+	ssize_t ret = 0;
+	u64 pos = iocb->ki_pos;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	ret = dax_iomap_rw(iocb, iter, &btrfs_iomap_ops);
+
+	if (ret > 0) {
+		pos += ret;
+		if (pos > i_size_read(inode))
+			i_size_write(inode, pos);
+		iocb->ki_pos = pos;
+	}
+	return ret;
+}
 #endif /* CONFIG_FS_DAX */
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b620f4e718b2..3b320d0ab495 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1964,7 +1964,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	if (sync)
 		atomic_inc(&BTRFS_I(inode)->sync_writers);
 
-	if (iocb->ki_flags & IOCB_DIRECT) {
+	if (IS_DAX(inode)) {
+		num_written = btrfs_file_dax_write(iocb, from);
+	} else if (iocb->ki_flags & IOCB_DIRECT) {
 		num_written = __btrfs_direct_write(iocb, from);
 	} else {
 		num_written = btrfs_buffered_write(iocb, from);
-- 
2.16.4