From: Zheng Liu <wenqing.lz@xxxxxxxxxx> A new function, ext4_es_convert_unwritten_extents(), is defined to convert a range of unwritten extents to written in the extent status tree. This function aims to improve unwritten extent conversion in DIO end_io. Meanwhile, all locks are changed to save irq flags because DIO end_io runs in irq context. Signed-off-by: Zheng Liu <wenqing.lz@xxxxxxxxxx> --- fs/ext4/extents_status.c | 161 ++++++++++++++++++++++++++++++++++++++++++++--- fs/ext4/extents_status.h | 2 + 2 files changed, 155 insertions(+), 8 deletions(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index ccd940c..9db9e05 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -239,10 +239,11 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) struct extent_status *es1 = NULL; struct rb_node *node; ext4_lblk_t ret = EXT_MAX_BLOCKS; + unsigned long flags; trace_ext4_es_find_extent_enter(inode, es->es_lblk); - read_lock(&EXT4_I(inode)->i_es_lock); + read_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags); tree = &EXT4_I(inode)->i_es_tree; /* find delay extent in cache firstly */ @@ -273,7 +274,7 @@ out: } } - read_unlock(&EXT4_I(inode)->i_es_lock); + read_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags); trace_ext4_es_find_extent_exit(inode, es, ret); return ret; @@ -426,6 +427,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, struct ext4_es_tree *tree; struct extent_status newes; ext4_lblk_t end = lblk + len - 1; + unsigned long flags; int err = 0; es_debug("add [%u/%u) %llu %d to extent status tree of inode %lu\n", @@ -439,7 +441,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, newes.es_status = status; trace_ext4_es_insert_extent(inode, &newes); - write_lock(&EXT4_I(inode)->i_es_lock); + write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags); tree = &EXT4_I(inode)->i_es_tree; err = __es_remove_extent(tree, lblk, end); if (err != 0) @@ -447,7 +449,7 @@ int 
ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, err = __es_insert_extent(tree, &newes); error: - write_unlock(&EXT4_I(inode)->i_es_lock); + write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags); ext4_es_print_tree(inode); @@ -466,12 +468,13 @@ int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es) struct ext4_es_tree *tree; struct extent_status *es1; struct rb_node *node; + unsigned long flags; int found = 0; es_debug("lookup extent in block %u\n", es->es_lblk); tree = &EXT4_I(inode)->i_es_tree; - read_lock(&EXT4_I(inode)->i_es_lock); + read_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags); /* find delay extent in cache firstly */ if (tree->cache_es) { @@ -506,7 +509,7 @@ out: es->es_status = es1->es_status; } - read_unlock(&EXT4_I(inode)->i_es_lock); + read_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags); return found; } @@ -605,6 +608,7 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, { struct ext4_es_tree *tree; ext4_lblk_t end; + unsigned long flags; int err = 0; trace_ext4_es_remove_extent(inode, lblk, len); @@ -616,9 +620,150 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, tree = &EXT4_I(inode)->i_es_tree; - write_lock(&EXT4_I(inode)->i_es_lock); + write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags); err = __es_remove_extent(tree, lblk, end); - write_unlock(&EXT4_I(inode)->i_es_lock); + write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags); + ext4_es_print_tree(inode); + return err; +} + +int ext4_es_convert_unwritten_extents(struct inode *inode, loff_t offset, + size_t size) +{ + struct ext4_es_tree *tree; + struct rb_node *node; + struct extent_status *es, orig_es, conv_es; + ext4_lblk_t end, len1, len2; + ext4_lblk_t lblk = 0, len = 0; + unsigned long flags; + unsigned int blkbits; + int err = 0; + + /* TODO: add trace point and debug */ + blkbits = inode->i_blkbits; + lblk = offset >> blkbits; + len = (EXT4_BLOCK_ALIGN(offset + size, blkbits) >> blkbits) - lblk; + + end = lblk + 
len - 1; + BUG_ON(end < lblk); + + tree = &EXT4_I(inode)->i_es_tree; + + write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags); + + es = __es_tree_search(&tree->root, lblk); + if (!es) + goto out; + if (es->es_lblk > end) + goto out; + + tree->cache_es = NULL; + + orig_es.es_lblk = es->es_lblk; + orig_es.es_len = es->es_len; + orig_es.es_pblk = es->es_pblk; + orig_es.es_status = es->es_status; + + len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0; + len2 = extent_status_end(es) > end ? + extent_status_end(es) - end : 0; + if (len1 > 0) + es->es_len = len1; + if (len2 > 0) { + if (len1 > 0) { + struct extent_status newes; + + newes.es_lblk = end + 1; + newes.es_len = len2; + newes.es_pblk = orig_es.es_pblk + orig_es.es_len - len2; + newes.es_status = orig_es.es_status; + /*BUG_ON(newes.es_status != EXTENT_STATUS_UNWRITTEN);*/ + err = __es_insert_extent(tree, &newes); + if (err) { + es->es_lblk = orig_es.es_lblk; + es->es_len = orig_es.es_len; + goto out; + } + + conv_es.es_lblk = orig_es.es_lblk + len1; + conv_es.es_len = orig_es.es_len - len1 - len2; + conv_es.es_pblk = orig_es.es_pblk + len1; + conv_es.es_status = EXTENT_STATUS_WRITTEN; + err = __es_insert_extent(tree, &conv_es); + if (err) { + int err2; + err2 = __es_remove_extent(tree, newes.es_lblk, + extent_status_end(&newes)); + if (err2) + goto out; + es->es_lblk = orig_es.es_lblk; + es->es_len = orig_es.es_len; + goto out; + } + } else { + es->es_lblk = end + 1; + es->es_len = len2; + es->es_pblk = orig_es.es_pblk + orig_es.es_len - len2; + /*BUG_ON(newes.es_status != EXTENT_STATUS_UNWRITTEN);*/ + + conv_es.es_lblk = orig_es.es_lblk; + conv_es.es_len = orig_es.es_len - len2; + conv_es.es_pblk = orig_es.es_pblk; + conv_es.es_status = EXTENT_STATUS_WRITTEN; + err = __es_insert_extent(tree, &conv_es); + if (err) { + es->es_lblk = orig_es.es_lblk; + es->es_len = orig_es.es_len; + es->es_pblk = orig_es.es_pblk; + } + } + + goto out; + } + + if (len1 > 0) { + node = rb_next(&es->rb_node); + if (node) + es = 
rb_entry(node, struct extent_status, rb_node); + else + es = NULL; + } + + while (es && extent_status_end(es) <= end) { + node = rb_next(&es->rb_node); + es->es_status = EXTENT_STATUS_WRITTEN; + if (!node) { + es = NULL; + break; + } + es = rb_entry(node, struct extent_status, rb_node); + } + + if (es && es->es_lblk < end + 1) { + ext4_lblk_t orig_len = es->es_len; + + /* + * Set conv_es first, before es is modified below, to avoid copying + * the value of es to a temporary variable. + */ + len1 = extent_status_end(es) - end; + conv_es.es_lblk = es->es_lblk; + conv_es.es_len = es->es_len - len1; + conv_es.es_pblk = es->es_pblk; + conv_es.es_status = EXTENT_STATUS_WRITTEN; + + es->es_lblk = end + 1; + es->es_len = len1; + es->es_pblk = es->es_pblk + orig_len - len1; + + err = __es_insert_extent(tree, &conv_es); + if (err) + goto out; + } + +out: + write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags); + ext4_es_print_tree(inode); return err; } diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 1890f80..9069ecf 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -51,6 +51,8 @@ extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, extern ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es); extern int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es); +extern int ext4_es_convert_unwritten_extents(struct inode *inode, + loff_t offset, size_t size); static inline int ext4_es_is_written(struct extent_status *es) { -- 1.7.12.rc2.18.g61b472e -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html