On Fri 08-02-13 16:43:58, Zheng Liu wrote: > From: Zheng Liu <wenqing.lz@xxxxxxxxxx> > > This commit adds two members to the extent_status structure to let it record > the physical block and the extent status. Here es_pblk is used to record both > of them because a physical block number only has 48 bits, so the extent status can > be stashed into it in order to save some memory. Now written, > unwritten, delayed and hole are defined as statuses. > > Because a new member is added to the extent status tree, all interfaces need > to be adjusted. The patch looks good. You can add: Reviewed-by: Jan Kara <jack@xxxxxxx> Honza > > Signed-off-by: Zheng Liu <wenqing.lz@xxxxxxxxxx> > Cc: "Theodore Ts'o" <tytso@xxxxxxx> > Cc: Jan Kara <jack@xxxxxxx> > --- > fs/ext4/extents_status.c | 67 +++++++++++++++++++++++++++++++++++++-------- > fs/ext4/extents_status.h | 64 ++++++++++++++++++++++++++++++++++++++++++- > fs/ext4/inode.c | 3 +- > include/trace/events/ext4.h | 34 +++++++++++++++-------- > 4 files changed, 142 insertions(+), 26 deletions(-) > > diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c > index aa4d346..5093cee 100644 > --- a/fs/ext4/extents_status.c > +++ b/fs/ext4/extents_status.c > @@ -179,7 +179,9 @@ static void ext4_es_print_tree(struct inode *inode) > while (node) { > struct extent_status *es; > es = rb_entry(node, struct extent_status, rb_node); > - printk(KERN_DEBUG " [%u/%u)", es->es_lblk, es->es_len); > + printk(KERN_DEBUG " [%u/%u) %llu %llx", > + es->es_lblk, es->es_len, > + ext4_es_pblock(es), ext4_es_status(es)); > node = rb_next(node); > } > printk(KERN_DEBUG "\n"); > @@ -234,7 +236,7 @@ static struct extent_status *__es_tree_search(struct rb_root *root, > * @es: delayed extent that we found > * > * Returns the first block of the next extent after es, otherwise > - * EXT_MAX_BLOCKS if no delay extent is found. > + * EXT_MAX_BLOCKS if no extent is found. > * Delayed extent is returned via @es. 
> */ > ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) > @@ -249,17 +251,18 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) > read_lock(&EXT4_I(inode)->i_es_lock); > tree = &EXT4_I(inode)->i_es_tree; > > - /* find delay extent in cache firstly */ > + /* find extent in cache firstly */ > + es->es_len = es->es_pblk = 0; > if (tree->cache_es) { > es1 = tree->cache_es; > if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) { > - es_debug("%u cached by [%u/%u)\n", > - es->es_lblk, es1->es_lblk, es1->es_len); > + es_debug("%u cached by [%u/%u) %llu %llx\n", > + es->es_lblk, es1->es_lblk, es1->es_len, > + ext4_es_pblock(es1), ext4_es_status(es1)); > goto out; > } > } > > - es->es_len = 0; > es1 = __es_tree_search(&tree->root, es->es_lblk); > > out: > @@ -267,6 +270,7 @@ out: > tree->cache_es = es1; > es->es_lblk = es1->es_lblk; > es->es_len = es1->es_len; > + es->es_pblk = es1->es_pblk; > node = rb_next(&es1->rb_node); > if (node) { > es1 = rb_entry(node, struct extent_status, rb_node); > @@ -281,7 +285,7 @@ out: > } > > static struct extent_status * > -ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len) > +ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk) > { > struct extent_status *es; > es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); > @@ -289,6 +293,7 @@ ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len) > return NULL; > es->es_lblk = lblk; > es->es_len = len; > + es->es_pblk = pblk; > return es; > } > > @@ -301,6 +306,8 @@ static void ext4_es_free_extent(struct extent_status *es) > * Check whether or not two extents can be merged > * Condition: > * - logical block number is contiguous > + * - physical block number is contiguous > + * - status is equal > */ > static int ext4_es_can_be_merged(struct extent_status *es1, > struct extent_status *es2) > @@ -308,6 +315,13 @@ static int ext4_es_can_be_merged(struct extent_status *es1, > if (es1->es_lblk + es1->es_len != 
es2->es_lblk) > return 0; > > + if (ext4_es_status(es1) != ext4_es_status(es2)) > + return 0; > + > + if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && > + (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2))) > + return 0; > + > return 1; > } > > @@ -367,6 +381,10 @@ static int __es_insert_extent(struct ext4_es_tree *tree, > if (ext4_es_can_be_merged(newes, es)) { > es->es_lblk = newes->es_lblk; > es->es_len += newes->es_len; > + if (ext4_es_is_written(es) || > + ext4_es_is_unwritten(es)) > + ext4_es_store_pblock(es, > + newes->es_pblk); > es = ext4_es_try_to_merge_left(tree, es); > goto out; > } > @@ -384,7 +402,8 @@ static int __es_insert_extent(struct ext4_es_tree *tree, > } > } > > - es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len); > + es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len, > + newes->es_pblk); > if (!es) > return -ENOMEM; > rb_link_node(&es->rb_node, parent, p); > @@ -404,21 +423,24 @@ out: > * Return 0 on success, error code on failure. 
> */ > int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, > - ext4_lblk_t len) > + ext4_lblk_t len, ext4_fsblk_t pblk, > + unsigned long long status) > { > struct ext4_es_tree *tree; > struct extent_status newes; > ext4_lblk_t end = lblk + len - 1; > int err = 0; > > - trace_ext4_es_insert_extent(inode, lblk, len); > - es_debug("add [%u/%u) to extent status tree of inode %lu\n", > - lblk, len, inode->i_ino); > + es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n", > + lblk, len, pblk, status, inode->i_ino); > > BUG_ON(end < lblk); > > newes.es_lblk = lblk; > newes.es_len = len; > + ext4_es_store_pblock(&newes, pblk); > + ext4_es_store_status(&newes, status); > + trace_ext4_es_insert_extent(inode, &newes); > > write_lock(&EXT4_I(inode)->i_es_lock); > tree = &EXT4_I(inode)->i_es_tree; > @@ -442,6 +464,7 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, > struct extent_status *es; > struct extent_status orig_es; > ext4_lblk_t len1, len2; > + ext4_fsblk_t block; > int err = 0; > > es = __es_tree_search(&tree->root, lblk); > @@ -455,6 +478,8 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, > > orig_es.es_lblk = es->es_lblk; > orig_es.es_len = es->es_len; > + orig_es.es_pblk = es->es_pblk; > + > len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0; > len2 = ext4_es_end(es) > end ? 
ext4_es_end(es) - end : 0; > if (len1 > 0) > @@ -465,6 +490,13 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, > > newes.es_lblk = end + 1; > newes.es_len = len2; > + if (ext4_es_is_written(&orig_es) || > + ext4_es_is_unwritten(&orig_es)) { > + block = ext4_es_pblock(&orig_es) + > + orig_es.es_len - len2; > + ext4_es_store_pblock(&newes, block); > + } > + ext4_es_store_status(&newes, ext4_es_status(&orig_es)); > err = __es_insert_extent(tree, &newes); > if (err) { > es->es_lblk = orig_es.es_lblk; > @@ -474,6 +506,11 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, > } else { > es->es_lblk = end + 1; > es->es_len = len2; > + if (ext4_es_is_written(es) || > + ext4_es_is_unwritten(es)) { > + block = orig_es.es_pblk + orig_es.es_len - len2; > + ext4_es_store_pblock(es, block); > + } > } > goto out; > } > @@ -498,9 +535,15 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, > } > > if (es && es->es_lblk < end + 1) { > + ext4_lblk_t orig_len = es->es_len; > + > len1 = ext4_es_end(es) - end; > es->es_lblk = end + 1; > es->es_len = len1; > + if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) { > + block = es->es_pblk + orig_len - len1; > + ext4_es_store_pblock(es, block); > + } > } > > out: > diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h > index 81e9339..2a5d69e 100644 > --- a/fs/ext4/extents_status.h > +++ b/fs/ext4/extents_status.h > @@ -20,10 +20,21 @@ > #define es_debug(fmt, ...) 
no_printk(fmt, ##__VA_ARGS__) > #endif > > +#define EXTENT_STATUS_WRITTEN 0x10000000 /* written extent */ > +#define EXTENT_STATUS_UNWRITTEN 0x20000000 /* unwritten extent */ > +#define EXTENT_STATUS_DELAYED 0x40000000 /* delayed extent */ > +#define EXTENT_STATUS_HOLE 0x80000000 /* hole */ > + > +#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ > + EXTENT_STATUS_UNWRITTEN | \ > + EXTENT_STATUS_DELAYED | \ > + EXTENT_STATUS_HOLE) > + > struct extent_status { > struct rb_node rb_node; > ext4_lblk_t es_lblk; /* first logical block extent covers */ > ext4_lblk_t es_len; /* length of extent in block */ > + ext4_fsblk_t es_pblk; /* first physical block */ > }; > > struct ext4_es_tree { > @@ -36,10 +47,61 @@ extern void ext4_exit_es(void); > extern void ext4_es_init_tree(struct ext4_es_tree *tree); > > extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, > - ext4_lblk_t len); > + ext4_lblk_t len, ext4_fsblk_t pblk, > + unsigned long long status); > extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, > ext4_lblk_t len); > extern ext4_lblk_t ext4_es_find_extent(struct inode *inode, > struct extent_status *es); > > +static inline int ext4_es_is_written(struct extent_status *es) > +{ > + return (es->es_pblk & EXTENT_STATUS_WRITTEN); > +} > + > +static inline int ext4_es_is_unwritten(struct extent_status *es) > +{ > + return (es->es_pblk & EXTENT_STATUS_UNWRITTEN); > +} > + > +static inline int ext4_es_is_delayed(struct extent_status *es) > +{ > + return (es->es_pblk & EXTENT_STATUS_DELAYED); > +} > + > +static inline int ext4_es_is_hole(struct extent_status *es) > +{ > + return (es->es_pblk & EXTENT_STATUS_HOLE); > +} > + > +static inline ext4_fsblk_t ext4_es_status(struct extent_status *es) > +{ > + return (es->es_pblk & EXTENT_STATUS_FLAGS); > +} > + > +static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) > +{ > + return (es->es_pblk & ~EXTENT_STATUS_FLAGS); > +} > + > +static inline void 
ext4_es_store_pblock(struct extent_status *es, > + ext4_fsblk_t pb) > +{ > + ext4_fsblk_t block; > + > + block = (pb & ~EXTENT_STATUS_FLAGS) | > + (es->es_pblk & EXTENT_STATUS_FLAGS); > + es->es_pblk = block; > +} > + > +static inline void ext4_es_store_status(struct extent_status *es, > + unsigned long long status) > +{ > + ext4_fsblk_t block; > + > + block = (status & EXTENT_STATUS_FLAGS) | > + (es->es_pblk & ~EXTENT_STATUS_FLAGS); > + es->es_pblk = block; > +} > + > #endif /* _EXT4_EXTENTS_STATUS_H */ > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index cbfe13b..7fb00d8 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -1821,7 +1821,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, > goto out_unlock; > } > > - retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len); > + retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, > + ~0, EXTENT_STATUS_DELAYED); > if (retval) > goto out_unlock; > > diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h > index 952628a..ef2f96e 100644 > --- a/include/trace/events/ext4.h > +++ b/include/trace/events/ext4.h > @@ -2068,28 +2068,33 @@ TRACE_EVENT(ext4_ext_remove_space_done, > ); > > TRACE_EVENT(ext4_es_insert_extent, > - TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), > + TP_PROTO(struct inode *inode, struct extent_status *es), > > - TP_ARGS(inode, lblk, len), > + TP_ARGS(inode, es), > > TP_STRUCT__entry( > - __field( dev_t, dev ) > - __field( ino_t, ino ) > - __field( loff_t, lblk ) > - __field( loff_t, len ) > + __field( dev_t, dev ) > + __field( ino_t, ino ) > + __field( ext4_lblk_t, lblk ) > + __field( ext4_lblk_t, len ) > + __field( ext4_fsblk_t, pblk ) > + __field( unsigned long long, status ) > ), > > TP_fast_assign( > __entry->dev = inode->i_sb->s_dev; > __entry->ino = inode->i_ino; > - __entry->lblk = lblk; > - __entry->len = len; > + __entry->lblk = es->es_lblk; > + __entry->len = es->es_len; > + __entry->pblk = 
ext4_es_pblock(es); > + __entry->status = ext4_es_status(es); > ), > > - TP_printk("dev %d,%d ino %lu es [%lld/%lld)", > + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx", > MAJOR(__entry->dev), MINOR(__entry->dev), > (unsigned long) __entry->ino, > - __entry->lblk, __entry->len) > + __entry->lblk, __entry->len, > + __entry->pblk, __entry->status) > ); > > TRACE_EVENT(ext4_es_remove_extent, > @@ -2150,6 +2155,8 @@ TRACE_EVENT(ext4_es_find_extent_exit, > __field( ino_t, ino ) > __field( ext4_lblk_t, lblk ) > __field( ext4_lblk_t, len ) > + __field( ext4_fsblk_t, pblk ) > + __field( unsigned long long, status ) > __field( ext4_lblk_t, ret ) > ), > > @@ -2158,13 +2165,16 @@ TRACE_EVENT(ext4_es_find_extent_exit, > __entry->ino = inode->i_ino; > __entry->lblk = es->es_lblk; > __entry->len = es->es_len; > + __entry->pblk = ext4_es_pblock(es); > + __entry->status = ext4_es_status(es); > __entry->ret = ret; > ), > > - TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u", > + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx ret %u", > MAJOR(__entry->dev), MINOR(__entry->dev), > (unsigned long) __entry->ino, > - __entry->lblk, __entry->len, __entry->ret) > + __entry->lblk, __entry->len, > + __entry->pblk, __entry->status, __entry->ret) > ); > > #endif /* _TRACE_EXT4_H */ > -- > 1.7.12.rc2.18.g61b472e > -- Jan Kara <jack@xxxxxxx> SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html