On Thu, Jan 10, 2013 at 8:51 AM, David Sterba <dsterba@xxxxxxx> wrote: > On Thu, Dec 20, 2012 at 10:43:22PM +0800, zwu.kernel@xxxxxxxxx wrote: >> --- a/fs/hot_tracking.c >> +++ b/fs/hot_tracking.c >> @@ -164,6 +164,135 @@ static void hot_inode_tree_exit(struct hot_info *root) >> spin_unlock(&root->lock); >> } >> >> +struct hot_inode_item >> +*hot_inode_item_lookup(struct hot_info *root, u64 ino) >> +{ >> + struct rb_node **p = &root->hot_inode_tree.map.rb_node; >> + struct rb_node *parent = NULL; >> + struct hot_comm_item *ci; >> + struct hot_inode_item *entry; >> + >> + /* walk tree to find insertion point */ >> + spin_lock(&root->lock); >> + while (*p) { >> + parent = *p; >> + ci = rb_entry(parent, struct hot_comm_item, rb_node); >> + entry = container_of(ci, struct hot_inode_item, hot_inode); >> + if (ino < entry->i_ino) >> + p = &(*p)->rb_left; >> + else if (ino > entry->i_ino) >> + p = &(*p)->rb_right; > > style comment: put { } around the all if/else blocks, no, it will violate checkpatch.pl. If the if/else block only contains one line of code, we should not put {} around them. > >> + else { >> + spin_unlock(&root->lock); >> + kref_get(&entry->hot_inode.refs); > > jumping forwards in the series, the spin_unlock and kref_get get swapped > later, and I think that's the right order. Otherwise there's a small > window where the entry does not get the reference and could be > potentially freed by racing kref_put, no? yes, good catch, thanks, done > > <lookup entry E> > spin_unlock(tree) > spin_lock(tree) > <lookup entry E> > kref_put(E) or via hot_inode_item_put(E) (1) > kref_get(E) (2) > > > if the reference count at (1) was 1, it's freed and (2) hits a free > memory. hot_inode_item_put can be called from filesystem or via seq > print of the respective /proc files, so I think there are chances to hit > the problem. Great. > >> + return entry; >> + } >> + } >> + spin_unlock(&root->lock); >> + >> + entry = kmem_cache_zalloc(hot_inode_item_cachep, GFP_NOFS); >> + if (!entry) >> + return ERR_PTR(-ENOMEM); >> + >> + spin_lock(&root->lock); >> + hot_inode_item_init(entry, ino, &root->hot_inode_tree); >> + rb_link_node(&entry->hot_inode.rb_node, parent, p); >> + rb_insert_color(&entry->hot_inode.rb_node, >> + &root->hot_inode_tree.map); >> + spin_unlock(&root->lock); >> + >> + kref_get(&entry->hot_inode.refs); > > Similar here, the entry is inserted into the tree but there's no > refcount yet. And the order of spin_unlock/kref_get remains unchanged. ditto > >> + return entry; >> +} >> +EXPORT_SYMBOL_GPL(hot_inode_item_lookup); >> + >> +static struct hot_range_item >> +*hot_range_item_lookup(struct hot_inode_item *he, >> + loff_t start) >> +{ >> + struct rb_node **p = &he->hot_range_tree.map.rb_node; >> + struct rb_node *parent = NULL; >> + struct hot_comm_item *ci; >> + struct hot_range_item *entry; >> + >> + /* walk tree to find insertion point */ >> + spin_lock(&he->lock); >> + while (*p) { >> + parent = *p; >> + ci = rb_entry(parent, struct hot_comm_item, rb_node); >> + entry = container_of(ci, struct hot_range_item, hot_range); >> + if (start < entry->start) >> + p = &(*p)->rb_left; >> + else if (start > hot_range_end(entry)) >> + p = &(*p)->rb_right; > > if { ...} > else if { ... } We should not put {} around them as what i explained above. > >> + else { >> + spin_unlock(&he->lock); >> + kref_get(&entry->hot_range.refs); > > same here Done > >> + return entry; >> + } >> + } >> + spin_unlock(&he->lock); >> + >> + entry = kmem_cache_zalloc(hot_range_item_cachep, GFP_NOFS); >> + if (!entry) >> + return ERR_PTR(-ENOMEM); >> + >> + spin_lock(&he->lock); >> + hot_range_item_init(entry, start, he); >> + rb_link_node(&entry->hot_range.rb_node, parent, p); >> + rb_insert_color(&entry->hot_range.rb_node, >> + &he->hot_range_tree.map); >> + spin_unlock(&he->lock); >> + >> + kref_get(&entry->hot_range.refs); > > and here Done > >> + return entry; >> +} >> + >> +/* >> + * This function does the actual work of updating >> + * the frequency numbers, whatever they turn out to be. > > Can this function be described a bit better? This comment did not help. OK, i will > >> + */ >> +static void hot_rw_freq_calc(struct timespec old_atime, >> + struct timespec cur_time, u64 *avg) >> +{ >> + struct timespec delta_ts; >> + u64 new_delta; >> + >> + delta_ts = timespec_sub(cur_time, old_atime); >> + new_delta = timespec_to_ns(&delta_ts) >> FREQ_POWER; >> + >> + *avg = (*avg << FREQ_POWER) - *avg + new_delta; >> + *avg = *avg >> FREQ_POWER; >> +} >> + >> +static void hot_freq_data_update(struct hot_freq_data *freq_data, bool write) >> +{ >> + struct timespec cur_time = current_kernel_time(); >> + >> + if (write) { >> + freq_data->nr_writes += 1; > > The preferred style is > > freq_data->nr_writes++ OK, done. > >> + hot_rw_freq_calc(freq_data->last_write_time, >> + cur_time, >> + &freq_data->avg_delta_writes); >> + freq_data->last_write_time = cur_time; >> + } else { >> + freq_data->nr_reads += 1; > > (...) > >> + hot_rw_freq_calc(freq_data->last_read_time, >> + freq_data->last_read_time, >> + cur_time, >> + &freq_data->avg_delta_reads); >> + freq_data->last_read_time = cur_time; >> + } >> +} >> + > > david -- Regards, Zhi Yong Wu -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html