From: Zhi Yong Wu <wuzhy@xxxxxxxxxxxxxxxxxx> Introduce two proc interfaces, hot-mem-high-thresh and hot-mem-low-thresh, to cap the memory consumed by hot_inode_item and hot_range_item. Both values are expressed in units of 1 MB. When the tracked memory exceeds the high threshold, items are evicted until usage falls back toward the low threshold; a high threshold of 0 (the default) disables the cap. Signed-off-by: Zhi Yong Wu <wuzhy@xxxxxxxxxxxxxxxxxx> --- fs/hot_tracking.c | 40 ++++++++++++++++++++++++++++++++++++++-- fs/hot_tracking.h | 26 ++++++++++++++++++++++++++ include/linux/hot_tracking.h | 6 ++++++ kernel/sysctl.c | 14 ++++++++++++++ 4 files changed, 84 insertions(+), 2 deletions(-) diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c index 0d265b9..915b48b 100644 --- a/fs/hot_tracking.c +++ b/fs/hot_tracking.c @@ -23,6 +23,12 @@ static struct dentry *hot_debugfs_root; +int sysctl_hot_mem_high_thresh __read_mostly = 0; +EXPORT_SYMBOL_GPL(sysctl_hot_mem_high_thresh); + +int sysctl_hot_mem_low_thresh __read_mostly = 0; +EXPORT_SYMBOL_GPL(sysctl_hot_mem_low_thresh); + int sysctl_hot_update_interval __read_mostly = 300; EXPORT_SYMBOL_GPL(sysctl_hot_update_interval); @@ -122,10 +128,14 @@ static void hot_comm_item_unlink(struct hot_info *root, spin_lock(&he->i_lock); rb_erase(&ci->rb_node, &he->hot_range_tree); spin_unlock(&he->i_lock); + + hot_mem_limit_sub(root, sizeof(struct hot_range_item)); } else { spin_lock(&root->t_lock); rb_erase(&ci->rb_node, &root->hot_inode_tree); spin_unlock(&root->t_lock); + + hot_mem_limit_sub(root, sizeof(struct hot_inode_item)); } hot_comm_item_put(ci); @@ -199,13 +209,15 @@ redo: else { hot_comm_item_get(&he->hot_inode); spin_unlock(&root->t_lock); - if (he_new) + if (he_new) { /* * Lost the race. Somebody else inserted * the item for the inode. Free the * newly allocated item. 
*/ kmem_cache_free(hot_inode_item_cachep, he_new); + hot_mem_limit_sub(root, sizeof(struct hot_inode_item)); + } if (test_bit(HOT_DELETING, &he->hot_inode.delete_flag)) return ERR_PTR(-ENOENT); @@ -231,6 +243,7 @@ redo: if (!he_new) return ERR_PTR(-ENOMEM); + hot_mem_limit_add(root, sizeof(struct hot_inode_item)); hot_inode_item_init(he_new, root, ino); goto redo; @@ -280,13 +293,15 @@ redo: else { hot_comm_item_get(&hr->hot_range); spin_unlock(&he->i_lock); - if(hr_new) + if(hr_new) { /* * Lost the race. Somebody else inserted * the item for the range. Free the * newly allocated item. */ kmem_cache_free(hot_range_item_cachep, hr_new); + hot_mem_limit_sub(root, sizeof(struct hot_range_item)); + } if (test_bit(HOT_DELETING, &hr->hot_range.delete_flag)) return ERR_PTR(-ENOENT); @@ -312,6 +327,7 @@ redo: if (!hr_new) return ERR_PTR(-ENOMEM); + hot_mem_limit_add(root, sizeof(struct hot_range_item)); hot_range_item_init(hr_new, he, start); goto redo; @@ -570,6 +586,22 @@ static void hot_item_evictor(struct hot_info *root, unsigned long work, } } +static void hot_mem_evictor(struct hot_info *root) +{ + unsigned long work; + + if (sysctl_hot_mem_high_thresh == 0) + return; + + /* note: sysctl_** is in the unit of 1M bytes */ + if (hot_mem_limit(root) <= sysctl_hot_mem_high_thresh * 1024 * 1024) + return; + + work = hot_mem_limit(root) - sysctl_hot_mem_low_thresh * 1024 * 1024; + + hot_item_evictor(root, work, hot_mem_limit); +} + /* * Every sync period we update temperatures for * each hot inode item and hot range item for aging @@ -584,6 +616,8 @@ static void hot_update_worker(struct work_struct *work) struct hot_inode_item *he; int i, j; + hot_mem_evictor(root); + rcu_read_lock(); node = rb_first(&root->hot_inode_tree); while (node) { @@ -1235,6 +1269,7 @@ int hot_track_init(struct super_block *sb) if (IS_ERR(root)) return PTR_ERR(root); + hot_mem_limit_init(root); sb->s_hot_root = root; ret = hot_debugfs_init(sb); @@ -1264,6 +1299,7 @@ void hot_track_exit(struct 
super_block *sb) { struct hot_info *root = sb->s_hot_root; + hot_mem_limit_exit(root); hot_debugfs_exit(sb); hot_tree_exit(root); sb->s_hot_root = NULL; diff --git a/fs/hot_tracking.h b/fs/hot_tracking.h index d1ab48b..be9f5cd 100644 --- a/fs/hot_tracking.h +++ b/fs/hot_tracking.h @@ -45,4 +45,30 @@ struct hot_debugfs { const struct file_operations *fops; }; +/* Memory Tracking Functions. */ +static inline unsigned long hot_mem_limit(struct hot_info *root) +{ + return percpu_counter_read(&root->mem); +} + +static inline void hot_mem_limit_sub(struct hot_info *root, int i) +{ + percpu_counter_add(&root->mem, -i); +} + +static inline void hot_mem_limit_add(struct hot_info *root, int i) +{ + percpu_counter_add(&root->mem, i); +} + +static inline void hot_mem_limit_init(struct hot_info *root) +{ + percpu_counter_init(&root->mem, 0); +} + +static inline void hot_mem_limit_exit(struct hot_info *root) +{ + percpu_counter_destroy(&root->mem); +} + #endif /* __HOT_TRACKING__ */ diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h index f5c5769..03e5026 100644 --- a/include/linux/hot_tracking.h +++ b/include/linux/hot_tracking.h @@ -16,6 +16,7 @@ #define _LINUX_HOTTRACK_H #include <linux/types.h> +#include <linux/percpu_counter.h> struct hot_heat_info { __u64 avg_delta_reads; @@ -108,10 +109,15 @@ struct hot_info { struct shrinker hot_shrink; struct dentry *debugfs_dentry; atomic_t run_debugfs; + + struct percpu_counter mem ____cacheline_aligned_in_smp; }; /* set how often to update temperatures (seconds) */ extern int sysctl_hot_update_interval; +/* note: sysctl_** is in the unit of 1M bytes */ +extern int sysctl_hot_mem_high_thresh; +extern int sysctl_hot_mem_low_thresh; /* * Hot data tracking ioctls: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1ba111d..753585d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1617,6 +1617,20 @@ static struct ctl_table fs_table[] = { .extra1 = &pipe_min_size, }, { + .procname = "hot-mem-high-thresh", + 
.data = &sysctl_hot_mem_high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "hot-mem-low-thresh", + .data = &sysctl_hot_mem_low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "hot-update-interval", .data = &sysctl_hot_update_interval, .maxlen = sizeof(int), -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html