This patch introduces another concept used by the unzip subsystem
called the 'workstation'. It can be seen as a sparse array that stores
pointers to data structures related to the corresponding physical
blocks. All lookups are protected by the RCU read lock; in addition, a
reference count and a spinlock are introduced to manage workgroup
lifetimes and to serialize all update operations.

The 'workstation' is currently implemented on top of the in-kernel
radix tree for backward compatibility. As the kernel evolves, it could
be migrated to the XArray implementation in the future.

Signed-off-by: Gao Xiang <gaoxiang25@xxxxxxxxxx>
---
 drivers/staging/erofs/internal.h | 93 ++++++++++++++++++++++++++++++++++++++++
 drivers/staging/erofs/super.c    |  9 ++++
 drivers/staging/erofs/utils.c    | 81 ++++++++++++++++++++++++++++++++++++++--
 3 files changed, 180 insertions(+), 3 deletions(-)
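
(Review notes below, not part of the commit message or the applied
diff; git am ignores everything between the diffstat and the first
"diff --git" line.)

A minimal sketch of the caller pattern the workstation is designed
for. find_or_register(), alloc_my_workgroup() and free_my_workgroup()
are hypothetical names standing in for the real workgroup lifecycle
code added in later patches:

static struct erofs_workgroup *find_or_register(struct super_block *sb,
						pgoff_t index)
{
	struct erofs_workgroup *grp;
	bool tag;
	int err;

repeat:
	/* RCU-protected lookup; takes a reference on success */
	grp = erofs_find_workgroup(sb, index, &tag);
	if (grp != NULL)
		return grp;

	/* hypothetical allocator: fills grp->index, sets refcount to 1 */
	grp = alloc_my_workgroup(index);
	if (grp == NULL)
		return NULL;

	/* insertion is serialized by erofs_workstn_lock() */
	err = erofs_register_workgroup(sb, grp, false);
	if (err) {
		free_my_workgroup(grp);
		if (err == -EEXIST)
			/* lost a race against another inserter; retry */
			goto repeat;
		return NULL;
	}
	return grp;
}

The point of the pattern is that lookups never take the spinlock:
readers run under rcu_read_lock() and pin a workgroup only through its
reference count, while a racing registration fails with -EEXIST from
radix_tree_insert() and falls back to the lookup path.

On the reclaim side, EROFS_LOCKED_MAGIC acts as a lock value for the
refcount. A hypothetical reclaimer such as the following (again a
sketch only, assuming it runs with erofs_workstn_lock() held) freezes
a workgroup so that concurrent erofs_workgroup_get() callers spin
instead of taking new references while it is torn down:

static bool try_to_reclaim_workgroup(struct erofs_sb_info *sbi,
				     struct erofs_workgroup *grp)
{
	/* only freeze if the shrink list holds the sole reference */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/* safe to detach; new lookups spin on EROFS_LOCKED_MAGIC */
	radix_tree_delete(&sbi->workstn_tree, grp->index);

	/* unfreeze to a zero refcount; the caller may now free grp */
	erofs_workgroup_unfreeze(grp, 0);
	return true;
}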
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index 42455f0..b07cd7a 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -79,6 +79,9 @@ struct erofs_sb_info {
 #ifdef CONFIG_EROFS_FS_ZIP
 	/* cluster size in bit shift */
 	unsigned char clusterbits;
+
+	/* the dedicated workstation for compression */
+	struct radix_tree_root workstn_tree;
 #endif
 
 	u32 build_time_nsec;
@@ -149,6 +152,96 @@ static inline void *erofs_kmalloc(struct erofs_sb_info *sbi,
 #define set_opt(sbi, option)	((sbi)->mount_opt |= EROFS_MOUNT_##option)
 #define test_opt(sbi, option)	((sbi)->mount_opt & EROFS_MOUNT_##option)
 
+#ifdef CONFIG_EROFS_FS_ZIP
+#define erofs_workstn_lock(sbi)		xa_lock(&(sbi)->workstn_tree)
+#define erofs_workstn_unlock(sbi)	xa_unlock(&(sbi)->workstn_tree)
+
+/* basic unit of the workstation of a super_block */
+struct erofs_workgroup {
+	/* the workgroup index in the workstation */
+	pgoff_t index;
+
+	/* overall workgroup reference count */
+	atomic_t refcount;
+};
+
+#define EROFS_LOCKED_MAGIC	(INT_MIN | 0xE0F510CCL)
+
+static inline bool erofs_workgroup_try_to_freeze(
+	struct erofs_workgroup *grp, int v)
+{
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+	if (v != atomic_cmpxchg(&grp->refcount,
+		v, EROFS_LOCKED_MAGIC))
+		return false;
+	preempt_disable();
+#else
+	preempt_disable();
+	if (atomic_read(&grp->refcount) != v) {
+		preempt_enable();
+		return false;
+	}
+#endif
+	return true;
+}
+
+static inline void erofs_workgroup_unfreeze(
+	struct erofs_workgroup *grp, int v)
+{
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+	atomic_set(&grp->refcount, v);
+#endif
+	preempt_enable();
+}
+
+static inline int erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt)
+{
+	const int locked = (int)EROFS_LOCKED_MAGIC;
+	int o;
+
+repeat:
+	o = atomic_read(&grp->refcount);
+
+	/* spin if it is temporarily locked at the reclaim path */
+	if (unlikely(o == locked)) {
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+		do
+			cpu_relax();
+		while (atomic_read(&grp->refcount) == locked);
+#endif
+		goto repeat;
+	}
+
+	if (unlikely(o <= 0))
+		return -1;
+
+	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
+		goto repeat;
+
+	*ocnt = o;
+	return 0;
+}
+
+#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
+
+extern int erofs_workgroup_put(struct erofs_workgroup *grp);
+
+extern struct erofs_workgroup *erofs_find_workgroup(
+	struct super_block *sb, pgoff_t index, bool *tag);
+
+extern int erofs_register_workgroup(struct super_block *sb,
+	struct erofs_workgroup *grp, bool tag);
+
+extern unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
+	unsigned long nr_shrink, bool cleanup);
+
+static inline void erofs_workstation_cleanup_all(struct super_block *sb)
+{
+	erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true);
+}
+
+#endif
+
 /* we strictly follow PAGE_SIZE and no buffer head yet */
 #define LOG_BLOCK_SIZE		PAGE_SHIFT
 
diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
index ef85884..e155a2b 100644
--- a/drivers/staging/erofs/super.c
+++ b/drivers/staging/erofs/super.c
@@ -296,6 +296,10 @@ static int erofs_read_super(struct super_block *sb,
 	if (!silent)
 		infoln("root inode @ nid %llu", ROOT_NID(sbi));
 
+#ifdef CONFIG_EROFS_FS_ZIP
+	INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
+#endif
+
 	/* get the root inode */
 	inode = erofs_iget(sb, ROOT_NID(sbi), true);
 	if (IS_ERR(inode)) {
@@ -376,6 +380,11 @@ static void erofs_put_super(struct super_block *sb)
 	__putname(sbi->dev_name);
 
 	mutex_lock(&sbi->umount_mutex);
+
+#ifdef CONFIG_EROFS_FS_ZIP
+	erofs_workstation_cleanup_all(sb);
+#endif
+
 	erofs_unregister_super(sb);
 	mutex_unlock(&sbi->umount_mutex);
 
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index c1d83ce..0d4eae2 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -29,6 +29,83 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
 	return page;
 }
 
+/* global shrink count (for all mounted EROFS instances) */
+static atomic_long_t erofs_global_shrink_cnt;
+
+#ifdef CONFIG_EROFS_FS_ZIP
+
+/* radix_tree and the future XArray both don't use tagptr_t yet */
+struct erofs_workgroup *erofs_find_workgroup(
+	struct super_block *sb, pgoff_t index, bool *tag)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	struct erofs_workgroup *grp;
+	int oldcount;
+
+repeat:
+	rcu_read_lock();
+	grp = radix_tree_lookup(&sbi->workstn_tree, index);
+	if (grp != NULL) {
+		*tag = radix_tree_exceptional_entry(grp);
+		grp = (void *)((unsigned long)grp &
+			~RADIX_TREE_EXCEPTIONAL_ENTRY);
+
+		if (erofs_workgroup_get(grp, &oldcount)) {
+			/* prefer to relax the rcu read side */
+			rcu_read_unlock();
+			goto repeat;
+		}
+
+		/* decrease the refcount added by erofs_workgroup_put */
+		if (unlikely(oldcount == 1))
+			atomic_long_dec(&erofs_global_shrink_cnt);
+		BUG_ON(index != grp->index);
+	}
+	rcu_read_unlock();
+	return grp;
+}
+
+int erofs_register_workgroup(struct super_block *sb,
+			     struct erofs_workgroup *grp,
+			     bool tag)
+{
+	struct erofs_sb_info *sbi;
+	int err;
+
+	/* grp->refcount should not be less than 1 */
+	BUG_ON(!atomic_read(&grp->refcount));
+
+	err = radix_tree_preload(GFP_NOFS);
+	if (err)
+		return err;
+
+	sbi = EROFS_SB(sb);
+	erofs_workstn_lock(sbi);
+
+	if (tag)
+		grp = (void *)((unsigned long)grp |
+			1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
+
+	err = radix_tree_insert(&sbi->workstn_tree,
+		grp->index, grp);
+
+	if (!err) {
+		__erofs_workgroup_get(grp);
+	}
+
+	erofs_workstn_unlock(sbi);
+	radix_tree_preload_end();
+	return err;
+}
+
+unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
+				       unsigned long nr_shrink,
+				       bool cleanup)
+{
+	return 0;
+}
+
+#endif
 
 /* protected by 'erofs_sb_list_lock' */
 static unsigned int shrinker_run_no;
 
@@ -37,9 +114,6 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
 static DEFINE_SPINLOCK(erofs_sb_list_lock);
 static LIST_HEAD(erofs_sb_list);
 
-/* global shrink count (for all mounted EROFS instances) */
-static atomic_long_t erofs_global_shrink_cnt;
-
 void erofs_register_super(struct super_block *sb)
 {
 	struct erofs_sb_info *sbi = EROFS_SB(sb);
@@ -112,6 +186,7 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink,
 		list_move_tail(&sbi->list, &erofs_sb_list);
 		mutex_unlock(&sbi->umount_mutex);
 
+		freed += erofs_shrink_workstation(sbi, nr, false);
 		if (freed >= nr)
 			break;
 	}
-- 
1.9.1