Christoph Hellwig: > If you care about really unique inode numbers you shouldn't use get_next_ino > but something like an idr allocator. The default i_ino assigned in > new_inode() from which get_next_ino was factored out was mostly intended > for small synthetic filesystems with few enough inodes that it wouldn't > wrap around. Grep-ping get_next_ino, I got 30 calls in mainline. How many of them are get_next_ino() inappropriate? I don't know. But at least for tmpfs, it is better to manage the inums by itself since tmpfs must be one of the biggest consumer of inums and it is NFS-exportable. Do you think we need a common function in VFS to manage inums per sb, or it is totally up to filesystem and the common function is unnecessary? Instead of idr, I was thinking about a simple bitmap in tmpfs such like this. It introduces a new mount option "ino" which forces tmpfs to assign the lowest unused number for a new inode within the mounted tmpfs. Without "ino" or specifying "noino", the behaviour is unchanged (use vfs:get_next_ino()). But it may not scale well due to the single spinlock every time. J. R. Okajima commit 214d38e8c34fb341fd0f37cc92614b5e93e0803b Author: J. R. Okajima <hooanon05@xxxxxxxxxxx> Date: Mon Sep 2 10:45:42 2013 +0900 shmem: management for inum Signed-off-by: J. R. Okajima <hooanon05g@xxxxxxxxx> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 4d1771c..39762e1 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -29,6 +29,8 @@ struct shmem_sb_info { unsigned long max_inodes; /* How many inodes are allowed */ unsigned long free_inodes; /* How many are left for allocation */ spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ + spinlock_t ino_lock; + unsigned long *ino_bitmap; kuid_t uid; /* Mount uid for root directory */ kgid_t gid; /* Mount gid for root directory */ umode_t mode; /* Mount mode for root directory */ diff --git a/mm/shmem.c b/mm/shmem.c index 9f70e02..bc2c5e4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -197,14 +197,61 @@ static int shmem_reserve_inode(struct super_block *sb) return 0; } -static void shmem_free_inode(struct super_block *sb) +static void shmem_free_inode(struct inode *inode) { + struct super_block *sb = inode->i_sb; struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + if (sbinfo->max_inodes) { spin_lock(&sbinfo->stat_lock); sbinfo->free_inodes++; spin_unlock(&sbinfo->stat_lock); } + + if (!inode->i_nlink) { + spin_lock(&sbinfo->ino_lock); + if (sbinfo->ino_bitmap) + clear_bit(inode->i_ino - 2, sbinfo->ino_bitmap); + spin_unlock(&sbinfo->ino_lock); + } +} + +/* + * This is unsigned int instead of unsigned long. + * For details, see fs/inode.c:get_next_ino(). + */ +unsigned int shmem_next_ino(struct super_block *sb) +{ + unsigned long ino; + struct shmem_sb_info *sbinfo; + + ino = 0; + sbinfo = SHMEM_SB(sb); + if (sbinfo->ino_bitmap) { + spin_lock(&sbinfo->ino_lock); + /* + * someone else may remount, + * and ino_bitmap might be reset. + */ + if (sbinfo->ino_bitmap + && !bitmap_full(sbinfo->ino_bitmap, sbinfo->max_inodes)) { + ino = find_first_zero_bit(sbinfo->ino_bitmap, + sbinfo->max_inodes); + set_bit(ino, sbinfo->ino_bitmap); + ino += 2; /* ino 0 and 1 are reserved */ + } + spin_unlock(&sbinfo->ino_lock); + } + + /* + * someone else did remount, + * or ino_bitmap is unused originally, + * or ino_bimapt is full. + */ + if (!ino) + ino = get_next_ino(); + + return ino; } /** @@ -578,7 +625,7 @@ static void shmem_evict_inode(struct inode *inode) simple_xattrs_free(&info->xattrs); WARN_ON(inode->i_blocks); - shmem_free_inode(inode->i_sb); + shmem_free_inode(inode); clear_inode(inode); } @@ -1306,7 +1353,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode inode = new_inode(sb); if (inode) { - inode->i_ino = get_next_ino(); + inode->i_ino = shmem_next_ino(sb); inode_init_owner(inode, dir, mode); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; @@ -1348,7 +1395,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode break; } } else - shmem_free_inode(sb); + shmem_free_inode(inode); return inode; } @@ -1945,7 +1992,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) - shmem_free_inode(inode->i_sb); + shmem_free_inode(inode); dir->i_size -= BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -2315,6 +2362,54 @@ static const struct export_operations shmem_export_ops = { .fh_to_dentry = shmem_fh_to_dentry, }; +static void shmem_ino_bitmap(struct shmem_sb_info *sbinfo, + unsigned long prev_max) +{ + unsigned long *p; + unsigned long n, d; + int do_msg; + + n = sbinfo->max_inodes / BITS_PER_BYTE; + if (sbinfo->max_inodes % BITS_PER_BYTE) + n++; + + do_msg = 0; + if (sbinfo->ino_bitmap) { + /* + * by shrinking the bitmap, the large inode number in use + * may be left. but it is harmless. + */ + d = 0; + if (sbinfo->max_inodes > prev_max) { + d = sbinfo->max_inodes - prev_max; + d /= BITS_PER_BYTE; + } + spin_lock(&sbinfo->ino_lock); + p = krealloc(sbinfo->ino_bitmap, n, GFP_NOWAIT); + if (p) { + memset(p + n - d, 0, d); + sbinfo->ino_bitmap = p; + spin_unlock(&sbinfo->ino_lock); + } else { + p = sbinfo->ino_bitmap; + sbinfo->ino_bitmap = NULL; + spin_unlock(&sbinfo->ino_lock); + kfree(p); + do_msg = 1; + } + } else { + p = kzalloc(n, GFP_NOFS); + spin_lock(&sbinfo->ino_lock); + sbinfo->ino_bitmap = p; + spin_unlock(&sbinfo->ino_lock); + do_msg = !p; + } + + if (unlikely(do_msg)) + pr_err("%s: ino failed (%lu bytes). Ignored.\n", + __func__, n); +} + static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, bool remount) { @@ -2322,7 +2417,10 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, struct mempolicy *mpol = NULL; uid_t uid; gid_t gid; + bool do_ino; + unsigned long old_val = sbinfo->max_inodes; + do_ino = 0; while (options != NULL) { this_char = options; for (;;) { @@ -2342,6 +2440,14 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, } if (!*this_char) continue; + if (!strcmp(this_char, "ino")) { + do_ino = 1; + continue; + } else if (!strcmp(this_char, "noino")) { + do_ino = 0; + continue; + } + if ((value = strchr(this_char,'=')) != NULL) { *value++ = 0; } else { @@ -2370,7 +2476,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, goto bad_val; } else if (!strcmp(this_char,"nr_inodes")) { sbinfo->max_inodes = memparse(value, &rest); - if (*rest) + if (*rest || !sbinfo->max_inodes) goto bad_val; } else if (!strcmp(this_char,"mode")) { if (remount) @@ -2408,6 +2514,16 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, } } sbinfo->mpol = mpol; + + if (do_ino) + shmem_ino_bitmap(sbinfo, old_val); + else if (sbinfo->ino_bitmap) { + void *p = sbinfo->ino_bitmap; + spin_lock(&sbinfo->ino_lock); + sbinfo->ino_bitmap = NULL; + spin_unlock(&sbinfo->ino_lock); + kfree(p); + } return 0; bad_val: @@ -2472,6 +2588,8 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10)); if (sbinfo->max_inodes != shmem_default_max_inodes()) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); + if (sbinfo->ino_bitmap) + seq_printf(seq, ",ino"); if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) @@ -2491,6 +2609,7 @@ static void shmem_put_super(struct super_block *sb) percpu_counter_destroy(&sbinfo->used_blocks); mpol_put(sbinfo->mpol); + kfree(sbinfo->ino_bitmap); kfree(sbinfo); sb->s_fs_info = NULL; } @@ -2510,6 +2629,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) sbinfo->mode = S_IRWXUGO | S_ISVTX; sbinfo->uid = current_fsuid(); sbinfo->gid = current_fsgid(); + spin_lock_init(&sbinfo->ino_lock); sb->s_fs_info = sbinfo; #ifdef CONFIG_TMPFS -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html