This patch is a proof of concept. It works, but still needs a bit of polish before it's ready for submission. First, the problems: 1) on filesystems w/o permanent inode numbers, i_ino values can be larger than 32 bits, which can cause problems for some 32 bit userspace programs on a 64 bit kernel. 2) many filesystems call new_inode and assume that the i_ino values they are given are unique. They are not guaranteed to be so, since the static counter can wrap. 3) after allocating a new inode, some filesystems call iunique to try to get a unique i_ino value, but they don't actually add their inodes to the hashtable, and so they're still not guaranteed to be unique. This patch is a first step at correcting these problems. It declares a new file_system_type flag (FS_I_INO_DYNAMIC). If this is set, then when new_inode is called, we'll use the IDR functions to generate a unique 31-bit value, leaving the first 100 inode numbers for statically allocated stuff like root inodes, etc. At inode deletion time, we idr_remove it so the i_ino value can be reused. The patch also converts pipefs to use the new scheme (though the conversion just consists of setting the fs_flags value for its file_system_type). There are some things that need to be cleaned up, of course: - error handling for the idr calls - recheck all the possible places where the inode should be unhashed - don't attempt to remove inodes with values <100 - convert other filesystems - remove the static counter from new_inode and (maybe) eliminate iunique It seems to basically work, but there is one oddity. idr seems to prefer reusing values when it can. So if a program, for instance, makes a pipe, then closes it, and makes a new one, the second one is likely to get the same st_ino value as the first one. The value is still unique, but I have to wonder if this may cause some userspace programs to get confused... I'm posting this to get some feedback. If this approach seems acceptable, then I'll clean up the patch and start working on converting the filesystems individually. Comments and suggestions appreciated... -- Jeff diff --git a/fs/inode.c b/fs/inode.c index 26cdb11..52779f4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -93,6 +93,12 @@ DEFINE_SPINLOCK(inode_lock); static DEFINE_MUTEX(iprune_mutex); /* + * for filesystems that dynamically allocate inode numbers, we reserve the + * first 100 for statically assigned values (root inodes and such) + */ +#define DYNAMIC_I_INO_RESERVED 100 + +/* * Statistics gathering.. */ struct inodes_stat_t inodes_stat; @@ -116,6 +122,7 @@ static struct inode *alloc_inode(struct inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; + inode->i_ino = 0; inode->i_flags = 0; atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; @@ -286,6 +293,8 @@ static void dispose_list(struct list_hea spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); list_del_init(&inode->i_sb_list); + if (inode->i_sb->s_type->fs_flags & FS_I_INO_DYNAMIC) + idr_remove(&inode->i_sb->s_inode_ids, inode->i_ino); spin_unlock(&inode_lock); wake_up_inode(inode); @@ -531,11 +540,17 @@ struct inode *new_inode(struct super_blo inode = alloc_inode(sb); if (inode) { + if (sb->s_type->fs_flags & FS_I_INO_DYNAMIC) + idr_pre_get(&sb->s_inode_ids, GFP_KERNEL); spin_lock(&inode_lock); inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); - inode->i_ino = ++last_ino; + if (sb->s_type->fs_flags & FS_I_INO_DYNAMIC) + idr_get_new_above(&sb->s_inode_ids, inode, + DYNAMIC_I_INO_RESERVED, (int *) &inode->i_ino); + else + inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); } @@ -1024,6 +1039,8 @@ void generic_delete_inode(struct inode * } spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); + if (inode->i_sb->s_type->fs_flags & FS_I_INO_DYNAMIC) + idr_remove(&inode->i_sb->s_inode_ids, inode->i_ino); spin_unlock(&inode_lock); wake_up_inode(inode); BUG_ON(inode->i_state != I_CLEAR); @@ -1052,6 +1069,8 @@ static void generic_forget_inode(struct inodes_stat.nr_unused--; hlist_del_init(&inode->i_hash); } + if (inode->i_sb->s_type->fs_flags & FS_I_INO_DYNAMIC) + idr_remove(&sb->s_inode_ids, inode->i_ino); list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); inode->i_state |= I_FREEING; diff --git a/fs/pipe.c b/fs/pipe.c index b1626f2..ea50833 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1005,6 +1005,7 @@ static struct file_system_type pipe_fs_t .name = "pipefs", .get_sb = pipefs_get_sb, .kill_sb = kill_anon_super, + .fs_flags = FS_I_INO_DYNAMIC, }; static int __init init_pipe_fs(void) diff --git a/fs/super.c b/fs/super.c index 47e554c..23b662b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -93,6 +93,7 @@ static struct super_block *alloc_super(s s->s_qcop = sb_quotactl_ops; s->s_op = &default_op; s->s_time_gran = 1000000000; + idr_init(&s->s_inode_ids); } out: return s; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2fe6e3f..2c1a215 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -91,6 +91,7 @@ #define SEL_EX 4 /* public flags for file_system_type */ #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 +#define FS_I_INO_DYNAMIC 4 /* i_ino values are not permanent */ #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() * during rename() internally. @@ -278,6 +279,7 @@ #include <linux/prio_tree.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/mutex.h> +#include <linux/idr.h> #include <asm/atomic.h> #include <asm/semaphore.h> @@ -961,6 +963,9 @@ #endif /* Granularity of c/m/atime in ns. Cannot be worse than a second */ u32 s_time_gran; + + /* for fs's with dynamic i_ino values, track them with idr */ + struct idr s_inode_ids; }; extern struct timespec current_fs_time(struct super_block *sb); - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html