To ensure the uniqueness of the inode-number, manage it by IDR. Also it tries using the lowest unused inode-number, so the value will usually be smaller.

Another side effect is the type of the inode-number in tmpfs. By using IDR, it is limited to signed int. But I don't think it is a big problem. INT_MAX is big enough for the number of inodes in a single tmpfs.

Comparison of performance:
- test program: see below
- version: 3.15.0-rc7
- before this commit
  1 procs, 1048575/1048575 file, do unlink, 43.023 secs (usr 1.029 + sys 40.981)
  2 procs, 1048574/1048574 file, do unlink, 24.047 secs (usr 1.048 + sys 45.886)
  1 procs, 524286/524286 file, do unlink, 21.476 secs (usr 0.529 + sys 20.441)
  2 procs, 524286/524286 file, do unlink, 12.029 secs (usr 0.554 + sys 22.880)
  1 procs, 32766/32766 file, do unlink, 1.345 secs (usr 0.035 + sys 1.279)
  2 procs, 32766/32766 file, do unlink, 0.753 secs (usr 0.030 + sys 1.439)
- after this commit
  1 procs, 1048575/1048575 file, do unlink, 45.178 secs (usr 1.183 + sys 43.005)
  2 procs, 1048574/1048574 file, do unlink, 25.328 secs (usr 1.126 + sys 48.481)
  1 procs, 524286/524286 file, do unlink, 22.668 secs (usr 0.367 + sys 21.806)
  2 procs, 524286/524286 file, do unlink, 12.639 secs (usr 0.591 + sys 24.137)
  1 procs, 32766/32766 file, do unlink, 1.414 secs (usr 0.028 + sys 1.356)
  2 procs, 32766/32766 file, do unlink, 0.787 secs (usr 0.036 + sys 1.500)

The overhead certainly exists, but it appears to be around 5% or less.

Test programs.
------- tmpfs-idr.sh ------- #!/bin/sh set -eu f() # dir [opts] { local dir=$1 shift seq $(getconf _NPROCESSORS_ONLN) | while read ncpu do seq 1 | while read do_unlink do sudo mount -v -t tmpfs $@ tmpfs $dir #stat -f $dir free_inodes=$(stat -f -c %d $dir) /tmp/tmpfs-idr $dir $ncpu $free_inodes $do_unlink sudo umount $dir done done } dir=/tmp/tmpfs-$$ mkdir $dir uname -a free -m #f $dir -o size=50%,nr_inodes=$((0x7fffffff)) #f $dir -o size=50%,nr_inodes=$((0x07ffffff)) #f $dir -o size=50%,nr_inodes=$((0x007fffff)) f $dir -o size=50%,nr_inodes=$((0x00100000)) f $dir -o size=50%,nr_inodes=$((0x0007ffff)) f $dir -o size=50%,nr_inodes=$((0x00007fff)) rm -fr $dir ------- tmpfs-idr.c ------- #define _GNU_SOURCE #include <pthread.h> #include <sys/resource.h> #include <sys/stat.h> #include <sys/time.h> #include <sys/types.h> #include <assert.h> #include <errno.h> #include <fcntl.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> #include <time.h> #include <unistd.h> #ifndef O_PATH #define O_PATH 010000000 #endif pthread_barrier_t barrier; int rootfd, nproc, nfile, do_unlink; static int argton(char *s) { long l; errno = 0; l = strtol(s, NULL, 0); assert(!((l == LONG_MIN || l == LONG_MAX) && errno)); assert(l >= 0); return l; } void *f(void *arg) { int err, dirfd, fd, i; char a[16]; int id = (long)arg; snprintf(a, sizeof(a), "%d", id); err = mkdirat(rootfd, a, 0755); assert(!err); dirfd = openat(rootfd, a, O_RDONLY | O_PATH); assert(dirfd >= 0); err = pthread_barrier_wait(&barrier); assert(!err || err == PTHREAD_BARRIER_SERIAL_THREAD); for (i = 0; i < nfile; i++) { snprintf(a, sizeof(a), "%d", i); fd = openat(dirfd, a, O_CREAT | O_WRONLY); if (fd >= 0) { if (do_unlink) unlinkat(dirfd, a, /*flags*/ 0); close(fd); } else break; } return (void *)(long)i; } struct perf { struct timespec ts; struct rusage ru; }; void perf(struct perf *perf) { clock_gettime(CLOCK_MONOTONIC, &perf->ts); getrusage(RUSAGE_SELF, &perf->ru); } void ts_subtract(struct timespec *ans, struct 
timespec *a, struct timespec *b) { ans->tv_sec = a->tv_sec - b->tv_sec; ans->tv_nsec = a->tv_nsec - b->tv_nsec; if (ans->tv_nsec < 0) { ans->tv_sec--; ans->tv_nsec += 1000000000; } } void tv_subtract(struct timeval *ans, struct timeval *a, struct timeval *b) { ans->tv_sec = a->tv_sec - b->tv_sec; ans->tv_usec = a->tv_usec - b->tv_usec; if (ans->tv_usec < 0) { ans->tv_sec--; ans->tv_usec += 1000000; } } #define MAX_NPROC 16 void run(void) { int err, i, n; struct { pthread_t th; void *p; } b[MAX_NPROC]; struct perf s[3]; err = pthread_barrier_init(&barrier, NULL, nproc + 1); assert(!err); for (i = 0; i < nproc; i++) { err = pthread_create(&b[i].th, NULL, f, (void *)(long)i); assert(!err); } perf(s + 0); err = pthread_barrier_wait(&barrier); assert(!err || err == PTHREAD_BARRIER_SERIAL_THREAD); for (i = 0; i < nproc; i++) pthread_join(b[i].th, &b[i].p); perf(s + 1); n = 0; for (i = 0; i < nproc; i++) n += (long)b[i].p; ts_subtract(&s[2].ts, &s[1].ts, &s[0].ts); tv_subtract(&s[2].ru.ru_utime, &s[1].ru.ru_utime, &s[0].ru.ru_utime); tv_subtract(&s[2].ru.ru_stime, &s[1].ru.ru_stime, &s[0].ru.ru_stime); printf("%d procs, %d/%d file, %s unlink, %lu.%03ld secs" " (usr %lu.%03ld + sys %lu.%03ld)\n", nproc, n, nfile * nproc, do_unlink ? 
"do" : "no", s[2].ts.tv_sec, s[2].ts.tv_nsec / 1000000, s[2].ru.ru_utime.tv_sec, s[2].ru.ru_utime.tv_usec / 1000, s[2].ru.ru_stime.tv_sec, s[2].ru.ru_stime.tv_usec / 1000); } int main(int argc, char *argv[]) { rootfd = open(argv[1], O_RDONLY | O_PATH); assert(rootfd >= 0); nproc = argton(argv[2]); assert(nproc < MAX_NPROC); nfile = argton(argv[3]); nfile /= nproc; do_unlink = argton(argv[4]); run(); return 0; } /* * Local variables: ; * compile-command: "gcc -g -Wall -UNDEBUG -pthread -o /tmp/tmpfs-idr tmpfs-idr.c -lrt"; * End: ; */ ---------------------------------------- Cc: Eric Dumazet <edumazet@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Andreas Dilger <adilger@xxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Signed-off-by: J. R. Okajima <hooanon05g@xxxxxxxxx> --- include/linux/shmem_fs.h | 6 ++++-- mm/shmem.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 4d1771c..4ba8b43 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -24,10 +24,12 @@ struct shmem_inode_info { }; struct shmem_sb_info { + struct mutex idr_lock; + struct idr idr; /* manages inode-number */ unsigned long max_blocks; /* How many blocks are allowed */ struct percpu_counter used_blocks; /* How many are allocated */ - unsigned long max_inodes; /* How many inodes are allowed */ - unsigned long free_inodes; /* How many are left for allocation */ + int max_inodes; /* How many inodes are allowed */ + int free_inodes; /* How many are left for allocation */ spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ kuid_t uid; /* Mount uid for root directory */ kgid_t gid; /* Mount gid for root directory */ diff --git a/mm/shmem.c b/mm/shmem.c index 368f314..3ac613d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -107,9 +107,13 @@ static unsigned long shmem_default_max_blocks(void) return totalram_pages / 2; } -static unsigned long 
shmem_default_max_inodes(void) +static int shmem_default_max_inodes(void) { - return min(totalram_pages - totalhigh_pages, totalram_pages / 2); + unsigned long ul; + + ul = INT_MAX; + ul = min3(ul, totalram_pages - totalhigh_pages, totalram_pages / 2); + return ul; } #endif @@ -569,6 +573,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) static void shmem_evict_inode(struct inode *inode) { struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); if (inode->i_mapping->a_ops == &shmem_aops) { shmem_unacct_size(info->flags, inode->i_size); @@ -584,6 +589,11 @@ static void shmem_evict_inode(struct inode *inode) simple_xattrs_free(&info->xattrs); WARN_ON(inode->i_blocks); + if (inode->i_ino) { + mutex_lock(&sbinfo->idr_lock); + idr_remove(&sbinfo->idr, inode->i_ino); + mutex_unlock(&sbinfo->idr_lock); + } shmem_free_inode(inode->i_sb); clear_inode(inode); } @@ -1315,13 +1325,13 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode struct inode *inode; struct shmem_inode_info *info; struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + int ino; if (shmem_reserve_inode(sb)) return NULL; inode = new_inode(sb); if (inode) { - inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; @@ -1362,6 +1372,18 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode mpol_shared_policy_init(&info->policy, NULL); break; } + + /* inum 0 and 1 are unused */ + mutex_lock(&sbinfo->idr_lock); + ino = idr_alloc(&sbinfo->idr, inode, 2, INT_MAX, GFP_NOFS); + if (ino > 0) { + inode->i_ino = ino; + mutex_unlock(&sbinfo->idr_lock); + } else { + mutex_unlock(&sbinfo->idr_lock); + iput(inode); /* shmem_free_inode() will be called */ + inode = NULL; + } } else shmem_free_inode(sb); return inode; @@ -2385,7 +2407,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, 
goto bad_val; } else if (!strcmp(this_char,"nr_inodes")) { sbinfo->max_inodes = memparse(value, &rest); - if (*rest) + if (*rest || sbinfo->max_inodes < 2) goto bad_val; } else if (!strcmp(this_char,"mode")) { if (remount) @@ -2438,7 +2460,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); struct shmem_sb_info config = *sbinfo; - unsigned long inodes; + int inodes; int error = -EINVAL; config.mpol = NULL; @@ -2486,7 +2508,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",size=%luk", sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10)); if (sbinfo->max_inodes != shmem_default_max_inodes()) - seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); + seq_printf(seq, ",nr_inodes=%d", sbinfo->max_inodes); if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) @@ -2504,6 +2526,7 @@ static void shmem_put_super(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + idr_destroy(&sbinfo->idr); percpu_counter_destroy(&sbinfo->used_blocks); mpol_put(sbinfo->mpol); kfree(sbinfo); @@ -2522,6 +2545,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) if (!sbinfo) return -ENOMEM; + mutex_init(&sbinfo->idr_lock); + idr_init(&sbinfo->idr); sbinfo->mode = S_IRWXUGO | S_ISVTX; sbinfo->uid = current_fsuid(); sbinfo->gid = current_fsgid(); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html