[RFC PATCH v4 1/2] tmpfs: manage the inode-number by IDR, signed int inum

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



To ensure the uniqueness of the inode-number, manage it by IDR.
Also it tries to use the lowest unused inode-number, so the value will
usually be smaller.
Another side effect is the type of the inode-number in tmpfs. By using
IDR, it is limited to signed int. But I don't think it is a big
problem. INT_MAX is big enough for the number of inodes in a single tmpfs.

Performance comparison:
- test program: see below
- version: 3.15.0-rc7
- before this commit
  1 procs, 1048575/1048575 file, do unlink, 43.023 secs (usr 1.029 + sys 40.981)
  2 procs, 1048574/1048574 file, do unlink, 24.047 secs (usr 1.048 + sys 45.886)
  1 procs, 524286/524286 file, do unlink, 21.476 secs (usr 0.529 + sys 20.441)
  2 procs, 524286/524286 file, do unlink, 12.029 secs (usr 0.554 + sys 22.880)
  1 procs, 32766/32766 file, do unlink, 1.345 secs (usr 0.035 + sys 1.279)
  2 procs, 32766/32766 file, do unlink, 0.753 secs (usr 0.030 + sys 1.439)
- after this commit
  1 procs, 1048575/1048575 file, do unlink, 45.178 secs (usr 1.183 + sys 43.005)
  2 procs, 1048574/1048574 file, do unlink, 25.328 secs (usr 1.126 + sys 48.481)
  1 procs, 524286/524286 file, do unlink, 22.668 secs (usr 0.367 + sys 21.806)
  2 procs, 524286/524286 file, do unlink, 12.639 secs (usr 0.591 + sys 24.137)
  1 procs, 32766/32766 file, do unlink, 1.414 secs (usr 0.028 + sys 1.356)
  2 procs, 32766/32766 file, do unlink, 0.787 secs (usr 0.036 + sys 1.500)

The overhead surely exists, but looks around 5% or less.

Test programs:
------- tmpfs-idr.sh -------
#!/bin/sh

set -eu

# Run the benchmark once for every thread count from 1 up to the number
# of online CPUs, mounting a fresh tmpfs on <dir> for each run and
# unmounting it afterwards.  Any further arguments are handed to mount
# unchanged.
f() # dir [opts]
{
	local dir="$1"
	shift
	seq $(getconf _NPROCESSORS_ONLN) |
	while read ncpu
	do
		# a single iteration with do_unlink=1
		seq 1 |
		while read do_unlink
		do
			# quoted so that a path or option containing blanks
			# is not split into several words
			sudo mount -v -t tmpfs "$@" tmpfs "$dir"
			#stat -f $dir
			free_inodes=$(stat -f -c %d "$dir")
			/tmp/tmpfs-idr "$dir" "$ncpu" "$free_inodes" "$do_unlink"
			sudo umount "$dir"
		done
	done
}

# Scratch mount point, named after the PID of this script.
dir=/tmp/tmpfs-$$
mkdir $dir

# Print the kernel version and memory situation ahead of the results.
uname -a
free -m

# Larger inode limits that are disabled by default:
#   0x7fffffff 0x07ffffff 0x007fffff
for nr_inodes in 0x00100000 0x0007ffff 0x00007fff
do
	f $dir -o size=50%,nr_inodes=$(($nr_inodes))
done

rm -fr $dir
------- tmpfs-idr.c -------
#define _GNU_SOURCE
#include <pthread.h>

#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#ifndef O_PATH
#define O_PATH		010000000
#endif

/* Barrier that the main thread and every worker wait on before the timed loop. */
pthread_barrier_t barrier;
/*
 * Benchmark parameters, set in main() from the command line:
 *   rootfd    - descriptor of the directory under test (argv[1])
 *   nproc     - number of worker threads (argv[2])
 *   nfile     - files per worker, i.e. argv[3] divided by nproc
 *   do_unlink - non-zero to unlink each file right after creating it (argv[4])
 */
int rootfd, nproc, nfile, do_unlink;

/*
 * Parse a non-negative integer command-line argument.
 *
 * The string is converted by strtol() with base 0, so decimal, 0x-prefixed
 * hexadecimal and 0-prefixed octal are accepted.  The program aborts via
 * assert() when the conversion overflows, when the string holds no digits
 * or has trailing characters, when the value is negative, or when it does
 * not fit in the int that is returned.
 */
static int argton(char *s)
{
	long l;
	char *end;

	errno = 0;
	l = strtol(s, &end, 0);
	assert(!((l == LONG_MIN || l == LONG_MAX)
		 && errno));
	/* reject "" and inputs such as "12abc" instead of guessing */
	assert(end != s && *end == '\0');
	assert(l >= 0);
	/* the result is returned as int; refuse values that would be cut off */
	assert(l <= INT_MAX);

	return (int)l;
}

/*
 * Worker thread: create up to nfile files in a private sub-directory.
 *
 * arg carries the worker index (cast through long); it is also used as the
 * name of the sub-directory created under rootfd.  Once all workers and
 * the main thread have reached the barrier, files named "0", "1", ... are
 * created one by one and, when do_unlink is set, unlinked immediately.
 * The loop stops at the first file that cannot be created.
 *
 * Returns the number of files created, cast to void *.
 */
void *f(void *arg)
{
	int err, dirfd, fd, i;
	char a[16];
	int id = (long)arg;

	snprintf(a, sizeof(a), "%d", id);
	err = mkdirat(rootfd, a, 0755);
	assert(!err);
	dirfd = openat(rootfd, a, O_RDONLY | O_PATH);
	assert(dirfd >= 0);

	err = pthread_barrier_wait(&barrier);
	assert(!err || err == PTHREAD_BARRIER_SERIAL_THREAD);

	for (i = 0; i < nfile; i++) {
		snprintf(a, sizeof(a), "%d", i);
		/*
		 * O_CREAT requires the mode argument; without it the
		 * permission bits are taken from whatever happens to be
		 * in the argument slot.
		 */
		fd = openat(dirfd, a, O_CREAT | O_WRONLY, 0644);
		if (fd < 0)
			break;
		if (do_unlink)
			unlinkat(dirfd, a, /*flags*/ 0);
		close(fd);
	}

	/* the directory descriptor is no longer needed */
	close(dirfd);

	return (void *)(long)i;
}

/* One measurement sample, filled in by perf(). */
struct perf {
	struct timespec ts;	/* CLOCK_MONOTONIC reading */
	struct rusage ru;	/* getrusage(RUSAGE_SELF) result */
};

/*
 * Store the current CLOCK_MONOTONIC time and the resource usage of this
 * process (RUSAGE_SELF) into *perf.  A failure of either call aborts the
 * program via assert(), like the other system calls in this file.
 */
void perf(struct perf *perf)
{
	int err;

	err = clock_gettime(CLOCK_MONOTONIC, &perf->ts);
	assert(!err);
	err = getrusage(RUSAGE_SELF, &perf->ru);
	assert(!err);
}

/*
 * Compute *ans = *a - *b for timespec values.  When the nanosecond part
 * of *a is smaller than that of *b, one second is borrowed so that
 * ans->tv_nsec is not left negative.
 */
void ts_subtract(struct timespec *ans, struct timespec *a, struct timespec *b)
{
	if (a->tv_nsec >= b->tv_nsec) {
		ans->tv_sec = a->tv_sec - b->tv_sec;
		ans->tv_nsec = a->tv_nsec - b->tv_nsec;
	} else {
		/* borrow one second */
		ans->tv_sec = a->tv_sec - b->tv_sec - 1;
		ans->tv_nsec = a->tv_nsec - b->tv_nsec + 1000000000;
	}
}

/*
 * Compute *ans = *a - *b for timeval values.  When the microsecond part
 * of *a is smaller than that of *b, one second is borrowed so that
 * ans->tv_usec is not left negative.
 */
void tv_subtract(struct timeval *ans, struct timeval *a, struct timeval *b)
{
	if (a->tv_usec >= b->tv_usec) {
		ans->tv_sec = a->tv_sec - b->tv_sec;
		ans->tv_usec = a->tv_usec - b->tv_usec;
	} else {
		/* borrow one second */
		ans->tv_sec = a->tv_sec - b->tv_sec - 1;
		ans->tv_usec = a->tv_usec - b->tv_usec + 1000000;
	}
}

#define MAX_NPROC 16
/*
 * Run one benchmark pass and print a one-line summary.
 *
 * Creates nproc worker threads running f(), releases them through the
 * shared barrier, joins them all, and prints the number of files created
 * against the number requested together with the elapsed wall-clock,
 * user and system time.  The local thread table has MAX_NPROC entries,
 * so the caller must not set nproc higher than that.
 */
void run(void)
{
	int err, i, n;
	struct {
		pthread_t th;
		void *p;
	} b[MAX_NPROC];
	struct perf s[3];	/* [0] start, [1] end, [2] end - start */

	err = pthread_barrier_init(&barrier, NULL, nproc + 1);
	assert(!err);
	for (i = 0; i < nproc; i++) {
		err = pthread_create(&b[i].th, NULL, f, (void *)(long)i);
		assert(!err);
	}

	/*
	 * Take the start sample before joining the barrier: the workers
	 * cannot enter their file loop until the main thread arrives.
	 */
	perf(s + 0);
	err = pthread_barrier_wait(&barrier);
	assert(!err || err == PTHREAD_BARRIER_SERIAL_THREAD);

	for (i = 0; i < nproc; i++) {
		err = pthread_join(b[i].th, &b[i].p);
		assert(!err);
	}
	perf(s + 1);

	/* all waiters are gone, so the barrier can be released */
	err = pthread_barrier_destroy(&barrier);
	assert(!err);

	/* each worker returned its own count of created files */
	n = 0;
	for (i = 0; i < nproc; i++)
		n += (long)b[i].p;

	ts_subtract(&s[2].ts, &s[1].ts, &s[0].ts);
	tv_subtract(&s[2].ru.ru_utime, &s[1].ru.ru_utime, &s[0].ru.ru_utime);
	tv_subtract(&s[2].ru.ru_stime, &s[1].ru.ru_stime, &s[0].ru.ru_stime);

	/*
	 * time_t and suseconds_t have no printf conversion of their own;
	 * cast to long so that the arguments match %ld.
	 */
	printf("%d procs, %d/%d file, %s unlink, %ld.%03ld secs"
	       " (usr %ld.%03ld + sys %ld.%03ld)\n",
	       nproc, n, nfile * nproc, do_unlink ? "do" : "no",
	       (long)s[2].ts.tv_sec, (long)(s[2].ts.tv_nsec / 1000000),
	       (long)s[2].ru.ru_utime.tv_sec,
	       (long)(s[2].ru.ru_utime.tv_usec / 1000),
	       (long)s[2].ru.ru_stime.tv_sec,
	       (long)(s[2].ru.ru_stime.tv_usec / 1000));
}

/*
 * Usage: tmpfs-idr dir nproc nfile do_unlink
 *
 *   dir       - directory in which the files are created
 *   nproc     - number of worker threads, 1 .. MAX_NPROC - 1
 *   nfile     - total number of files; divided evenly among the workers
 *   do_unlink - non-zero to unlink each file right after creating it
 *
 * Returns 0 after one benchmark pass, or EXIT_FAILURE when arguments are
 * missing.  Invalid argument values abort the program via assert().
 */
int main(int argc, char *argv[])
{
	/* argv[1]..argv[4] are read below; do not run past the end of argv */
	if (argc < 5) {
		fprintf(stderr, "usage: %s dir nproc nfile do_unlink\n",
			argc > 0 ? argv[0] : "tmpfs-idr");
		return EXIT_FAILURE;
	}

	rootfd = open(argv[1], O_RDONLY | O_PATH);
	assert(rootfd >= 0);
	nproc = argton(argv[2]);
	/* nproc is the divisor a few lines below */
	assert(nproc > 0);
	assert(nproc < MAX_NPROC);
	nfile = argton(argv[3]);
	nfile /= nproc;
	do_unlink = argton(argv[4]);
	run();

	return 0;
}

/*
 * Local variables: ;
 * compile-command: "gcc -g -Wall -UNDEBUG -pthread -o /tmp/tmpfs-idr tmpfs-idr.c -lrt";
 * End: ;
 */
----------------------------------------

Cc: Eric Dumazet <edumazet@xxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Andreas Dilger <adilger@xxxxxxxxx>
Cc: Jan Kara <jack@xxxxxxx>
Signed-off-by: J. R. Okajima <hooanon05g@xxxxxxxxx>
---
 include/linux/shmem_fs.h |    6 ++++--
 mm/shmem.c               |   37 +++++++++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c..4ba8b43 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -24,10 +24,12 @@ struct shmem_inode_info {
 };
 
 struct shmem_sb_info {
+	struct mutex idr_lock;
+	struct idr idr;		    /* manages inode-number */
 	unsigned long max_blocks;   /* How many blocks are allowed */
 	struct percpu_counter used_blocks;  /* How many are allocated */
-	unsigned long max_inodes;   /* How many inodes are allowed */
-	unsigned long free_inodes;  /* How many are left for allocation */
+	int max_inodes;		    /* How many inodes are allowed */
+	int free_inodes;	    /* How many are left for allocation */
 	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
 	kuid_t uid;		    /* Mount uid for root directory */
 	kgid_t gid;		    /* Mount gid for root directory */
diff --git a/mm/shmem.c b/mm/shmem.c
index 368f314..3ac613d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -107,9 +107,13 @@ static unsigned long shmem_default_max_blocks(void)
 	return totalram_pages / 2;
 }
 
-static unsigned long shmem_default_max_inodes(void)
+static int shmem_default_max_inodes(void)
 {
-	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
+	unsigned long ul;
+
+	ul = INT_MAX;
+	ul = min3(ul, totalram_pages - totalhigh_pages, totalram_pages / 2);
+	return ul;
 }
 #endif
 
@@ -569,6 +573,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 static void shmem_evict_inode(struct inode *inode)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 
 	if (inode->i_mapping->a_ops == &shmem_aops) {
 		shmem_unacct_size(info->flags, inode->i_size);
@@ -584,6 +589,11 @@ static void shmem_evict_inode(struct inode *inode)
 
 	simple_xattrs_free(&info->xattrs);
 	WARN_ON(inode->i_blocks);
+	if (inode->i_ino) {
+		mutex_lock(&sbinfo->idr_lock);
+		idr_remove(&sbinfo->idr, inode->i_ino);
+		mutex_unlock(&sbinfo->idr_lock);
+	}
 	shmem_free_inode(inode->i_sb);
 	clear_inode(inode);
 }
@@ -1315,13 +1325,13 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 	struct inode *inode;
 	struct shmem_inode_info *info;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+	int ino;
 
 	if (shmem_reserve_inode(sb))
 		return NULL;
 
 	inode = new_inode(sb);
 	if (inode) {
-		inode->i_ino = get_next_ino();
 		inode_init_owner(inode, dir, mode);
 		inode->i_blocks = 0;
 		inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
@@ -1362,6 +1372,18 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 			mpol_shared_policy_init(&info->policy, NULL);
 			break;
 		}
+
+		/* inum 0 and 1 are unused */
+		mutex_lock(&sbinfo->idr_lock);
+		ino = idr_alloc(&sbinfo->idr, inode, 2, INT_MAX, GFP_NOFS);
+		if (ino > 0) {
+			inode->i_ino = ino;
+			mutex_unlock(&sbinfo->idr_lock);
+		} else {
+			mutex_unlock(&sbinfo->idr_lock);
+			iput(inode);	/* shmem_free_inode() will be called */
+			inode = NULL;
+		}
 	} else
 		shmem_free_inode(sb);
 	return inode;
@@ -2385,7 +2407,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 				goto bad_val;
 		} else if (!strcmp(this_char,"nr_inodes")) {
 			sbinfo->max_inodes = memparse(value, &rest);
-			if (*rest)
+			if (*rest || sbinfo->max_inodes < 2)
 				goto bad_val;
 		} else if (!strcmp(this_char,"mode")) {
 			if (remount)
@@ -2438,7 +2460,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	struct shmem_sb_info config = *sbinfo;
-	unsigned long inodes;
+	int inodes;
 	int error = -EINVAL;
 
 	config.mpol = NULL;
@@ -2486,7 +2508,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",size=%luk",
 			sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
 	if (sbinfo->max_inodes != shmem_default_max_inodes())
-		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
+		seq_printf(seq, ",nr_inodes=%d", sbinfo->max_inodes);
 	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
 		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
 	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
@@ -2504,6 +2526,7 @@ static void shmem_put_super(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+	idr_destroy(&sbinfo->idr);
 	percpu_counter_destroy(&sbinfo->used_blocks);
 	mpol_put(sbinfo->mpol);
 	kfree(sbinfo);
@@ -2522,6 +2545,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbinfo)
 		return -ENOMEM;
 
+	mutex_init(&sbinfo->idr_lock);
+	idr_init(&sbinfo->idr);
 	sbinfo->mode = S_IRWXUGO | S_ISVTX;
 	sbinfo->uid = current_fsuid();
 	sbinfo->gid = current_fsgid();
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux