Re: [PATCH] vfs: get_next_ino(), never inum=0

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Christoph Hellwig:
> If you care about really unique inode numbers you shouldn't use get_next_ino
> but something like an idr allocator.  The default i_ino assigned in
> new_inode() from which get_next_ino was factored out was mostly intended
> for small synthetic filesystems with few enough inodes that it wouldn't
> wrap around.

Grep-ping get_next_ino, I got 30 calls in mainline.
How many of them are get_next_ino() inappropriate? I don't know. But at
least for tmpfs, it is better to manage the inums by itself since tmpfs
must be one of the biggest consumer of inums and it is NFS-exportable.

Do you think we need a common function in VFS to manage inums per sb, or
it is totally up to filesystem and the common function is unnecessary?

Instead of idr, I was thinking about a simple bitmap in tmpfs such like
this. It introduces a new mount option "ino" which forces tmpfs to
assign the lowest unused number for a new inode within the mounted
tmpfs. Without "ino" or specifying "noino", the behaviour is unchanged
(use vfs:get_next_ino()).
But it may not scale well due to the single spinlock every time.


J. R. Okajima


commit 214d38e8c34fb341fd0f37cc92614b5e93e0803b
Author: J. R. Okajima <hooanon05@xxxxxxxxxxx>
Date:   Mon Sep 2 10:45:42 2013 +0900

    shmem: management for inum
    
    Signed-off-by: J. R. Okajima <hooanon05g@xxxxxxxxx>

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c..39762e1 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -29,6 +29,8 @@ struct shmem_sb_info {
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
 	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
+	spinlock_t ino_lock;
+	unsigned long *ino_bitmap;
 	kuid_t uid;		    /* Mount uid for root directory */
 	kgid_t gid;		    /* Mount gid for root directory */
 	umode_t mode;		    /* Mount mode for root directory */
diff --git a/mm/shmem.c b/mm/shmem.c
index 9f70e02..bc2c5e4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -197,14 +197,61 @@ static int shmem_reserve_inode(struct super_block *sb)
 	return 0;
 }
 
-static void shmem_free_inode(struct super_block *sb)
+static void shmem_free_inode(struct inode *inode)
 {
+	struct super_block *sb = inode->i_sb;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+
 	if (sbinfo->max_inodes) {
 		spin_lock(&sbinfo->stat_lock);
 		sbinfo->free_inodes++;
 		spin_unlock(&sbinfo->stat_lock);
 	}
+
+	if (!inode->i_nlink) {
+		spin_lock(&sbinfo->ino_lock);
+		if (sbinfo->ino_bitmap)
+			clear_bit(inode->i_ino - 2, sbinfo->ino_bitmap);
+		spin_unlock(&sbinfo->ino_lock);
+	}
+}
+
+/*
+ * This is unsigned int instead of unsigned long.
+ * For details, see fs/inode.c:get_next_ino().
+ */
+unsigned int shmem_next_ino(struct super_block *sb)
+{
+	unsigned long ino;
+	struct shmem_sb_info *sbinfo;
+
+	ino = 0;
+	sbinfo = SHMEM_SB(sb);
+	if (sbinfo->ino_bitmap) {
+		spin_lock(&sbinfo->ino_lock);
+		/*
+		 * someone else may remount,
+		 * and ino_bitmap might be reset.
+		 */
+		if (sbinfo->ino_bitmap
+		    && !bitmap_full(sbinfo->ino_bitmap, sbinfo->max_inodes)) {
+			ino = find_first_zero_bit(sbinfo->ino_bitmap,
+						  sbinfo->max_inodes);
+			set_bit(ino, sbinfo->ino_bitmap);
+			ino += 2; /* ino 0 and 1 are reserved */
+		}
+		spin_unlock(&sbinfo->ino_lock);
+	}
+
+	/*
+	 * someone else did remount,
+	 * or ino_bitmap is unused originally,
+	 * or ino_bimapt is full.
+	 */
+	if (!ino)
+		ino = get_next_ino();
+
+	return ino;
 }
 
 /**
@@ -578,7 +625,7 @@ static void shmem_evict_inode(struct inode *inode)
 
 	simple_xattrs_free(&info->xattrs);
 	WARN_ON(inode->i_blocks);
-	shmem_free_inode(inode->i_sb);
+	shmem_free_inode(inode);
 	clear_inode(inode);
 }
 
@@ -1306,7 +1353,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 
 	inode = new_inode(sb);
 	if (inode) {
-		inode->i_ino = get_next_ino();
+		inode->i_ino = shmem_next_ino(sb);
 		inode_init_owner(inode, dir, mode);
 		inode->i_blocks = 0;
 		inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
@@ -1348,7 +1395,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 			break;
 		}
 	} else
-		shmem_free_inode(sb);
+		shmem_free_inode(inode);
 	return inode;
 }
 
@@ -1945,7 +1992,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	struct inode *inode = dentry->d_inode;
 
 	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
-		shmem_free_inode(inode->i_sb);
+		shmem_free_inode(inode);
 
 	dir->i_size -= BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -2315,6 +2362,54 @@ static const struct export_operations shmem_export_ops = {
 	.fh_to_dentry	= shmem_fh_to_dentry,
 };
 
+static void shmem_ino_bitmap(struct shmem_sb_info *sbinfo,
+			     unsigned long prev_max)
+{
+	unsigned long *p;
+	unsigned long n, d;
+	int do_msg;
+
+	n = sbinfo->max_inodes / BITS_PER_BYTE;
+	if (sbinfo->max_inodes % BITS_PER_BYTE)
+		n++;
+
+	do_msg = 0;
+	if (sbinfo->ino_bitmap) {
+		/*
+		 * by shrinking the bitmap, the large inode number in use
+		 * may be left. but it is harmless.
+		 */
+		d = 0;
+		if (sbinfo->max_inodes > prev_max) {
+			d = sbinfo->max_inodes - prev_max;
+			d /= BITS_PER_BYTE;
+		}
+		spin_lock(&sbinfo->ino_lock);
+		p = krealloc(sbinfo->ino_bitmap, n, GFP_NOWAIT);
+		if (p) {
+			memset(p + n - d, 0, d);
+			sbinfo->ino_bitmap = p;
+			spin_unlock(&sbinfo->ino_lock);
+		} else {
+			p = sbinfo->ino_bitmap;
+			sbinfo->ino_bitmap = NULL;
+			spin_unlock(&sbinfo->ino_lock);
+			kfree(p);
+			do_msg = 1;
+		}
+	} else {
+		p = kzalloc(n, GFP_NOFS);
+		spin_lock(&sbinfo->ino_lock);
+		sbinfo->ino_bitmap = p;
+		spin_unlock(&sbinfo->ino_lock);
+		do_msg = !p;
+	}
+
+	if (unlikely(do_msg))
+		pr_err("%s: ino failed (%lu bytes). Ignored.\n",
+		       __func__, n);
+}
+
 static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 			       bool remount)
 {
@@ -2322,7 +2417,10 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 	struct mempolicy *mpol = NULL;
 	uid_t uid;
 	gid_t gid;
+	bool do_ino;
+	unsigned long old_val = sbinfo->max_inodes;
 
+	do_ino = 0;
 	while (options != NULL) {
 		this_char = options;
 		for (;;) {
@@ -2342,6 +2440,14 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 		}
 		if (!*this_char)
 			continue;
+		if (!strcmp(this_char, "ino")) {
+			do_ino = 1;
+			continue;
+		} else if (!strcmp(this_char, "noino")) {
+			do_ino = 0;
+			continue;
+		}
+
 		if ((value = strchr(this_char,'=')) != NULL) {
 			*value++ = 0;
 		} else {
@@ -2370,7 +2476,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 				goto bad_val;
 		} else if (!strcmp(this_char,"nr_inodes")) {
 			sbinfo->max_inodes = memparse(value, &rest);
-			if (*rest)
+			if (*rest || !sbinfo->max_inodes)
 				goto bad_val;
 		} else if (!strcmp(this_char,"mode")) {
 			if (remount)
@@ -2408,6 +2514,16 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 		}
 	}
 	sbinfo->mpol = mpol;
+
+	if (do_ino)
+		shmem_ino_bitmap(sbinfo, old_val);
+	else if (sbinfo->ino_bitmap) {
+		void *p = sbinfo->ino_bitmap;
+		spin_lock(&sbinfo->ino_lock);
+		sbinfo->ino_bitmap = NULL;
+		spin_unlock(&sbinfo->ino_lock);
+		kfree(p);
+	}
 	return 0;
 
 bad_val:
@@ -2472,6 +2588,8 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 			sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
 	if (sbinfo->max_inodes != shmem_default_max_inodes())
 		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
+	if (sbinfo->ino_bitmap)
+		seq_printf(seq, ",ino");
 	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
 		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
 	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
@@ -2491,6 +2609,7 @@ static void shmem_put_super(struct super_block *sb)
 
 	percpu_counter_destroy(&sbinfo->used_blocks);
 	mpol_put(sbinfo->mpol);
+	kfree(sbinfo->ino_bitmap);
 	kfree(sbinfo);
 	sb->s_fs_info = NULL;
 }
@@ -2510,6 +2629,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 	sbinfo->mode = S_IRWXUGO | S_ISVTX;
 	sbinfo->uid = current_fsuid();
 	sbinfo->gid = current_fsgid();
+	spin_lock_init(&sbinfo->ino_lock);
 	sb->s_fs_info = sbinfo;
 
 #ifdef CONFIG_TMPFS
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux