+ fs-cache-avoid-enfile-checking-for-kernel-specific-open-files.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled

     FS-Cache: Avoid ENFILE checking for kernel-specific open files

has been added to the -mm tree.  Its filename is

     fs-cache-avoid-enfile-checking-for-kernel-specific-open-files.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this.


From: David Howells <dhowells@xxxxxxxxxx>

Make it possible to avoid ENFILE checking for kernel-specific open files, such
as those used by the CacheFiles module.

After, for example, tarring up a kernel source tree over the network, the
CacheFiles module may easily have 20000+ files open in the backing filesystem,
thus causing all non-root processes to be given error ENFILE when they try to
open a file, socket, pipe, etc.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
Cc: Trond Myklebust <trond.myklebust@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 Documentation/sysctl/fs.txt |    6 +++-
 fs/file_table.c             |   48 +++++++++++++++++++++++++++-------
 fs/open.c                   |   20 ++++++++++++++
 include/linux/file.h        |    1 
 include/linux/fs.h          |   10 +++++++
 include/linux/sysctl.h      |    1 
 kernel/sysctl.c             |   11 +++++++
 7 files changed, 86 insertions(+), 11 deletions(-)

diff -puN Documentation/sysctl/fs.txt~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files Documentation/sysctl/fs.txt
--- devel/Documentation/sysctl/fs.txt~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files	2006-05-11 09:56:24.000000000 -0700
+++ devel-akpm/Documentation/sysctl/fs.txt	2006-05-11 09:56:24.000000000 -0700
@@ -71,7 +71,7 @@ you might want to raise the limit.
 
 ==============================================================
 
-file-max & file-nr:
+file-max, file-nr & file-kernel:
 
 The kernel allocates file handles dynamically, but as yet it
 doesn't free them again.
@@ -88,6 +88,10 @@ close to the maximum, but the number of 
 significantly greater than 0, you've encountered a peak in your 
 usage of file handles and you don't need to increase the maximum.
 
+The value in file-kernel denotes the number of internal file handles
+that the kernel has open.  These do not contribute to ENFILE
+accounting.
+
 ==============================================================
 
 inode-max, inode-nr & inode-state:
diff -puN fs/file_table.c~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files fs/file_table.c
--- devel/fs/file_table.c~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files	2006-05-11 09:56:24.000000000 -0700
+++ devel-akpm/fs/file_table.c	2006-05-11 09:56:24.000000000 -0700
@@ -30,10 +30,13 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
+struct files_kernel_stat_struct files_kernel_stat;
+
 /* public. Not pretty! */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+static atomic_t nr_kernel_files;
 
 static inline void file_free_rcu(struct rcu_head *head)
 {
@@ -43,7 +46,10 @@ static inline void file_free_rcu(struct 
 
 static inline void file_free(struct file *f)
 {
-	percpu_counter_dec(&nr_files);
+	if (f->f_kernel_flags & FKFLAGS_NO_ENFILE)
+		atomic_dec(&nr_kernel_files);
+	else
+		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
@@ -74,45 +80,64 @@ int proc_nr_files(ctl_table *table, int 
 	files_stat.nr_files = get_nr_files();
 	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
 }
+int proc_files_kernel(ctl_table *table, int write, struct file *filp,
+		      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	files_kernel_stat.nr_kernel_files = atomic_read(&nr_kernel_files);
+	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
 #else
 int proc_nr_files(ctl_table *table, int write, struct file *filp,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
+int proc_files_kernel(ctl_table *table, int write, struct file *filp,
+		      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
 #endif
 
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
  * we run out of memory.
  */
-struct file *get_empty_filp(void)
+struct file *get_empty_kernel_filp(unsigned short kflags)
 {
 	struct task_struct *tsk;
 	static int old_max;
 	struct file * f;
 
 	/*
-	 * Privileged users can go above max_files
+	 * Privileged users can go above max_files and internal kernel users
+	 * can avoid it completely
 	 */
-	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
+	if (!(kflags & FKFLAGS_NO_ENFILE) &&
+	    get_nr_files() >= files_stat.max_files &&
+	    !capable(CAP_SYS_ADMIN)
+	    ) {
 		/*
-		 * percpu_counters are inaccurate.  Do an expensive check before
-		 * we go and fail.
+		 * percpu_counters are inaccurate.  Do an expensive
+		 * check before we go and fail.
 		 */
 		if (percpu_counter_sum(&nr_files) >= files_stat.max_files)
 			goto over;
 	}
 
-	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
 	if (f == NULL)
 		goto fail;
 
-	percpu_counter_inc(&nr_files);
-	memset(f, 0, sizeof(*f));
+	if (kflags & FKFLAGS_NO_ENFILE)
+		atomic_inc(&nr_kernel_files);
+	else
+		percpu_counter_inc(&nr_files);
+
 	if (security_file_alloc(f))
 		goto fail_sec;
 
+	f->f_kernel_flags = kflags;
 	tsk = current;
 	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_set(&f->f_count, 1);
@@ -138,6 +163,11 @@ fail:
 	return NULL;
 }
 
+struct file *get_empty_filp(void)
+{
+	return get_empty_kernel_filp(0);
+}
+
 EXPORT_SYMBOL(get_empty_filp);
 
 void fastcall fput(struct file *file)
diff -puN fs/open.c~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files fs/open.c
--- devel/fs/open.c~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files	2006-05-11 09:56:24.000000000 -0700
+++ devel-akpm/fs/open.c	2006-05-11 09:56:24.000000000 -0700
@@ -974,6 +974,26 @@ struct file *dentry_open(struct dentry *
 EXPORT_SYMBOL(dentry_open);
 
 /*
+ * open a file for in-kernel use only; it is exempt from ENFILE accounting
+ */
+struct file *dentry_open_kernel(struct dentry *dentry, struct vfsmount *mnt, int flags)
+{
+	int error;
+	struct file *f;
+
+	error = -ENFILE;
+	f = get_empty_kernel_filp(FKFLAGS_NO_ENFILE);	/* skips the max_files check */
+	if (f == NULL) {
+		dput(dentry);	/* allocation failed: drop the dentry and mnt references */
+		mntput(mnt);
+		return ERR_PTR(error);
+	}
+
+	return __dentry_open(dentry, mnt, flags, f, NULL);
+}
+EXPORT_SYMBOL_GPL(dentry_open_kernel);
+
+/*
  * Find an empty file descriptor entry, and mark it busy.
  */
 int get_unused_fd(void)
diff -puN include/linux/file.h~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files include/linux/file.h
--- devel/include/linux/file.h~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files	2006-05-11 09:56:24.000000000 -0700
+++ devel-akpm/include/linux/file.h	2006-05-11 09:56:24.000000000 -0700
@@ -79,7 +79,6 @@ extern void FASTCALL(set_close_on_exec(u
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
-struct kmem_cache;
 
 extern struct file ** alloc_fd_array(int);
 extern void free_fd_array(struct file **, int);
diff -puN include/linux/fs.h~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files include/linux/fs.h
--- devel/include/linux/fs.h~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files	2006-05-11 09:56:24.000000000 -0700
+++ devel-akpm/include/linux/fs.h	2006-05-11 09:56:24.000000000 -0700
@@ -34,7 +34,11 @@ struct files_stat_struct {
 	int nr_free_files;	/* read only */
 	int max_files;		/* tunable */
 };
+struct files_kernel_stat_struct {
+	int nr_kernel_files;	/* read only */
+};
 extern struct files_stat_struct files_stat;
+extern struct files_kernel_stat_struct files_kernel_stat;
 extern int get_max_files(void);
 
 struct inodes_stat_t {
@@ -70,6 +74,8 @@ extern int dir_notify_enable;
    behavior for cross-node execution/opening_for_writing of files */
 #define FMODE_EXEC	16
 
+#define FKFLAGS_NO_ENFILE	1	/* kernel internal file (ignored for ENFILE accounting) */
+
 #define RW_MASK		1
 #define RWA_MASK	2
 #define READ 0
@@ -640,6 +646,7 @@ struct file {
 	atomic_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
+	unsigned short		f_kernel_flags;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
@@ -1385,6 +1392,7 @@ extern long do_sys_open(int fdf, const c
 			int mode);
 extern struct file *filp_open(const char *, int, int);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
+extern struct file * dentry_open_kernel(struct dentry *, struct vfsmount *, int);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 
@@ -1586,6 +1594,7 @@ static inline void insert_inode_hash(str
 }
 
 extern struct file * get_empty_filp(void);
+extern struct file * get_empty_kernel_filp(unsigned short fkflags);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
 struct bio;
@@ -1611,6 +1620,7 @@ extern ssize_t generic_file_direct_write
 		unsigned long *, loff_t, loff_t *, size_t, size_t);
 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t, loff_t *, size_t, ssize_t);
+extern int generic_file_buffered_write_one_kernel_page(struct file *, pgoff_t, struct page *);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
diff -puN include/linux/sysctl.h~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files include/linux/sysctl.h
--- devel/include/linux/sysctl.h~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files	2006-05-11 09:56:24.000000000 -0700
+++ devel-akpm/include/linux/sysctl.h	2006-05-11 09:56:24.000000000 -0700
@@ -787,6 +787,7 @@ enum
 	FS_AIO_NR=18,	/* current system-wide number of aio requests */
 	FS_AIO_MAX_NR=19,	/* system-wide maximum number of aio requests */
 	FS_INOTIFY=20,	/* inotify submenu */
+	FS_FILE_KERNEL=21,	/* int: number of internal kernel files */
 };
 
 /* /proc/sys/fs/quota/ */
diff -puN kernel/sysctl.c~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files kernel/sysctl.c
--- devel/kernel/sysctl.c~fs-cache-avoid-enfile-checking-for-kernel-specific-open-files	2006-05-11 09:56:24.000000000 -0700
+++ devel-akpm/kernel/sysctl.c	2006-05-11 09:56:24.000000000 -0700
@@ -53,6 +53,9 @@
 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
                      void __user *buffer, size_t *lenp, loff_t *ppos);
 
+extern int proc_files_kernel(ctl_table *table, int write, struct file *filp,
+                     void __user *buffer, size_t *lenp, loff_t *ppos);
+
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -956,6 +959,14 @@ static ctl_table fs_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
+		.ctl_name	= FS_FILE_KERNEL,
+		.procname	= "file-kernel",
+		.data		= &files_kernel_stat,	/* filled in by proc_files_kernel() */
+		.maxlen		= 1*sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= &proc_files_kernel,
+	},
+	{
 		.ctl_name	= FS_DENTRY,
 		.procname	= "dentry-state",
 		.data		= &dentry_stat,
_

Patches currently in -mm which might be from dhowells@xxxxxxxxxx are

fix-can_share_swap_page-when-config_swap.patch
nfs-permit-filesystem-to-override-root-dentry-on-mount.patch
nfs-permit-filesystem-to-perform-statfs-with-a-known-root-dentry.patch
nfs-abstract-out-namespace-initialisation.patch
nfs-add-dentry-materialisation-op.patch
nfs-split-fs-nfs-inodec-into-inode-superblock-and-namespace-bits.patch
nfs-share-nfs-superblocks-per-protocol-per-server-per-fsid.patch
fs-cache-provide-a-filesystem-specific-syncable-page-bit.patch
fs-cache-add-notification-of-page-becoming-writable-to-vma-ops.patch
fs-cache-avoid-enfile-checking-for-kernel-specific-open-files.patch
fs-cache-generic-filesystem-caching-facility.patch
fs-cache-make-kafs-use-fs-cache.patch
fs-cache-cachefiles-a-cache-that-backs-onto-a-mounted-filesystem.patch
fs-cache-release-page-private-in-failed-readahead.patch
nfs-use-local-caching.patch
fix-incorrect-sa_onstack-behaviour-for-64-bit-processes.patch
net-rxrpc-use-list_move.patch
fs-use-list_move.patch
mutex-subsystem-synchro-test-module.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux