On 2010-01-11, at 05:38, Alexander Shishkin wrote:
When a file descriptor limit is hit, display the top consumers of
descriptors so that it is possible to identify and fix those which
leak them.
Two new sysctl tunables are introduced:
* file-max-consumers -- number of processes to display (defaults
to 10);
* file-max-rate-limit -- time interval between subsequent dumps
(defaults to 10 seconds).
IMHO, max_consumers should default to 0 (which disables the dump) to avoid spamming the logs.
Signed-off-by: Alexander Shishkin <virtuoso@xxxxxxxxx>
CC: viro@xxxxxxxxxxxxxxxxxx
CC: linux-fsdevel@xxxxxxxxxxxxxxx
---
Changes:
v3 -- fix a couple of silly checkpatch errors
v2 -- add rate-limiting and reduce number of processes to be output
v1 -- initial implementation.
fs/file_table.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/fs.h | 5 +++
kernel/sysctl.c | 14 ++++++++
3 files changed, 107 insertions(+), 1 deletions(-)
diff --git a/fs/file_table.c b/fs/file_table.c
index 69652c5..26666fd 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -9,6 +9,7 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/sort.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
@@ -29,7 +30,8 @@
/* sysctl tunables... */
struct files_stat_struct files_stat = {
- .max_files = NR_FILE
+ .max_files = NR_FILE,
+ .max_consumers = NR_CONSUMERS,
};
/* public. Not pretty! */
@@ -90,6 +92,80 @@ int proc_nr_files(ctl_table *table, int write,
}
#endif
+/*
+ * Number of open file descriptors per task_struct.
+ * dump_fd_consumers() fills one entry per task it could count,
+ * then sorts the array and prints the leading entries.
+ */
+struct fd_consumer {
+ struct task_struct *task; /* task this entry describes */
+ int fd_count; /* open descriptors counted in that task's fdtable */
+};
+
+static int cmp_fd_consumers(const void *a, const void *b) /* comparator passed to sort() by dump_fd_consumers() */
+{
+ const struct fd_consumer *x = a, *y = b;
+
+ return y->fd_count - x->fd_count; /* positive when b holds more fds than a: yields descending order */
+}
+
+static void dump_fd_consumers(void) /* printk the tasks holding the most open file descriptors */
+{
+ struct task_struct *p;
+ struct files_struct *files;
+ struct fdtable *fdt;
+ int proc_limit = files_stat.max_consumers; /* max entries to print; clamped to nproc below */
+ int i, nproc;
+ struct fd_consumer *procs, *tmp;
+
+ if (!files_stat.max_consumers) /* a value of 0 disables the dump entirely */
+ return;
+
+ read_lock(&tasklist_lock); /* held until the out: label, i.e. across allocation, sort and printk */
+
+ /* build an array of per-task file descriptor usage */
+ nproc = nr_processes();
+ procs = kzalloc(nproc * sizeof(struct fd_consumer), GFP_KERNEL); /* NOTE(review): GFP_KERNEL allocation with tasklist_lock already held -- confirm this cannot sleep here */
+ if (!procs)
+ goto out;
+
+ tmp = procs;
+
+ for_each_process(p) {
+ tmp->task = p; /* overwritten by the next task if this one is skipped below */
+
+ files = get_files_struct(p);
+ if (!files)
+ continue; /* no files_struct: tmp is not advanced, so the slot is reused */
+
+ spin_lock(&files->file_lock); /* file_lock is held while the fdtable is walked */
+ fdt = files_fdtable(files);
+
+ /* we have to actually *count* the fds */
+ for (tmp->fd_count = i = 0; i < fdt->max_fds; i++)
+ tmp->fd_count += !!fcheck_files(files, i); /* adds 1 for every index where fcheck_files() is non-NULL */
+
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+ tmp++; /* advance only past tasks that were actually counted */
+ }
+
+ /* sort by number of used descriptors in descending order */
+ sort(procs, nproc, sizeof(struct fd_consumer), cmp_fd_consumers,
NULL); /* NOTE(review): sorts all nproc slots, including any left unused because tasks were skipped above */
+
+ if (proc_limit > nproc) /* never print more entries than were allocated */
+ proc_limit = nproc;
+
+ /* output the 'proc_limit' first entries */
+ for (i = 0, tmp = procs; i < proc_limit; i++, tmp++)
+ printk(KERN_INFO "=> %s [%d]: open=%d\n", tmp->task->comm,
+ tmp->task->pid, tmp->fd_count); /* NOTE(review): kzalloc leaves unused slots with task == NULL; if proc_limit reaches one this dereferences NULL -- confirm */
+
+ kfree(procs);
+
+out:
+ read_unlock(&tasklist_lock); /* pairs with the read_lock() at the top; also reached on allocation failure */
+}
+
/* Find an unused file structure and return a pointer to it.
* Returns NULL, if there are no more free file structures or
* we run out of memory.
@@ -105,6 +181,7 @@ struct file *get_empty_filp(void)
const struct cred *cred = current_cred();
static int old_max;
struct file * f;
+ static unsigned long next_dump;
/*
* Privileged users can go above max_files
@@ -140,6 +217,14 @@ over:
if (get_nr_files() > old_max) {
printk(KERN_INFO "VFS: file-max limit %d reached\n",
get_max_files());
+
+ /* dump the biggest file descriptor users */
+ if (!next_dump || time_after(jiffies, next_dump)) {
+ next_dump = jiffies + files_stat.rate_limit;
+
+ dump_fd_consumers();
+ }
+
old_max = get_nr_files();
}
goto fail;
@@ -425,6 +510,8 @@ void __init files_init(unsigned long mempages)
files_stat.max_files = n;
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
+
+ files_stat.rate_limit = DUMP_RATE_LIMIT;
files_defer_init();
percpu_counter_init(&nr_files, 0);
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9147ca8..291beb3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -36,6 +36,8 @@ struct files_stat_struct {
int nr_files; /* read only */
int nr_free_files; /* read only */
int max_files; /* tunable */
+ int max_consumers; /* tunable */
+ unsigned long rate_limit; /* tunable */
};
struct inodes_stat_t {
@@ -46,6 +48,9 @@ struct inodes_stat_t {
#define NR_FILE 8192 /* this can well be larger on a larger system */
+#define NR_CONSUMERS 10 /* dump this many tasks when file-max is hit */
+#define DUMP_RATE_LIMIT msecs_to_jiffies(10000) /* wait this long between dumps */
#define MAY_EXEC 1
#define MAY_WRITE 2
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b24..dfb08fc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1325,6 +1325,20 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "file-max-consumers",
+ .data = &files_stat.max_consumers,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "file-max-rate-limit",
+ .data = &files_stat.rate_limit,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ },
+ {
.procname = "nr_open",
.data = &sysctl_nr_open,
.maxlen = sizeof(int),
--
1.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html