On Sun, 10 Jan 2010 18:34:02 +0200 Alexander Shishkin <virtuoso@xxxxxxxxx> wrote: > When a file descriptor limit is hit, display the top consumers of > descriptors so that it is possible to identify and fix those which > leak them. > > Two new sysctl tunables are introduced: > * file-max-consumers -- number of processes to display (defaults > to 10); > * file-max-rate-limit -- time interval between subsequent dumps > (defaults to 10 seconds). > > Signed-off-by: Alexander Shishkin <virtuoso@xxxxxxxxx> > CC: viro@xxxxxxxxxxxxxxxxxx > CC: linux-fsdevel@xxxxxxxxxxxxxxx > --- > fs/file_table.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++- > include/linux/fs.h | 5 +++ > kernel/sysctl.c | 14 ++++++++ > 3 files changed, 108 insertions(+), 1 deletions(-) > Seems to be a lot of code for a fairly obscure requirement. Can we justify adding it? Can't the operator use lsof(8)? > +struct fd_consumer > +{ Please use scripts/checkpatch.pl. > + struct task_struct *task; > + int fd_count; > +}; > + > +static int cmp_fd_consumers(const void *a, const void *b) > +{ > + const struct fd_consumer *x = a, *y = b; > + > + return y->fd_count - x->fd_count; > +} > + > +static void dump_fd_consumers(void) > +{ > + struct task_struct *p; > + struct files_struct *files; > + struct fdtable *fdt; > + int proc_limit = files_stat.max_consumers; > + int i, nproc; > + struct fd_consumer *procs, *tmp; Please avoid variables called `tmp'. Think up a meaningful name. > + if (!files_stat.max_consumers) > + return; > + > + read_lock(&tasklist_lock); > + > + /* build an array of per-task file descriptor usage */ > + nproc = nr_processes(); > + procs = kzalloc(nproc * sizeof(struct fd_consumer), GFP_KERNEL); GFP_KERNEL allocation under tasklist_lock is a bug. A bug which would have been revealed during runtime testing had you employed the wise advice in Documentation/SubmitChecklist! 
> + if (!procs) > + goto out; > + > + tmp = procs; > + > + for_each_process(p) { > + tmp->task = p; > + > + files = get_files_struct(p); This does task_lock() under tasklist_lock. Is that correct? > + if (!files) > + continue; > + > + spin_lock(&files->file_lock); Similarly, is this the correct ranking? Also, is it a _new_ ranking? Is there any place where we simultaneously take tasklist_lock and file_lock? If so, that's a bit undesirable. > + fdt = files_fdtable(files); > + > + /* we have to actually *count* the fds */ > + for (tmp->fd_count = i = 0; i < fdt->max_fds; i++) > + tmp->fd_count += !!fcheck_files(files, i); > + > + spin_unlock(&files->file_lock); > + put_files_struct(files); > + > + tmp++; > + } > + > + /* sort by number of used descriptor in descending order */ > + sort(procs, nproc, sizeof(struct fd_consumer), cmp_fd_consumers, NULL); This code can hold tasklist lock for quite a long time. > + if (proc_limit > nproc) > + proc_limit = nproc; > + > + /* output the 'proc_limit' first entries */ > + for (i = 0, tmp = procs; i < proc_limit; i++, tmp++) > + printk(KERN_INFO "=> %s [%d]: open=%d\n", tmp->task->comm, > + tmp->task->pid, tmp->fd_count); > + > + kfree(procs); > + > +out: > + read_unlock(&tasklist_lock); > +} > + > /* Find an unused file structure and return a pointer to it. > * Returns NULL, if there are no more free file structures or > * we run out of memory. 
> @@ -105,6 +182,7 @@ struct file *get_empty_filp(void) > const struct cred *cred = current_cred(); > static int old_max; > struct file * f; > + static unsigned long next_dump = 0; > > /* > * Privileged users can go above max_files > @@ -140,6 +218,14 @@ over: > if (get_nr_files() > old_max) { > printk(KERN_INFO "VFS: file-max limit %d reached\n", > get_max_files()); > + > + /* dump the biggest file descriptor users */ > + if (!next_dump || time_after(jiffies, next_dump)) { > + next_dump = jiffies + files_stat.rate_limit; > + > + dump_fd_consumers(); > + } Use of __ratelimit() would be appropriate here. > old_max = get_nr_files(); > } > goto fail; > @@ -425,6 +511,8 @@ void __init files_init(unsigned long mempages) > files_stat.max_files = n; > if (files_stat.max_files < NR_FILE) > files_stat.max_files = NR_FILE; > + > + files_stat.rate_limit = DUMP_RATE_LIMIT; > files_defer_init(); > percpu_counter_init(&nr_files, 0); > } > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 9147ca8..291beb3 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -36,6 +36,8 @@ struct files_stat_struct { > int nr_files; /* read only */ > int nr_free_files; /* read only */ > int max_files; /* tunable */ > + int max_consumers; /* tunable */ > + unsigned long rate_limit; /* tunable */ > }; > > struct inodes_stat_t { > @@ -46,6 +48,9 @@ struct inodes_stat_t { > > > #define NR_FILE 8192 /* this can well be larger on a larger system */ > +#define NR_CONSUMERS 10 /* dump this many tasks when file-max is hit */ > +#define DUMP_RATE_LIMIT msecs_to_jiffies(10000) /* wait this long between > + dumps */ > > #define MAY_EXEC 1 > #define MAY_WRITE 2 > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 8a68b24..dfb08fc 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -1325,6 +1325,20 @@ static struct ctl_table fs_table[] = { > .proc_handler = proc_dointvec, > }, > { > + .procname = "file-max-consumers", > + .data = &files_stat.max_consumers, > + .maxlen = 
sizeof(int), > + .mode = 0644, > + .proc_handler = proc_dointvec, > + }, > + { > + .procname = "file-max-rate-limit", > + .data = &files_stat.rate_limit, > + .maxlen = sizeof(unsigned long), > + .mode = 0644, > + .proc_handler = proc_doulongvec_ms_jiffies_minmax, > + }, > + { > .procname = "nr_open", > .data = &sysctl_nr_open, > .maxlen = sizeof(int), -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html