From: Ira Weiny <ira.weiny@xxxxxxxxx> Now that we have the file pins information stored add a new procfs entry to display them to the user. NOTE output will be dependant on where the file pin is tied to. Some processes may have the pin associated with a file descriptor in which case that file is reported as well. Others are associated directly with the process mm and are reported as such. For example of a file pinned to an RDMA open context (fd 4) and a file pinned to the mm of that process: 4: /dev/infiniband/uverbs0 /mnt/pmem/foo /mnt/pmem/bar Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx> --- fs/proc/base.c | 214 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index ebea9501afb8..f4d219172235 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2995,6 +2995,7 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, */ static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; +static const struct file_operations proc_pid_file_pins_operations; static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), @@ -3024,6 +3025,7 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), + REG("file_pins", S_IRUGO, proc_pid_file_pins_operations), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif @@ -3422,6 +3424,7 @@ static const struct pid_entry tid_base_stuff[] = { ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), + REG("file_pins", S_IRUGO, proc_pid_file_pins_operations), #ifdef CONFIG_PROC_CHILDREN REG("children", S_IRUGO, proc_tid_children_operations), #endif @@ -3718,3 +3721,214 @@ void __init set_proc_pid_nlink(void) nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } + +/** + * file_pin information below. + */ + +struct proc_file_pins_private { + struct inode *inode; + struct task_struct *task; + struct mm_struct *mm; + struct files_struct *files; + unsigned int nr_pins; + struct xarray fps; +} __randomize_layout; + +static void release_fp(struct proc_file_pins_private *priv) +{ + up_read(&priv->mm->mmap_sem); + mmput(priv->mm); +} + +static void print_fd_file_pin(struct seq_file *m, struct file *file, + unsigned long i) +{ + struct file_file_pin *fp; + struct file_file_pin *tmp; + + if (list_empty_careful(&file->file_pins)) + return; + + seq_printf(m, "%lu: ", i); + seq_file_path(m, file, "\n"); + seq_putc(m, '\n'); + + list_for_each_entry_safe(fp, tmp, &file->file_pins, list) { + seq_puts(m, " "); + seq_file_path(m, fp->file, "\n"); + seq_putc(m, '\n'); + } +} + +/* We are storing the index's within the FD table for later retrieval */ +static int store_fd(const void *priv , struct file *file, unsigned i) +{ + struct proc_file_pins_private *fp_priv; + + /* cast away const... */ + fp_priv = (struct proc_file_pins_private *)priv; + + if (list_empty_careful(&file->file_pins)) + return 0; + + /* can't sleep in the iterate of the fd table */ + xa_store(&fp_priv->fps, fp_priv->nr_pins, xa_mk_value(i), GFP_ATOMIC); + fp_priv->nr_pins++; + + return 0; +} + +static void store_mm_pins(struct proc_file_pins_private *priv) +{ + struct mm_file_pin *fp; + struct mm_file_pin *tmp; + + list_for_each_entry_safe(fp, tmp, &priv->mm->file_pins, list) { + xa_store(&priv->fps, priv->nr_pins, fp, GFP_KERNEL); + priv->nr_pins++; + } +} + + +static void *fp_start(struct seq_file *m, loff_t *ppos) +{ + struct proc_file_pins_private *priv = m->private; + unsigned int pos = *ppos; + + priv->task = get_proc_task(priv->inode); + if (!priv->task) + return ERR_PTR(-ESRCH); + + if (!priv->mm || !mmget_not_zero(priv->mm)) + return NULL; + + priv->files = get_files_struct(priv->task); + down_read(&priv->mm->mmap_sem); + + xa_destroy(&priv->fps); + priv->nr_pins = 0; + + /* grab fds of "files" which have pins and store as xa values */ + if (priv->files) + iterate_fd(priv->files, 0, store_fd, priv); + + /* store mm_file_pins as xa entries */ + store_mm_pins(priv); + + if (pos >= priv->nr_pins) { + release_fp(priv); + return NULL; + } + + return xa_load(&priv->fps, pos); +} + +static void *fp_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct proc_file_pins_private *priv = m->private; + + (*pos)++; + if ((*pos) >= priv->nr_pins) { + release_fp(priv); + return NULL; + } + + return xa_load(&priv->fps, *pos); +} + +static void fp_stop(struct seq_file *m, void *v) +{ + struct proc_file_pins_private *priv = m->private; + + if (v) + release_fp(priv); + + if (priv->task) { + put_task_struct(priv->task); + priv->task = NULL; + } + + if (priv->files) { + put_files_struct(priv->files); + priv->files = NULL; + } +} + +static int show_fp(struct seq_file *m, void *v) +{ + struct proc_file_pins_private *priv = m->private; + + if (xa_is_value(v)) { + struct file *file; + unsigned long fd = xa_to_value(v); + + rcu_read_lock(); + file = fcheck_files(priv->files, fd); + if (file) + print_fd_file_pin(m, file, fd); + rcu_read_unlock(); + } else { + struct mm_file_pin *fp = v; + + seq_puts(m, "mm: "); + seq_file_path(m, fp->file, "\n"); + } + + return 0; +} + +static const struct seq_operations proc_pid_file_pins_op = { + .start = fp_start, + .next = fp_next, + .stop = fp_stop, + .show = show_fp +}; + +static int proc_file_pins_open(struct inode *inode, struct file *file) +{ + struct proc_file_pins_private *priv = __seq_open_private(file, + &proc_pid_file_pins_op, + sizeof(*priv)); + + if (!priv) + return -ENOMEM; + + xa_init(&priv->fps); + priv->inode = inode; + priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); + priv->task = NULL; + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); + + seq_release_private(inode, file); + return err; + } + + return 0; +} + +static int proc_file_pins_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct proc_file_pins_private *priv = seq->private; + + /* This is for "protection" not sure when these may end up not being + * NULL here... */ + WARN_ON(priv->files); + WARN_ON(priv->task); + + if (priv->mm) + mmdrop(priv->mm); + + xa_destroy(&priv->fps); + + return seq_release_private(inode, file); +} + +static const struct file_operations proc_pid_file_pins_operations = { + .open = proc_file_pins_open, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_file_pins_release, +}; -- 2.20.1