Sorry for delay, On 08/02, Kirill A. Shutemov wrote: > > +/* > + * proc_pid_personality() and proc_pid_stack() take cred_guard_mutex via > + * lock_trace() And at first glance they lock_trace() can die. But lets temporary ignore, m_start() is trickier. > +static struct lock_class_key pid_maps_seq_file_lock; > + > void task_mem(struct seq_file *m, struct mm_struct *mm) > { > unsigned long data, text, lib, swap; > @@ -242,6 +254,7 @@ static int do_maps_open(struct inode *inode, struct file *file, > ret = seq_open(file, ops); > if (!ret) { > struct seq_file *m = file->private_data; > + lockdep_set_class(&m->lock, &pid_maps_seq_file_lock); Perhaps lockdep_set_subclass() would be better... But this doesn't matter. The question is, why m_start() calls mm_access(). This is not even strictly correct if the task execs between m_stop() + m_start(). Can't we do something like below? The patch is obviously horrible and incomplete, just to explain what I meant. Basically this is what proc_mem_operations does. Oleg. diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3ab6d14..c16b70e 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -267,6 +267,7 @@ extern int proc_remount(struct super_block *, int *, char *); struct proc_maps_private { struct pid *pid; struct task_struct *task; + struct mm_struct *mm; #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index cfa63ee..9b88248 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -165,9 +165,9 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_access(priv->task, PTRACE_MODE_READ); - if (!mm || IS_ERR(mm)) - return mm; + mm = priv->mm; + if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + return NULL; down_read(&mm->mmap_sem); tail_vma = get_gate_vma(priv->task->mm); @@ -231,6 +231,27 @@ static void m_stop(struct seq_file *m, void *v) put_task_struct(priv->task); } +// TODO: change __mem_open() to use this helper +static struct mm_struct *xxx(struct inode *inode, struct file *file, unsigned int mode) +{ + struct task_struct *task = get_proc_task(inode); + struct mm_struct *mm = ERR_PTR(-ESRCH); + + if (task) { + mm = mm_access(task, mode); + put_task_struct(task); + + if (!IS_ERR_OR_NULL(mm)) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + } + + return mm; +} + static int do_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { @@ -239,17 +260,38 @@ static int do_maps_open(struct inode *inode, struct file *file, priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv) { priv->pid = proc_pid(inode); + priv->mm = xxx(inode, file, PTRACE_MODE_READ); + + // XXX cleanup me + if (IS_ERR(priv->mm)) { + ret = -EACCES; + goto free; + } + ret = seq_open(file, ops); if (!ret) { struct seq_file *m = file->private_data; m->private = priv; } else { + // XXX cleanup me + if (priv->mm) + mmdrop(priv->mm); + free: kfree(priv); } } return ret; } +static int xxx_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct proc_maps_private *priv = seq->private; + if (priv->mm) + mmdrop(priv->mm); + return 0; +} + static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) { @@ -399,14 +441,14 @@ const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = xxx_release, }; const struct file_operations proc_tid_maps_operations = { .open = tid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = xxx_release, }; /* @@ -682,14 +724,14 @@ const struct file_operations proc_pid_smaps_operations = { .open = pid_smaps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = xxx_release, }; const struct file_operations proc_tid_smaps_operations = { .open = tid_smaps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = xxx_release, }; /* -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html