Reading /proc/mounts from userspace is atomic, but only within a single system call. That means that if there are ongoing unmounts while a userspace process is reading /proc/mounts, the process can omit some mount points. The patch makes /proc/mounts more-or-less consistent: a userspace process is guaranteed to read all mount points that will exist when the process closes the file. This is achieved by keeping the position where a process stopped as a pointer to mount entry and resuming reading from the position. If a mount entry is removed, all processes that stopped on the entry are advanced i.e. their position is moved to the next entry. To achieve this, all processes reading /proc/mounts are organized in a linked list. An example of /proc/mounts inconsistency is here: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=593516 --- fs/mount.h | 7 ++++++ fs/namespace.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++- fs/proc_namespace.c | 5 ++++ 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 4ef36d9..d02574a 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -70,7 +70,14 @@ struct proc_mounts { struct seq_file m; /* must be the first element */ struct mnt_namespace *ns; struct path root; + struct list_head *iter; + loff_t iter_pos; + int iter_advanced; + struct list_head reader; int (*show)(struct seq_file *, struct vfsmount *); }; +extern void register_mounts_reader(struct proc_mounts *p); +extern void unregister_mounts_reader(struct proc_mounts *p); + extern const struct seq_operations mounts_op; diff --git a/fs/namespace.c b/fs/namespace.c index e608199..504940a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -51,6 +51,37 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ DEFINE_BRLOCK(vfsmount_lock); +static LIST_HEAD(mounts_readers); +static DEFINE_SPINLOCK(mounts_lock); + +void register_mounts_reader(struct proc_mounts *p) +{ + spin_lock(&mounts_lock); + list_add(&p->reader, &mounts_readers); + spin_unlock(&mounts_lock); +} + +void unregister_mounts_reader(struct proc_mounts *p) +{ + spin_lock(&mounts_lock); + list_del(&p->reader); + spin_unlock(&mounts_lock); +} + +static void advance_mounts_readers(struct list_head *iter) +{ + struct proc_mounts *p; + + spin_lock(&mounts_lock); + list_for_each_entry(p, &mounts_readers, reader) { + if (p->iter == iter) { + p->iter = p->iter->next; + p->iter_advanced = 1; + } + } + spin_unlock(&mounts_lock); +} + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -941,14 +972,39 @@ static void *m_start(struct seq_file *m, loff_t *pos) struct proc_mounts *p = container_of(m, struct proc_mounts, m); down_read(&namespace_sem); - return seq_list_start(&p->ns->list, *pos); + if (p->iter_advanced) { + p->iter_advanced = 0; + if (p->iter_pos < *pos) + p->iter_pos++; + } + + if (!p->iter || (p->iter_pos > *pos && p->iter == &p->ns->list)) { + p->iter = p->ns->list.next; + p->iter_pos = 0; + } + + while (p->iter_pos < *pos && p->iter != &p->ns->list) { + p->iter = p->iter->next; + p->iter_pos++; + } + + while (p->iter_pos > *pos && p->iter != p->ns->list.next) { + p->iter = p->iter->prev; + p->iter_pos--; + } + + p->iter_pos = *pos; + return p->iter != &p->ns->list ? p->iter : NULL; } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { struct proc_mounts *p = container_of(m, struct proc_mounts, m); - return seq_list_next(v, &p->ns->list, pos); + p->iter = p->iter->next; + p->iter_pos++; + *pos = p->iter_pos; + return p->iter != &p->ns->list ? p->iter : NULL; } static void m_stop(struct seq_file *m, void *v) @@ -1071,6 +1127,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) list_for_each_entry(p, &tmp_list, mnt_hash) { list_del_init(&p->mnt_expire); + advance_mounts_readers(&p->mnt_list); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 1241285..4f4524d 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -273,6 +273,10 @@ static int mounts_open_common(struct inode *inode, struct file *file, p->root = root; p->m.poll_event = ns->event; p->show = show; + p->iter = NULL; + p->iter_pos = 0; + p->iter_advanced = 0; + register_mounts_reader(p); return 0; @@ -289,6 +293,7 @@ static int mounts_open_common(struct inode *inode, struct file *file, static int mounts_release(struct inode *inode, struct file *file) { struct proc_mounts *p = file->private_data; + unregister_mounts_reader(p); path_put(&p->root); put_mnt_ns(p->ns); return seq_release(inode, file); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html