[PATCH 2/3] vfs: ->mmap_prepare()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Some filesystems (NFS) need i_mutex in ->mmap(), which is called with mmap_sem
held; this violates the normal locking order. Provide a hook that is called
before we take mmap_sem.

This leaves a window between ->mmap_prepare() and ->mmap(). If that's a problem
(Trond?), we could also provide ->mmap_finish() and guarantee that it is called
whenever ->mmap_prepare() returned success.

This would allow holding state and thereby close the window.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
 Documentation/filesystems/Locking |   11 ++++++++++-
 Documentation/filesystems/vfs.txt |    3 +++
 include/linux/fs.h                |    1 +
 ipc/shm.c                         |   13 +++++++++++++
 mm/mmap.c                         |   12 ++++++++++++
 mm/nommu.c                        |   12 ++++++++++++
 6 files changed, 51 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -1172,6 +1172,7 @@ struct file_operations {
 	int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*mmap_prepare) (struct file *, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c
+++ linux-2.6/mm/mmap.c
@@ -1035,6 +1035,12 @@ unsigned long do_mmap_pgoff(struct file 
 	struct mm_struct *mm = current->mm;
 	unsigned long ret;
 
+	if (file && file->f_op && file->f_op->mmap_prepare) {
+		ret = file->f_op->mmap_prepare(file, len, prot, flags, pgoff);
+		if (ret)
+			return ret;
+	}
+
 	down_write(&mm->mmap_sem);
 	ret = ___do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 	up_write(&mm->mmap_sem);
@@ -1054,6 +1060,12 @@ unsigned long do_mmap(struct file *file,
 	if ((offset + PAGE_ALIGN(len)) < offset || (offset & ~PAGE_MASK))
 		return ret;
 
+	if (file && file->f_op && file->f_op->mmap_prepare) {
+		ret = file->f_op->mmap_prepare(file, len, prot, flags, pgoff);
+		if (ret)
+			return ret;
+	}
+
 	down_write(&mm->mmap_sem);
 	ret = ___do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 	up_write(&mm->mmap_sem);
Index: linux-2.6/mm/nommu.c
===================================================================
--- linux-2.6.orig/mm/nommu.c
+++ linux-2.6/mm/nommu.c
@@ -1025,6 +1025,12 @@ unsigned long do_mmap_pgoff(struct file 
 	struct mm_struct *mm = current->mm;
 	unsigned long ret;
 
+	if (file && file->f_op && file->f_op->mmap_prepare) {
+		ret = file->f_op->mmap_prepare(file, len, prot, flags, pgoff);
+		if (ret)
+			return ret;
+	}
+
 	down_write(&mm->mmap_sem);
 	ret = ___do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 	up_write(&mm->mmap_sem);
@@ -1044,6 +1050,12 @@ unsigned long do_mmap(struct file *file,
 	if ((offset + PAGE_ALIGN(len)) < offset || (offset & ~PAGE_MASK))
 		return ret;
 
+	if (file && file->f_op && file->f_op->mmap_prepare) {
+		ret = file->f_op->mmap_prepare(file, len, prot, flags, pgoff);
+		if (ret)
+			return ret;
+	}
+
 	down_write(&mm->mmap_sem);
 	ret = ___do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 	up_write(&mm->mmap_sem);
Index: linux-2.6/ipc/shm.c
===================================================================
--- linux-2.6.orig/ipc/shm.c
+++ linux-2.6/ipc/shm.c
@@ -300,6 +300,12 @@ static int shm_mmap(struct file * file, 
 	struct shm_file_data *sfd = shm_file_data(file);
 	int ret;
 
+	/*
+	 * SHM backing filesystems must not provide ->mmap_prepare()!
+	 * See do_shmat().
+	 */
+	WARN_ON(sfd->file->f_op->mmap_prepare);
+
 	ret = sfd->file->f_op->mmap(sfd->file, vma);
 	if (ret != 0)
 		return ret;
@@ -1012,6 +1018,13 @@ long do_shmat(int shmid, char __user *sh
 			goto invalid;
 	}
 		
+	/*
+	 *  ___do_mmap_pgoff() must be used here because we already hold the
+	 *  mmap_sem, due to find_vma_intersection vs mmap races.
+	 *
+	 *  This prohibits SHM backing filesystems from using
+	 *  f_op->mmap_prepare().
+	 */
 	user_addr = ___do_mmap_pgoff (file, addr, size, prot, flags, 0);
 	*raddr = user_addr;
 	err = 0;
Index: linux-2.6/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.orig/Documentation/filesystems/Locking
+++ linux-2.6/Documentation/filesystems/Locking
@@ -378,6 +378,8 @@ prototypes:
 			unsigned long);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*mmap_prepare) (struct file *, unsigned long len, unsigned long prot,
+			unsigned long flags, unsigned long pgoff);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
@@ -413,7 +415,8 @@ poll:			no
 ioctl:			yes	(see below)
 unlocked_ioctl:		no	(see below)
 compat_ioctl:		no
-mmap:			no
+mmap_prepare:		no	(see below)
+mmap:			no	(see below)
 open:			maybe	(see below)
 flush:			no
 release:		no
@@ -436,6 +439,12 @@ For many filesystems, it is probably saf
 semaphore.  Note some filesystems (i.e. remote ones) provide no
 protection for i_size so you will need to use the BKL.
 
+->mmap_prepare() is called on mmap(2) _before_ acquisition of the mmap_sem.
+Filesystems can use this hook to prepare the file for being mapped, and can
+take i_mutex if they need to.
+
+->mmap() is called while the mmap_sem is held.
+
 ->open() locking is in-transit: big lock partially moved into the methods.
 The only exception is ->open() in the instances of file_operations that never
 end up in ->i_fop/->proc_fops, i.e. ones that belong to character devices
Index: linux-2.6/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.orig/Documentation/filesystems/vfs.txt
+++ linux-2.6/Documentation/filesystems/vfs.txt
@@ -762,6 +762,7 @@ struct file_operations {
 	int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*mmap_prepare) (struct file *, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
@@ -809,6 +810,8 @@ otherwise noted.
   compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
  	 are used on 64 bit kernels.
 
+  mmap_prepare: called by the mmap(2) system call, before mmap_sem is taken
+
   mmap: called by the mmap(2) system call
 
   open: called by the VFS when an inode should be opened. When the VFS

--

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux