[PATCH v3 2/7] vfs: Add O_DENYREAD/WRITE flags support for open syscall

Pavel Shilovsky <piastry@xxxxxxxxxxx> · Thu, 28 Feb 2013 19:25:28 +0400

If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
translated to flock's flags:

!O_DENYREAD  -> LOCK_READ
!O_DENYWRITE -> LOCK_WRITE
O_DENYMAND   -> LOCK_MAND

and set through flock_lock_file on a file.

This change affects opens that use O_DENYMAND flag - all other
native Linux opens don't care about these flags. It allow us to
enable this feature for applications that need it (e.g. NFS and
Samba servers that export the same directory for Windows clients,
or Wine applications that access the same files simultaneously).

Create codepath is slightly changed to prevent data races on
newely created files: when open with O_CREAT can return with -ETXTBSY
error for successfully created files due to a deny lock set by
another task.

Signed-off-by: Pavel Shilovsky <piastry@xxxxxxxxxxx>
---
 fs/locks.c         | 116 +++++++++++++++++++++++++++++++++++++++++++++++------
 fs/namei.c         |  44 ++++++++++++++++++--
 include/linux/fs.h |   6 +++
 3 files changed, 151 insertions(+), 15 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index a94e331..0cc7d1b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -605,20 +605,81 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
 	return (locks_conflict(caller_fl, sys_fl));
 }
 
-/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
- * checking before calling the locks_conflict().
+static unsigned int
+deny_flags_to_cmd(unsigned int flags)
+{
+	unsigned int cmd = LOCK_MAND;
+
+	if (!(flags & O_DENYREAD))
+		cmd |= LOCK_READ;
+	if (!(flags & O_DENYWRITE))
+		cmd |= LOCK_WRITE;
+
+	return cmd;
+}
+
+/*
+ * locks_mand_conflict - Determine if there's a share reservation conflict
+ * @caller_fl: lock we're attempting to acquire
+ * @sys_fl: lock already present on system that we're checking against
+ *
+ * Check to see if there's a share_reservation conflict. LOCK_READ/LOCK_WRITE
+ * tell us whether the reservation allows other readers and writers.
+ *
+ * We only check against other LOCK_MAND locks, so applications that want to
+ * use share mode locking will only conflict against one another. "normal"
+ * applications that open files won't be affected by and won't themselves
+ * affect the share reservations.
  */
-static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static int
+locks_mand_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 {
-	/* FLOCK locks referring to the same filp do not conflict with
+	unsigned char caller_type = caller_fl->fl_type;
+	unsigned char sys_type = sys_fl->fl_type;
+	fmode_t caller_fmode = caller_fl->fl_file->f_mode;
+	fmode_t sys_fmode = sys_fl->fl_file->f_mode;
+
+	/* they can only conflict if they're both LOCK_MAND */
+	if (!(caller_type & LOCK_MAND) || !(sys_type & LOCK_MAND))
+		return 0;
+
+	if (!(caller_type & LOCK_READ) && (sys_fmode & FMODE_READ))
+		return 1;
+	if (!(caller_type & LOCK_WRITE) && (sys_fmode & FMODE_WRITE))
+		return 1;
+	if (!(sys_type & LOCK_READ) && (caller_fmode & FMODE_READ))
+		return 1;
+	if (!(sys_type & LOCK_WRITE) && (caller_fmode & FMODE_WRITE))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Determine if lock sys_fl blocks lock caller_fl. FLOCK specific checking
+ * before calling the locks_conflict(). Boolean is_mand indicates whether
+ * we should use a share reservation scheme or not.
+ */
+static int
+flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl,
+		     bool is_mand)
+{
+	/*
+	 * FLOCK locks referring to the same filp do not conflict with
 	 * each other.
 	 */
-	if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
-		return (0);
-	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
+	if (!IS_FLOCK(sys_fl))
+		return 0;
+	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) {
+		if (is_mand)
+			return locks_mand_conflict(caller_fl, sys_fl);
+		else
+			return 0;
+	}
+	if (caller_fl->fl_file == sys_fl->fl_file)
 		return 0;
 
-	return (locks_conflict(caller_fl, sys_fl));
+	return locks_conflict(caller_fl, sys_fl);
 }
 
 void
@@ -697,14 +758,19 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
 	return 0;
 }
 
-/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
+/*
+ * Try to create a FLOCK lock on filp. We always insert new FLOCK locks
  * after any leases, but before any posix locks.
  *
  * Note that if called with an FL_EXISTS argument, the caller may determine
  * whether or not a lock was successfully freed by testing the return
  * value for -ENOENT.
+ *
+ * Take @is_conflict callback that determines how to check if locks have
+ * conflicts or not.
  */
-static int flock_lock_file(struct file *filp, struct file_lock *request)
+static int
+flock_lock_file(struct file *filp, struct file_lock *request, bool is_mand)
 {
 	struct file_lock *new_fl = NULL;
 	struct file_lock **before;
@@ -760,7 +826,7 @@ find_conflict:
 			break;
 		if (IS_LEASE(fl))
 			continue;
-		if (!flock_locks_conflict(request, fl))
+		if (!flock_locks_conflict(request, fl, is_mand))
 			continue;
 		error = -EAGAIN;
 		if (!(request->fl_flags & FL_SLEEP))
@@ -783,6 +849,32 @@ out:
 	return error;
 }
 
+/*
+ * Determine if a file is allowed to be opened with specified access and deny
+ * modes. Lock the file and return 0 if checks passed, otherwise return a error
+ * code.
+ */
+int
+deny_lock_file(struct file *filp)
+{
+	struct file_lock *lock;
+	int error = 0;
+
+	if (!(filp->f_flags & O_DENYMAND))
+		return error;
+
+	error = flock_make_lock(filp, &lock, deny_flags_to_cmd(filp->f_flags));
+	if (error)
+		return error;
+
+	error = flock_lock_file(filp, lock, true);
+	if (error == -EAGAIN)
+		error = -ETXTBSY;
+
+	locks_free_lock(lock);
+	return error;
+}
+
 static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
 {
 	struct file_lock *fl;
@@ -1589,7 +1681,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
 	int error;
 	might_sleep();
 	for (;;) {
-		error = flock_lock_file(filp, fl);
+		error = flock_lock_file(filp, fl, false);
 		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
diff --git a/fs/namei.c b/fs/namei.c
index 43a97ee..c1f7e08 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2559,9 +2559,14 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 	 * here.
 	 */
 	error = may_open(&file->f_path, acc_mode, open_flag);
-	if (error)
+	if (error) {
 		fput(file);
+		goto out;
+	}
 
+	error = deny_lock_file(file);
+	if (error)
+		fput(file);
 out:
 	dput(dentry);
 	return error;
@@ -2771,9 +2776,9 @@ retry_lookup:
 	}
 	mutex_lock(&dir->d_inode->i_mutex);
 	error = lookup_open(nd, path, file, op, got_write, opened);
-	mutex_unlock(&dir->d_inode->i_mutex);
 
 	if (error <= 0) {
+		mutex_unlock(&dir->d_inode->i_mutex);
 		if (error)
 			goto out;
 
@@ -2791,8 +2796,32 @@ retry_lookup:
 		will_truncate = false;
 		acc_mode = MAY_OPEN;
 		path_to_nameidata(path, nd);
-		goto finish_open_created;
+
+		/*
+		 * Unlock parent i_mutex later when the open finishes - prevent
+		 * races when a file can be locked with a deny lock by another
+		 * task that opens the file.
+		 */
+		error = may_open(&nd->path, acc_mode, open_flag);
+		if (error) {
+			mutex_unlock(&dir->d_inode->i_mutex);
+			goto out;
+		}
+		file->f_path.mnt = nd->path.mnt;
+		error = finish_open(file, nd->path.dentry, NULL, opened);
+		if (error) {
+			mutex_unlock(&dir->d_inode->i_mutex);
+			if (error == -EOPENSTALE)
+				goto stale_open;
+			goto out;
+		}
+		error = deny_lock_file(file);
+		mutex_unlock(&dir->d_inode->i_mutex);
+		if (error)
+			goto exit_fput;
+		goto opened;
 	}
+	mutex_unlock(&dir->d_inode->i_mutex);
 
 	/*
 	 * create/update audit record if it already exists.
@@ -2885,6 +2914,15 @@ finish_open_created:
 			goto stale_open;
 		goto out;
 	}
+	/*
+	 * Lock parent i_mutex to prevent races with deny locks on newely
+	 * created files.
+	 */
+	mutex_lock(&dir->d_inode->i_mutex);
+	error = deny_lock_file(file);
+	mutex_unlock(&dir->d_inode->i_mutex);
+	if (error)
+		goto exit_fput;
 opened:
 	error = open_check_o_direct(file);
 	if (error)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7617ee0..347e1de 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1005,6 +1005,7 @@ extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
 extern void locks_delete_block(struct file_lock *waiter);
+extern int deny_lock_file(struct file *);
 extern void lock_flocks(void);
 extern void unlock_flocks(void);
 #else /* !CONFIG_FILE_LOCKING */
@@ -1153,6 +1154,11 @@ static inline void locks_delete_block(struct file_lock *waiter)
 {
 }
 
+static inline int deny_lock_file(struct file *filp)
+{
+	return 0;
+}
+
 static inline void lock_flocks(void)
 {
 }
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html