[PATCH RFC 06/15] fanotify: Support submission through ring buffer

This adds support for the ring buffer submission mode in fanotify.  It
is enabled by a new flag, FAN_PREALLOC_QUEUE, passed to fanotify_init().
When this flag is set, the group only allows marks whose event types
support ring buffer submission.  A subsequent patch will hook FAN_ERROR
into this mechanism.
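
As an illustration only (not part of the patch), a minimal userspace
sketch of how such a group could be created is shown below.  It defines
FAN_PREALLOC_QUEUE locally, since installed <linux/fanotify.h> headers
will not carry it yet, and assumes the CAP_SYS_ADMIN requirement and the
FAN_UNLIMITED_QUEUE exclusion enforced later in this patch.

/*
 * Illustrative sketch: create a fanotify group backed by the
 * preallocated ring buffer queue proposed by this patch.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/fanotify.h>

#ifndef FAN_PREALLOC_QUEUE
#define FAN_PREALLOC_QUEUE	0x00000080	/* value proposed by this patch */
#endif

int main(void)
{
	/* Needs CAP_SYS_ADMIN; combining with FAN_UNLIMITED_QUEUE is rejected. */
	int fd = fanotify_init(FAN_CLASS_NOTIF | FAN_PREALLOC_QUEUE, O_RDONLY);

	if (fd < 0) {
		fprintf(stderr, "fanotify_init: %s\n", strerror(errno));
		return 1;
	}

	/*
	 * Marks added to this group may only request event types in
	 * FANOTIFY_SUBMISSION_BUFFER_EVENTS, which stays empty until the
	 * FAN_ERROR patch later in this series.
	 */
	return 0;
}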

Signed-off-by: Gabriel Krisman Bertazi <krisman@xxxxxxxxxxxxx>
---
 fs/notify/fanotify/fanotify.c      | 77 +++++++++++++++++++---------
 fs/notify/fanotify/fanotify_user.c | 81 ++++++++++++++++++------------
 include/linux/fanotify.h           |  5 +-
 include/uapi/linux/fanotify.h      |  1 +
 4 files changed, 105 insertions(+), 59 deletions(-)

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index e3669d8a4a64..98591a8155a7 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -612,6 +612,26 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
 	return event;
 }
 
+static struct fanotify_event *fanotify_ring_get_slot(struct fsnotify_group *group,
+						     u32 mask, const void *data,
+						     int data_type)
+{
+	size_t size = 0;
+
+	pr_debug("%s: group=%p mask=%x size=%zu\n", __func__, group, mask, size);
+
+	return FANOTIFY_E(fsnotify_ring_alloc_event_slot(group, size));
+}
+
+static void fanotify_ring_write_event(struct fsnotify_group *group,
+				      struct fanotify_event *event, u32 mask,
+				      const void *data, __kernel_fsid_t *fsid)
+{
+	fanotify_init_event(group, event, 0, mask);
+
+	event->pid = get_pid(task_tgid(current));
+}
+
 /*
  * Get cached fsid of the filesystem containing the object from any connector.
  * All connectors are supposed to have the same fsid, but we do not verify that
@@ -701,31 +721,38 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 			return 0;
 	}
 
-	event = fanotify_alloc_event(group, mask, data, data_type, dir,
-				     file_name, &fsid);
-	ret = -ENOMEM;
-	if (unlikely(!event)) {
-		/*
-		 * We don't queue overflow events for permission events as
-		 * there the access is denied and so no event is in fact lost.
-		 */
-		if (!fanotify_is_perm_event(mask))
-			fsnotify_queue_overflow(group);
-		goto finish;
-	}
-
-	fsn_event = &event->fse;
-	ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
-	if (ret) {
-		/* Permission events shouldn't be merged */
-		BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
-		/* Our event wasn't used in the end. Free it. */
-		fsnotify_destroy_event(group, fsn_event);
-
-		ret = 0;
-	} else if (fanotify_is_perm_event(mask)) {
-		ret = fanotify_get_response(group, FANOTIFY_PERM(event),
-					    iter_info);
+	if (group->flags & FSN_SUBMISSION_RING_BUFFER) {
+		event = fanotify_ring_get_slot(group, mask, data, data_type);
+		if (IS_ERR(event))
+			return PTR_ERR(event);
+		fanotify_ring_write_event(group, event, mask, data, &fsid);
+		fsnotify_ring_commit_slot(group, &event->fse);
+	} else {
+		event = fanotify_alloc_event(group, mask, data, data_type, dir,
+					     file_name, &fsid);
+		ret = -ENOMEM;
+		if (unlikely(!event)) {
+			/*
+			 * We don't queue overflow events for permission events as
+			 * there the access is denied and so no event is in fact lost.
+			 */
+			if (!fanotify_is_perm_event(mask))
+				fsnotify_queue_overflow(group);
+			goto finish;
+		}
+		fsn_event = &event->fse;
+		ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
+		if (ret) {
+			/* Permission events shouldn't be merged */
+			BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
+			/* Our event wasn't used in the end. Free it. */
+			fsnotify_destroy_event(group, fsn_event);
+
+			ret = 0;
+		} else if (fanotify_is_perm_event(mask)) {
+			ret = fanotify_get_response(group, FANOTIFY_PERM(event),
+						    iter_info);
+		}
 	}
 finish:
 	if (fanotify_is_perm_event(mask))
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index fe605359af88..5031198bf7db 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -521,7 +521,9 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 		 * Permission events get queued to wait for response.  Other
 		 * events can be destroyed now.
 		 */
-		if (!fanotify_is_perm_event(event->mask)) {
+		if (group->fanotify_data.flags & FAN_PREALLOC_QUEUE) {
+			fsnotify_ring_buffer_consume_event(group, &event->fse);
+		} else if (!fanotify_is_perm_event(event->mask)) {
 			fsnotify_destroy_event(group, &event->fse);
 		} else {
 			if (ret <= 0) {
@@ -587,40 +589,39 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	 */
 	fsnotify_group_stop_queueing(group);
 
-	/*
-	 * Process all permission events on access_list and notification queue
-	 * and simulate reply from userspace.
-	 */
-	spin_lock(&group->notification_lock);
-	while (!list_empty(&group->fanotify_data.access_list)) {
-		struct fanotify_perm_event *event;
-
-		event = list_first_entry(&group->fanotify_data.access_list,
-				struct fanotify_perm_event, fae.fse.list);
-		list_del_init(&event->fae.fse.list);
-		finish_permission_event(group, event, FAN_ALLOW);
+	if (!(group->flags & FSN_SUBMISSION_RING_BUFFER)) {
+		/*
+		 * Process all permission events on access_list and notification queue
+		 * and simulate reply from userspace.
+		 */
 		spin_lock(&group->notification_lock);
-	}
-
-	/*
-	 * Destroy all non-permission events. For permission events just
-	 * dequeue them and set the response. They will be freed once the
-	 * response is consumed and fanotify_get_response() returns.
-	 */
-	while (!fsnotify_notify_queue_is_empty(group)) {
-		struct fanotify_event *event;
-
-		event = FANOTIFY_E(fsnotify_remove_first_event(group));
-		if (!(event->mask & FANOTIFY_PERM_EVENTS)) {
-			spin_unlock(&group->notification_lock);
-			fsnotify_destroy_event(group, &event->fse);
-		} else {
-			finish_permission_event(group, FANOTIFY_PERM(event),
-						FAN_ALLOW);
+		while (!list_empty(&group->fanotify_data.access_list)) {
+			struct fanotify_perm_event *event;
+			event = list_first_entry(&group->fanotify_data.access_list,
+						 struct fanotify_perm_event, fae.fse.list);
+			list_del_init(&event->fae.fse.list);
+			finish_permission_event(group, event, FAN_ALLOW);
+			spin_lock(&group->notification_lock);
 		}
-		spin_lock(&group->notification_lock);
+		/*
+		 * Destroy all non-permission events. For permission events just
+		 * dequeue them and set the response. They will be freed once the
+		 * response is consumed and fanotify_get_response() returns.
+		 */
+		while (!fsnotify_notify_queue_is_empty(group)) {
+			struct fanotify_event *event;
+			event = FANOTIFY_E(fsnotify_remove_first_event(group));
+			if (!(event->mask & FANOTIFY_PERM_EVENTS)) {
+				spin_unlock(&group->notification_lock);
+				fsnotify_destroy_event(group, &event->fse);
+			} else {
+				finish_permission_event(group, FANOTIFY_PERM(event),
+							FAN_ALLOW);
+			}
+			spin_lock(&group->notification_lock);
+		}
+		spin_unlock(&group->notification_lock);
 	}
-	spin_unlock(&group->notification_lock);
 
 	/* Response for all permission events it set, wakeup waiters */
 	wake_up(&group->fanotify_data.access_waitq);
@@ -981,6 +982,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	if (flags & FAN_NONBLOCK)
 		f_flags |= O_NONBLOCK;
 
+	if (flags & FAN_PREALLOC_QUEUE) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (flags & FAN_UNLIMITED_QUEUE)
+			return -EINVAL;
+
+		fsn_flags = FSN_SUBMISSION_RING_BUFFER;
+	}
+
 	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
 	group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops, fsn_flags);
 	if (IS_ERR(group)) {
@@ -1223,6 +1234,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		goto fput_and_out;
 	}
 
+	if ((group->flags & FSN_SUBMISSION_RING_BUFFER) &&
+	    (mask & ~FANOTIFY_SUBMISSION_BUFFER_EVENTS))
+		goto fput_and_out;
+
 	ret = fanotify_find_path(dfd, pathname, &path, flags,
 			(mask & ALL_FSNOTIFY_EVENTS), obj_type);
 	if (ret)
@@ -1327,7 +1342,7 @@ SYSCALL32_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 11);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
 
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 3e9c56ee651f..5a4cefb4b1c3 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -23,7 +23,8 @@
 #define FANOTIFY_INIT_FLAGS	(FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \
 				 FAN_REPORT_TID | \
 				 FAN_CLOEXEC | FAN_NONBLOCK | \
-				 FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS)
+				 FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS | \
+				 FAN_PREALLOC_QUEUE)
 
 #define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
 				 FAN_MARK_FILESYSTEM)
@@ -71,6 +72,8 @@
 					 FANOTIFY_PERM_EVENTS | \
 					 FAN_Q_OVERFLOW | FAN_ONDIR)
 
+#define FANOTIFY_SUBMISSION_BUFFER_EVENTS 0
+
 #define ALL_FANOTIFY_EVENT_BITS		(FANOTIFY_OUTGOING_EVENTS | \
 					 FANOTIFY_EVENT_FLAGS)
 
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index fbf9c5c7dd59..b283531549f1 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -49,6 +49,7 @@
 #define FAN_UNLIMITED_QUEUE	0x00000010
 #define FAN_UNLIMITED_MARKS	0x00000020
 #define FAN_ENABLE_AUDIT	0x00000040
+#define FAN_PREALLOC_QUEUE	0x00000080
 
 /* Flags to determine fanotify event format */
 #define FAN_REPORT_TID		0x00000100	/* event->pid is thread id */
-- 
2.31.0



