[RFC/PATCH] revoke/frevoke system calls

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Pekka Enberg <penberg@xxxxxxxxxxxxxx>

This patch implements the revoke(2) and frevoke(2) system calls for all
types of files. We revoke files in two passes: first, we scan all open
files that refer to the inode and replace the struct file pointer in the fd
table with NULL, causing all subsequent operations on that fd to fail.
After we have done that for all file descriptors, we close the files and
take down their mmaps.

Note that sys_read and sys_write now need to do fget/fput unconditionally
(instead of fget_light/fput_light) because they race with do_revoke.

Signed-off-by: Pekka Enberg <penberg@xxxxxxxxxxxxxx>
---

 arch/i386/kernel/syscall_table.S |    2 
 fs/Makefile                      |    2 
 fs/read_write.c                  |   10 -
 fs/revoke.c                      |  207 +++++++++++++++++++++++++++++++++++++++
 include/asm-i386/unistd.h        |    4 
 include/linux/syscalls.h         |    3 
 6 files changed, 220 insertions(+), 8 deletions(-)

Index: 2.6/arch/i386/kernel/syscall_table.S
===================================================================
--- 2.6.orig/arch/i386/kernel/syscall_table.S
+++ 2.6/arch/i386/kernel/syscall_table.S
@@ -317,3 +317,5 @@ ENTRY(sys_call_table)
 	.long sys_tee			/* 315 */
 	.long sys_vmsplice
 	.long sys_move_pages
+	.long sys_revoke
+	.long sys_frevoke
Index: 2.6/fs/Makefile
===================================================================
--- 2.6.orig/fs/Makefile
+++ 2.6/fs/Makefile
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o drop_caches.o splice.o sync.o
+		ioprio.o pnode.o drop_caches.o splice.o sync.o revoke.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
Index: 2.6/fs/read_write.c
===================================================================
--- 2.6.orig/fs/read_write.c
+++ 2.6/fs/read_write.c
@@ -343,14 +343,13 @@ asmlinkage ssize_t sys_read(unsigned int
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
+	file = fget(fd);
 	if (file) {
 		loff_t pos = file_pos_read(file);
 		ret = vfs_read(file, buf, count, &pos);
 		file_pos_write(file, pos);
-		fput_light(file, fput_needed);
+		fput(file);
 	}
 
 	return ret;
@@ -361,14 +360,13 @@ asmlinkage ssize_t sys_write(unsigned in
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
+	file = fget(fd);
 	if (file) {
 		loff_t pos = file_pos_read(file);
 		ret = vfs_write(file, buf, count, &pos);
 		file_pos_write(file, pos);
-		fput_light(file, fput_needed);
+		fput(file);
 	}
 
 	return ret;
Index: 2.6/fs/revoke.c
===================================================================
--- /dev/null
+++ 2.6/fs/revoke.c
@@ -0,0 +1,207 @@
+/*
+ * fs/revoke.c - Invalidate all current open file descriptors of an inode.
+ *
+ * Copyright (C) 2006 Pekka Enberg
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+/*
+ * One fd-table entry that revoke_files() cleared and close_files() must close.
+ */
+struct revoked_file {
+	struct file *file;		/* file taken out of the fd table */
+	struct task_struct *owner;	/* pinned via get_task_struct() */
+	struct list_head list_node;	/* entry in the to_close list */
+};
+
+/*
+ * Detach every fd of @owner that refers to @inode, except @exclude, and queue
+ * it on @to_close. Returns 0 or -ENOMEM. LOCKING: task_lock(owner)
+ */
+static int revoke_files(struct task_struct *owner, struct inode *inode,
+			struct file *exclude, struct list_head *to_close)
+{
+	int err = 0;
+	struct files_struct *files;
+	struct fdtable *fdt;
+	unsigned int fd;
+
+	files = owner->files;
+	spin_lock(&files->file_lock);
+
+	fdt = files_fdtable(files);
+	for (fd = 0; fd < fdt->max_fds; fd++) {
+		struct file *file;
+		struct revoked_file *revoked;
+
+		file = fcheck_files(files, fd);
+		if (!file || file == exclude)
+			continue;
+
+		if (file->f_dentry->d_inode != inode)
+			continue;
+
+		/*
+		 * We hold spinlocks, so the allocation must not sleep. Do it
+		 * before touching the fd table so that a failure leaves this
+		 * descriptor intact instead of losing the struct file.
+		 */
+		revoked = kmalloc(sizeof(*revoked), GFP_ATOMIC);
+		if (!revoked) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		/*
+		 * Leak the fd so it is not reused. After this point, we don't
+		 * need to worry about racing with sys_close or sys_dup.
+		 */
+		rcu_assign_pointer(fdt->fd[fd], NULL);
+		FD_CLR(fd, fdt->close_on_exec);
+
+		INIT_LIST_HEAD(&revoked->list_node);
+		revoked->file  = file;
+		revoked->owner = owner;
+
+		/* Pin the task until close_files() is done with it. */
+		get_task_struct(owner);
+
+		list_add(&revoked->list_node, to_close);
+	}
+  out:
+	spin_unlock(&files->file_lock);
+	return err;
+}
+
+static struct page *revoked_nopage(struct vm_area_struct *area,
+				   unsigned long address, int *type)
+{
+	return NULL;	/* never supply a page for a revoked mapping */
+}
+
+static struct vm_operations_struct revoked_vm_ops = {
+	.nopage		= revoked_nopage,	/* always returns NULL */
+};
+
+static int revoke_mapping(struct address_space *mapping)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+
+	spin_lock(&mapping->i_mmap_lock);
+	/* Revoke the mmaps: point every vma of the mapping at revoked_vm_ops. */
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
+		vma->vm_ops = &revoked_vm_ops;
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+		vma->vm_ops = &revoked_vm_ops;
+	spin_unlock(&mapping->i_mmap_lock);
+
+	/* FIXME: If some pages fail to invalidate, nothing takes them down
+	   later, yet subsequent revoke operations still succeed... */
+	return invalidate_inode_pages2(mapping);
+}
+
+/* Close the files queued by revoke_files() and revoke their mappings. */
+static int close_files(struct list_head *to_close)
+{
+	int ret = 0;
+	struct revoked_file *this, *next;
+
+	list_for_each_entry_safe(this, next, to_close, list_node) {
+		struct inode *inode = this->file->f_dentry->d_inode;
+		struct files_struct *files;
+		int err;
+
+		/* filp_close() may sleep: pin the owner's files_struct instead
+		   of holding task_lock(). NOTE(review): if the owner already
+		   exited (no files), the file is never released -- confirm. */
+		files = get_files_struct(this->owner);
+		if (files) {
+			err = filp_close(this->file, files);
+			if (err)
+				ret = err;
+			put_files_struct(files);
+		}
+		put_task_struct(this->owner);
+
+		err = revoke_mapping(inode->i_mapping);
+		if (err)
+			ret = err;
+		list_del(&this->list_node);
+		kfree(this);
+	}
+	return ret;
+}
+
+/* Revoke all fds open on @inode except @exclude; returns 0 or an error. */
+static int do_revoke(struct inode *inode, struct file *exclude)
+{
+	int err, ret = 0;
+	struct task_struct *g, *p;
+	struct list_head to_close = LIST_HEAD_INIT(to_close);
+
+	/* Only the owner of the inode or a CAP_FOWNER holder may revoke. */
+	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	/*
+	 * First revoke the file descriptors. After we are done, all new
+	 * operations on the descriptors will fail.
+	 */
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		task_lock(p);
+
+		if (p->files) {
+			ret = revoke_files(p, inode, exclude, &to_close);
+			if (ret) {
+				task_unlock(p);
+				goto out_unlock_tasklist;
+			}
+		}
+		task_unlock(p);
+	} while_each_thread(g, p);
+
+  out_unlock_tasklist:
+	read_unlock(&tasklist_lock);
+
+	/* Close what was collected, even after an error, and take down mmaps. */
+	err = close_files(&to_close);
+	if (err)
+		ret = err;
+  out:
+	return ret;
+}
+
+/* revoke(2): look up @filename and revoke the open fds on its inode. */
+asmlinkage int sys_revoke(const char __user *filename)
+{
+	struct nameidata nd;
+	int err = __user_walk(filename, 0, &nd);
+
+	if (err)
+		return err;
+	err = do_revoke(nd.dentry->d_inode, NULL);
+	path_release(&nd);
+	return err;
+}
+
+asmlinkage int sys_frevoke(unsigned int fd)
+{
+	struct file *filp = fget(fd);	/* spared by do_revoke() */
+	int ret;
+
+	if (!filp)
+		return -EBADF;
+	ret = do_revoke(filp->f_dentry->d_inode, filp);
+	fput(filp);
+	return ret;
+}
Index: 2.6/include/asm-i386/unistd.h
===================================================================
--- 2.6.orig/include/asm-i386/unistd.h
+++ 2.6/include/asm-i386/unistd.h
@@ -323,10 +323,12 @@
 #define __NR_tee		315
 #define __NR_vmsplice		316
 #define __NR_move_pages		317
+#define __NR_revoke		318
+#define __NR_frevoke		319
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 318
+#define NR_syscalls 320
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
Index: 2.6/include/linux/syscalls.h
===================================================================
--- 2.6.orig/include/linux/syscalls.h
+++ 2.6/include/linux/syscalls.h
@@ -597,4 +597,7 @@ asmlinkage long sys_get_robust_list(int 
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 
+asmlinkage int sys_revoke(const char __user *filename);
+asmlinkage int sys_frevoke(unsigned int fd);
+
 #endif
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux