Re: [RFC] FUSE: Notifying the kernel of deletion.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



John Muir <john@xxxxxxxxx> writes:

> Allows a FUSE file-system to tell the kernel when a file or directory is deleted. If the specified dentry has the specified inode number, the kernel will unhash it.
>
> Signed-off-by: John Muir <john@xxxxxxxxx>
> ---
> Please find below a patch that adds notification of deletion to the
> FUSE kernel interface. These patches allow the file-system to tell the
> kernel when a file (or, more particularly, a directory) is
> deleted. This is needed because using the current 'notify_inval_entry'
> does not cause the kernel to clean up directories that are in use
> properly, and as a result the users of those directories see incorrect
> semantics from the file-system. The error condition seen when
> 'notify_inval_entry' is used to notify of a deleted directory is
> avoided when 'notify_delete' is used instead.
>
> I'll demonstrate below with the following scenario:
>   1. User A chdirs into 'testdir' and starts reading 'testfile'.
>   2. User B rm -rf 'testdir'.
>   3. User B creates 'testdir'.
>   4. User C chdirs into 'testdir'.
>
> If you run the above within the same machine on any file-system
> (including fuse file-systems), there is no problem: user C is able to
> chdir into the new testdir. The old testdir is removed from the dentry
> tree, but still open by user A.
>
> If, on the other hand, the operations 2 and 3 are performed via the
> network such that the fuse file-system uses one of the notify
> functions to tell the kernel that the nodes are gone, then the
> following error occurs for user C while user A holds the original
> directory open:
>
>   muirj@empacher:~> ls /test/testdir
>   ls: cannot access /test/testdir: No such file or directory
>
> The issue here is that the kernel still has a dentry for testdir, and
> so it is requesting the attributes for the old directory, while my
> file-system is responding that the directory no longer exists.
>
> If, on the other hand, the file-system can notify the kernel that
> the directory is deleted using the new 'notify_delete' function, then
> the above ls will find the new directory as expected.

I mostly like the patch; you gave a lot of attention to the details.

One thing I don't quite understand is the API.  Why is it necessary to
send the child ID in addition to the parent ID and the name?  It looks
like you are using it for sanity checking, but what exactly is the
purpose?

Thanks,
Miklos

>
> diff -updr orig/fs/fuse/dev.c new/fs/fuse/dev.c
> --- orig/fs/fuse/dev.c	2011-09-18 20:02:26.000000000 +0200
> +++ new/fs/fuse/dev.c	2011-09-20 14:40:06.364000087 +0200
> @@ -1378,7 +1378,59 @@ static int fuse_notify_inval_entry(struc
>  	down_read(&fc->killsb);
>  	err = -ENOENT;
>  	if (fc->sb)
> -		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
> +		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
> +	up_read(&fc->killsb);
> +	kfree(buf);
> +	return err;
> +
> +err:
> +	kfree(buf);
> +	fuse_copy_finish(cs);
> +	return err;
> +}
> +
> +static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
> +			      struct fuse_copy_state *cs)
> +{
> +	struct fuse_notify_delete_out outarg;
> +	int err = -ENOMEM;
> +	char *buf;
> +	struct qstr name;
> +
> +	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
> +	if (!buf)
> +		goto err;
> +
> +	err = -EINVAL;
> +	if (size < sizeof(outarg))
> +		goto err;
> +
> +	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
> +	if (err)
> +		goto err;
> +
> +	err = -ENAMETOOLONG;
> +	if (outarg.namelen > FUSE_NAME_MAX)
> +		goto err;
> +
> +	err = -EINVAL;
> +	if (size != sizeof(outarg) + outarg.namelen + 1)
> +		goto err;
> +
> +	name.name = buf;
> +	name.len = outarg.namelen;
> +	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
> +	if (err)
> +		goto err;
> +	fuse_copy_finish(cs);
> +	buf[outarg.namelen] = 0;
> +	name.hash = full_name_hash(name.name, name.len);
> +
> +	down_read(&fc->killsb);
> +	err = -ENOENT;
> +	if (fc->sb)
> +		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
> +                			       outarg.child, &name);
>  	up_read(&fc->killsb);
>  	kfree(buf);
>  	return err;
> @@ -1596,6 +1648,9 @@ static int fuse_notify(struct fuse_conn
>  	case FUSE_NOTIFY_RETRIEVE:
>  		return fuse_notify_retrieve(fc, size, cs);
>  
> +	case FUSE_NOTIFY_DELETE:
> +		return fuse_notify_delete(fc, size, cs);
> +
>  	default:
>  		fuse_copy_finish(cs);
>  		return -EINVAL;
> diff -updr orig/fs/fuse/dir.c new/fs/fuse/dir.c
> --- orig/fs/fuse/dir.c	2011-09-18 20:02:26.000000000 +0200
> +++ new/fs/fuse/dir.c	2011-09-20 14:40:06.364000087 +0200
> @@ -868,7 +868,7 @@ int fuse_update_attributes(struct inode
>  }
>  
>  int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
> -			     struct qstr *name)
> +			     u64 child_nodeid, struct qstr *name)
>  {
>  	int err = -ENOTDIR;
>  	struct inode *parent;
> @@ -895,8 +895,36 @@ int fuse_reverse_inval_entry(struct supe
>  
>  	fuse_invalidate_attr(parent);
>  	fuse_invalidate_entry(entry);
> +
> +	if (child_nodeid != 0 && entry->d_inode) {
> +		mutex_lock(&entry->d_inode->i_mutex);
> +		if (get_node_id(entry->d_inode) != child_nodeid) {
> +			err = -ENOENT;
> +			goto badentry;
> +		}
> +		if (d_mountpoint(entry)) {
> +			err = -EBUSY;
> +			goto badentry;
> +		}
> +		if (S_ISDIR(entry->d_inode->i_mode)) {
> +			shrink_dcache_parent(entry); 
> +			if (!simple_empty(entry)) {
> +				err = -ENOTEMPTY;
> +				goto badentry;
> +			}
> +			entry->d_inode->i_flags |= S_DEAD;
> +		}
> +		dont_mount(entry);
> +		clear_nlink(entry->d_inode);
> +		err = 0;
> + badentry:
> + 		mutex_unlock(&entry->d_inode->i_mutex);
> +		if (!err)
> +			d_delete(entry);
> +	} else {
> +		err = 0;
> +	}
>  	dput(entry);
> -	err = 0;
>  
>   unlock:
>  	mutex_unlock(&parent->i_mutex);
> diff -updr orig/fs/fuse/fuse_i.h new/fs/fuse/fuse_i.h
> --- orig/fs/fuse/fuse_i.h	2011-09-18 20:02:26.000000000 +0200
> +++ new/fs/fuse/fuse_i.h	2011-09-20 14:40:06.417000079 +0200
> @@ -755,9 +755,15 @@ int fuse_reverse_inval_inode(struct supe
>  /**
>   * File-system tells the kernel to invalidate parent attributes and
>   * the dentry matching parent/name.
> + *
> + * If the child_nodeid is non-zero and: 
> + *    - matches the inode number for the dentry matching parent/name,
> + *    - is not a mount point
> + *    - is a file or an empty directory
> + * then the dentry is unhashed (d_delete()).
>   */
>  int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
> -			     struct qstr *name);
> +			     u64 child_nodeid, struct qstr *name);
>  
>  int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
>  		 bool isdir);
> diff -updr orig/include/linux/fuse.h new/include/linux/fuse.h
> --- orig/include/linux/fuse.h	2011-09-19 23:02:15.019000085 +0200
> +++ new/include/linux/fuse.h	2011-09-20 14:40:06.418000080 +0200
> @@ -50,6 +50,9 @@
>   *
>   * 7.17
>   *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
> + *
> + * 7.18
> + *  - add FUSE_NOTIFY_DELETE
>   */
>  
>  #ifndef _LINUX_FUSE_H
> @@ -81,7 +84,7 @@
>  #define FUSE_KERNEL_VERSION 7
>  
>  /** Minor version number of this interface */
> -#define FUSE_KERNEL_MINOR_VERSION 17
> +#define FUSE_KERNEL_MINOR_VERSION 18
>  
>  /** The node ID of the root inode */
>  #define FUSE_ROOT_ID 1
> @@ -283,6 +286,7 @@ enum fuse_notify_code {
>  	FUSE_NOTIFY_INVAL_ENTRY = 3,
>  	FUSE_NOTIFY_STORE = 4,
>  	FUSE_NOTIFY_RETRIEVE = 5,
> +	FUSE_NOTIFY_DELETE = 6,
>  	FUSE_NOTIFY_CODE_MAX,
>  };
>  
> @@ -605,6 +609,13 @@ struct fuse_notify_inval_entry_out {
>  	__u32	namelen;
>  	__u32	padding;
>  };
> +
> +struct fuse_notify_delete_out {
> +	__u64	parent;
> +	__u64	child;
> +	__u32	namelen;
> +	__u32	padding;
> +};
>  
>  struct fuse_notify_store_out {
>  	__u64	nodeid;
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux