[PATCH/RFC] fscache/cachefiles versus btrfs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



hi,
 fscache cannot currently be used with btrfs as the backing store for the
 cache (managed by cachefilesd).
 This is because cachefiles needs the ->bmap address_space_operation, and
 btrfs doesn't provide it.

 cachefiles only uses this to find out if a particular page is a 'hole' or
 not.  For btrfs, this can be done with 'SEEK_DATA'.

 Unfortunately it doesn't seem to be possible to query a filesystem or a file
 to see if SEEK_DATA is reliable or not, so we cannot simply use SEEK_DATA
 when reliable, else ->bmap if available.

 The following patch make fscache work for me on btrfs.  It explicitly checks
 for BTRFS_SUPER_MAGIC.  Not really a nice solution, but all I could think of.

 Is there a better way?  Could a better way be created?  Maybe
 SEEK_DATA_RELIABLE ??

 Comments, suggestions welcome.


Also, if you do try to use fscache on btrfs with 3.19, then nothing gets
cached (as expected) and with a heavy load you can lose a race and get an
asserting fail in fscache_enqueue_operation

	ASSERT(fscache_object_is_available(op->object));

It looks like the object is being killed before it is available...

[  859.700765] kernel BUG at ../fs/fscache/operation.c:38!
...
[  859.703124] Call Trace:
[  859.703193]  [<ffffffffa0448044>] fscache_run_op.isra.4+0x34/0x80 [fscache]
[  859.703260]  [<ffffffffa0448160>] fscache_start_operations+0xa0/0xf0 [fscache]
[  859.703388]  [<ffffffffa0446cd8>] fscache_kill_object+0x98/0xc0 [fscache]
[  859.703455]  [<ffffffffa04475c1>] fscache_object_work_func+0x151/0x210 [fscache]
[  859.703578]  [<ffffffff81078b07>] process_one_work+0x147/0x3c0
[  859.703642]  [<ffffffff8107929c>] worker_thread+0x20c/0x470

I haven't figured out the cause of that yet.


Thanks,
NeilBrown




diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 1e51714eb33e..1389d8483d5d 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -20,6 +20,7 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/slab.h>
+#include <linux/magic.h>
 #include "internal.h"
 
 #define CACHEFILES_KEYBUF_SIZE 512
@@ -647,7 +648,8 @@ lookup_again:
 
 			ret = -EPERM;
 			aops = object->dentry->d_inode->i_mapping->a_ops;
-			if (!aops->bmap)
+			if (!aops->bmap &&
+			    object->dentry->d_sb->s_magic != BTRFS_SUPER_MAGIC)
 				goto check_error;
 
 			object->backer = object->dentry;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c6cd8d7a4eef..49fb330c0ab8 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -410,11 +410,11 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 
 	inode = object->backer->d_inode;
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->bmap);
 	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
-	if (inode->i_sb->s_blocksize > PAGE_SIZE)
+	if (inode->i_mapping->a_ops->bmap &&
+	    inode->i_sb->s_blocksize > PAGE_SIZE)
 		goto enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
@@ -423,20 +423,36 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	op->op.flags |= FSCACHE_OP_ASYNC;
 	op->op.processor = cachefiles_read_copier;
 
-	/* we assume the absence or presence of the first block is a good
-	 * enough indication for the page as a whole
-	 * - TODO: don't use bmap() for this as it is _not_ actually good
-	 *   enough for this as it doesn't indicate errors, but it's all we've
-	 *   got for the moment
-	 */
-	block0 = page->index;
-	block0 <<= shift;
-
-	block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
-	_debug("%llx -> %llx",
-	       (unsigned long long) block0,
-	       (unsigned long long) block);
+	if (inode->i_mapping->a_ops->bmap) {
+		/* we assume the absence or presence of the first block is a good
+		 * enough indication for the page as a whole
+		 * - TODO: don't use bmap() for this as it is _not_ actually good
+		 *   enough for this as it doesn't indicate errors, but it's all we've
+		 *   got for the moment
+		 */
+		block0 = page->index;
+		block0 <<= shift;
 
+		block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
+		_debug("%llx -> %llx",
+		       (unsigned long long) block0,
+		       (unsigned long long) block);
+	} else {
+		/* Use llseek */
+		struct path path;
+		struct file *file;
+		path.mnt = cache->mnt;
+		path.dentry = object->backer;
+		file = dentry_open(&path, O_RDONLY, cache->cache_cred);
+		if (IS_ERR(file))
+			goto enobufs;
+		block = vfs_llseek(file, page->index << PAGE_SHIFT, SEEK_DATA);
+		filp_close(file, NULL);
+		if (block != page->index << PAGE_SHIFT)
+			block = 0;
+		else
+			block = 1;
+	}
 	if (block) {
 		/* submit the apparently valid page to the backing fs to be
 		 * read from disk */
@@ -707,11 +723,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	inode = object->backer->d_inode;
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->bmap);
 	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
-	if (inode->i_sb->s_blocksize > PAGE_SIZE)
+	if (inode->i_mapping->a_ops->bmap &&
+	    inode->i_sb->s_blocksize > PAGE_SIZE)
 		goto all_enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
@@ -729,21 +745,38 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	list_for_each_entry_safe(page, _n, pages, lru) {
 		sector_t block0, block;
 
-		/* we assume the absence or presence of the first block is a
-		 * good enough indication for the page as a whole
-		 * - TODO: don't use bmap() for this as it is _not_ actually
-		 *   good enough for this as it doesn't indicate errors, but
-		 *   it's all we've got for the moment
-		 */
-		block0 = page->index;
-		block0 <<= shift;
-
-		block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
-						      block0);
-		_debug("%llx -> %llx",
-		       (unsigned long long) block0,
-		       (unsigned long long) block);
+		if (inode->i_mapping->a_ops->bmap) {
+			/* we assume the absence or presence of the first block is a
+			 * good enough indication for the page as a whole
+			 * - TODO: don't use bmap() for this as it is _not_ actually
+			 *   good enough for this as it doesn't indicate errors, but
+			 *   it's all we've got for the moment
+			 */
+			block0 = page->index;
+			block0 <<= shift;
+
+			block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
+							      block0);
+			_debug("%llx -> %llx",
+			       (unsigned long long) block0,
+			       (unsigned long long) block);
 
+		} else {
+			/* Use llseek */
+			struct path path;
+			struct file *file;
+			path.mnt = cache->mnt;
+			path.dentry = object->backer;
+			file = dentry_open(&path, O_RDONLY, cache->cache_cred);
+			if (IS_ERR(file))
+				goto all_enobufs;
+			block = vfs_llseek(file, page->index << PAGE_SHIFT, SEEK_DATA);
+			filp_close(file, NULL);
+			if (block != page->index << PAGE_SHIFT)
+				block = 0;
+			else
+				block = 1;
+		}
 		if (block) {
 			/* we have data - add it to the list to give to the
 			 * backing fs */

Attachment: pgpa7p0cS8gUM.pgp
Description: OpenPGP digital signature


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux