Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sage,

Thanks for taking a look at this. No worries about the timing.

I added two extra changes into my branch located here:
https://bitbucket.org/adfin/linux-fs/commits/branch/forceph. The first
one is a fix for a kernel deadlock. The second one makes fsc cache a
non-default mount option (akin to NFS).

Finally, I observed an occasional oops in the fscache that's fixed in
David's branch that's waiting to get into mainline. The fix for the
issue is here: http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/commit/?h=fscache&id=82958c45e35963c93fc6cbe6a27752e2d97e9f9a.
I can only cause that issue by forcing the kernel to drop its caches
in some cases.

Let me know if you have any other feedback, or if I can help in any way.

Thanks,
- Milosz

On Tue, May 28, 2013 at 1:11 PM, Sage Weil <sage@xxxxxxxxxxx> wrote:
> Hi Milosz,
>
> Just a heads up that I hope to take a closer look at the patch this
> afternoon or tomorrow.  Just catching up after the long weekend.
>
> Thanks!
> sage
>
>
> On Thu, 23 May 2013, Milosz Tanski wrote:
>
>> Enable fscache as an optional feature of ceph.
>>
>> Adding support for fscache to the Ceph filesystem. This would bring it on
>> par with some of the other network filesystems in Linux (like NFS, AFS, etc.)
>>
>> This exploits the existing Ceph cache & lazyio capabilities.
>>
>> Signed-off-by: Milosz Tanski <milosz@xxxxxxxxx>
>> ---
>>  fs/ceph/Kconfig  |    9 ++++++
>>  fs/ceph/Makefile |    2 ++
>>  fs/ceph/addr.c   |   85 ++++++++++++++++++++++++++++++++++++++++--------------
>>  fs/ceph/caps.c   |   21 +++++++++++++-
>>  fs/ceph/file.c   |    9 ++++++
>>  fs/ceph/inode.c  |   25 ++++++++++++++--
>>  fs/ceph/super.c  |   25 ++++++++++++++--
>>  fs/ceph/super.h  |   12 ++++++++
>>  8 files changed, 162 insertions(+), 26 deletions(-)
>>
>> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
>> index 49bc782..ac9a2ef 100644
>> --- a/fs/ceph/Kconfig
>> +++ b/fs/ceph/Kconfig
>> @@ -16,3 +16,12 @@ config CEPH_FS
>>
>>    If unsure, say N.
>>
>> +if CEPH_FS
>> +config CEPH_FSCACHE
>> + bool "Enable Ceph client caching support"
>> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
>> + help
>> +  Choose Y here to enable persistent, read-only local
>> +  caching support for Ceph clients using FS-Cache
>> +
>> +endif
>> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
>> index bd35212..0af0678 100644
>> --- a/fs/ceph/Makefile
>> +++ b/fs/ceph/Makefile
>> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
>>   mds_client.o mdsmap.o strings.o ceph_frag.o \
>>   debugfs.o
>>
>> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
>> +
>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>> index 3e68ac1..fd3a1cc 100644
>> --- a/fs/ceph/addr.c
>> +++ b/fs/ceph/addr.c
>> @@ -11,6 +11,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>  #include <linux/ceph/osd_client.h>
>>
>>  /*
>> @@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
>> *page, unsigned long offset)
>>   struct ceph_inode_info *ci;
>>   struct ceph_snap_context *snapc = page_snap_context(page);
>>
>> - BUG_ON(!PageLocked(page));
>> - BUG_ON(!PagePrivate(page));
>>   BUG_ON(!page->mapping);
>>
>>   inode = page->mapping->host;
>> + ci = ceph_inode(inode);
>> +
>> + if (offset != 0) {
>> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
>> +     inode, page, page->index);
>> + return;
>> + }
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + if (PageFsCache(page))
>> + ceph_invalidate_fscache_page(inode, page);
>> +#endif
>> +
>> + if (!PagePrivate(page))
>> + return;
>> +
>> + BUG_ON(!PageLocked(page));
>>
>>   /*
>>   * We can get non-dirty pages here due to races between
>> @@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
>> *page, unsigned long offset)
>>   if (!PageDirty(page))
>>   pr_err("%p invalidatepage %p page not dirty\n", inode, page);
>>
>> - if (offset == 0)
>> - ClearPageChecked(page);
>> + ClearPageChecked(page);
>>
>> - ci = ceph_inode(inode);
>> - if (offset == 0) {
>> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>> -     inode, page, page->index, offset);
>> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>> - ceph_put_snap_context(snapc);
>> - page->private = 0;
>> - ClearPagePrivate(page);
>> - } else {
>> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
>> -     inode, page, page->index);
>> - }
>> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>> +     inode, page, page->index, offset);
>> +
>> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>> + ceph_put_snap_context(snapc);
>> + page->private = 0;
>> + ClearPagePrivate(page);
>>  }
>>
>> -/* just a sanity check */
>>  static int ceph_releasepage(struct page *page, gfp_t g)
>>  {
>>   struct inode *inode = page->mapping ? page->mapping->host : NULL;
>>   dout("%p releasepage %p idx %lu\n", inode, page, page->index);
>>   WARN_ON(PageDirty(page));
>> - WARN_ON(PagePrivate(page));
>> - return 0;
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Can we release the page from the cache? */
>> + if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
>> + return 0;
>> +#endif
>> + if (PagePrivate(page))
>> + return 0;
>> +
>> + return 1;
>>  }
>>
>>  /*
>> @@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
>> struct page *page)
>>  {
>>   struct inode *inode = file_inode(filp);
>>   struct ceph_inode_info *ci = ceph_inode(inode);
>> - struct ceph_osd_client *osdc =
>> + struct ceph_osd_client *osdc =
>>   &ceph_inode_to_client(inode)->client->osdc;
>>   int err = 0;
>>   u64 len = PAGE_CACHE_SIZE;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + err = ceph_readpage_from_fscache(inode, page);
>> +
>> + if (err == 0)
>> + goto out;
>> +#endif
>> +
>>   dout("readpage inode %p file %p page %p index %lu\n",
>>       inode, filp, page, page->index);
>>   err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
>> @@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
>> struct page *page)
>>   }
>>   SetPageUptodate(page);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_readpage_to_fscache(inode, page);
>> +#endif
>> +
>>  out:
>>   return err < 0 ? err : 0;
>>  }
>> @@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
>> *req, struct ceph_msg *msg)
>>   flush_dcache_page(page);
>>   SetPageUptodate(page);
>>   unlock_page(page);
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_readpage_to_fscache(inode, page);
>> +#endif
>>   page_cache_release(page);
>>   bytes -= PAGE_CACHE_SIZE;
>>   }
>> @@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
>> list_head *page_list, int max)
>>   page = list_entry(page_list->prev, struct page, lru);
>>   BUG_ON(PageLocked(page));
>>   list_del(&page->lru);
>> -
>> +
>>   dout("start_read %p adding %p idx %lu\n", inode, page,
>>       page->index);
>>   if (add_to_page_cache_lru(page, &inode->i_data, page->index,
>> @@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
>> struct address_space *mapping,
>>   int rc = 0;
>>   int max = 0;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
>> + &nr_pages);
>> +
>> + if (rc == 0)
>> + goto out;
>> +#endif
>> +
>>   if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
>>   max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>>   >> PAGE_SHIFT;
>> @@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
>> struct writeback_control *wbc)
>>      CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
>>   set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_readpage_to_fscache(inode, page);
>> +#endif
>> +
>>   set_page_writeback(page);
>>   err = ceph_osdc_writepages(osdc, ceph_vino(inode),
>>     &ci->i_layout, snapc,
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index da0f9b8..7e8d8d3 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -10,6 +10,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>  #include <linux/ceph/decode.h>
>>  #include <linux/ceph/messenger.h>
>>
>> @@ -486,8 +487,14 @@ static void __check_cap_issue(struct
>> ceph_inode_info *ci, struct ceph_cap *cap,
>>   * i_rdcache_gen.
>>   */
>>   if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
>> -    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
>> +    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
>>   ci->i_rdcache_gen++;
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Invalidate the cache for the whole file. */
>> + dout("Invalidating inode data cache: %p", &ci->vfs_inode);
>> + fscache_invalidate(ci->fscache);
>> +#endif
>> + }
>>
>>   /*
>>   * if we are newly issued FILE_SHARED, mark dir not complete; we
>> @@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
>> *inode, struct ceph_mds_caps *grant,
>>   if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
>>      (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
>>      !ci->i_wrbuffer_ref) {
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Close the fscache on inode */
>> + ceph_fscache_unregister_inode_cookie(ci);
>> +#endif
>> +
>>   if (try_nonblocking_invalidate(inode) == 0) {
>>   revoked_rdcache = 1;
>>   } else {
>> @@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
>> *inode, struct ceph_mds_caps *grant,
>>   wake = 1;
>>   }
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Register cache (if needed); perform this after any size change. */
>> + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
>> + ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
>> +#endif
>> +
>>   /* check cap bits */
>>   wanted = __ceph_caps_wanted(ci);
>>   used = __ceph_caps_used(ci);
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 656e169..e7ecc04 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -11,6 +11,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>
>>  /*
>>   * Ceph file operations
>> @@ -67,10 +68,17 @@ out:
>>  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
>>  {
>>   struct ceph_file_info *cf;
>> + struct ceph_inode_info *ci = ceph_inode(inode);
>> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
>>   int ret = 0;
>>
>>   switch (inode->i_mode & S_IFMT) {
>>   case S_IFREG:
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + spin_lock(&ci->i_ceph_lock);
>> + ceph_fscache_register_inode_cookie(fsc, ci);
>> + spin_lock(&ci->i_ceph_lock);
>> +#endif
>>   case S_IFDIR:
>>   dout("init_file %p %p 0%o (regular)\n", inode, file,
>>       inode->i_mode);
>> @@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
>>   spin_unlock(&ci->i_ceph_lock);
>>   return ceph_init_file(inode, file, fmode);
>>   }
>> +
>>   spin_unlock(&ci->i_ceph_lock);
>>
>>   dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index be0f7e2..620b84c 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -12,6 +12,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>  #include <linux/ceph/decode.h>
>>
>>  /*
>> @@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>>
>>   INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ci->fscache = NULL;
>> +#endif
>> +
>>   return &ci->vfs_inode;
>>  }
>>
>> @@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)
>>
>>   dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_fscache_unregister_inode_cookie(ci);
>> +#endif
>> +
>>   ceph_queue_caps_release(inode);
>>
>>   /*
>> @@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
>>   call_rcu(&inode->i_rcu, ceph_i_callback);
>>  }
>>
>> -
>>  /*
>>   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
>>   * careful because either the client or MDS may have more up to date
>> @@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
>>      le32_to_cpu(info->time_warp_seq),
>>      &ctime, &mtime, &atime);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Notify the cache that size has changed */
>> + if (queue_trunc && ci->fscache) {
>> + pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
>> + fscache_attr_changed(ci->fscache);
>> + }
>> +#endif
>> +
>>   /* only update max_size on auth cap */
>>   if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
>>      ci->i_max_size != le64_to_cpu(info->max_size)) {
>> @@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
>> struct ceph_mds_request *req,
>>   * complete.
>>   */
>>   ceph_set_dentry_offset(req->r_old_dentry);
>> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>>       ceph_dentry(req->r_old_dentry)->offset);
>>
>>   dn = req->r_old_dentry;  /* use old_dentry */
>> @@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
>> work_struct *work)
>>   orig_gen = ci->i_rdcache_gen;
>>   spin_unlock(&ci->i_ceph_lock);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
>> + fscache_invalidate(ci->fscache);
>> +#endif
>> +
>>   truncate_inode_pages(&inode->i_data, 0);
>>
>>   spin_lock(&ci->i_ceph_lock);
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 7d377c9..7847ef7 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -17,6 +17,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>
>>  #include <linux/ceph/ceph_features.h>
>>  #include <linux/ceph/decode.h>
>> @@ -530,6 +531,11 @@ static struct ceph_fs_client
>> *create_fs_client(struct ceph_mount_options *fsopt,
>>   if (!fsc->wb_pagevec_pool)
>>   goto fail_trunc_wq;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* fscache */
>> + ceph_fscache_register_fsid_cookie(fsc);
>> +#endif
>> +
>>   /* caps */
>>   fsc->min_caps = fsopt->max_readdir;
>>
>> @@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>>  {
>>   dout("destroy_fs_client %p\n", fsc);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_fscache_unregister_fsid_cookie(fsc);
>> +#endif
>> +
>>   destroy_workqueue(fsc->wb_wq);
>>   destroy_workqueue(fsc->pg_inv_wq);
>>   destroy_workqueue(fsc->trunc_wq);
>> @@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)
>>
>>  static int __init init_caches(void)
>>  {
>> + int error = -ENOMEM;
>> +
>>   ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
>>        sizeof(struct ceph_inode_info),
>>        __alignof__(struct ceph_inode_info),
>> @@ -611,15 +623,19 @@ static int __init init_caches(void)
>>   if (ceph_file_cachep == NULL)
>>   goto bad_file;
>>
>> - return 0;
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + if ((error = fscache_register_netfs(&ceph_cache_netfs)))
>> + goto bad_file;
>> +#endif
>>
>> + return 0;
>>  bad_file:
>>   kmem_cache_destroy(ceph_dentry_cachep);
>>  bad_dentry:
>>   kmem_cache_destroy(ceph_cap_cachep);
>>  bad_cap:
>>   kmem_cache_destroy(ceph_inode_cachep);
>> - return -ENOMEM;
>> + return error;
>>  }
>>
>>  static void destroy_caches(void)
>> @@ -629,10 +645,15 @@ static void destroy_caches(void)
>>   * destroy cache.
>>   */
>>   rcu_barrier();
>> +
>>   kmem_cache_destroy(ceph_inode_cachep);
>>   kmem_cache_destroy(ceph_cap_cachep);
>>   kmem_cache_destroy(ceph_dentry_cachep);
>>   kmem_cache_destroy(ceph_file_cachep);
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + fscache_unregister_netfs(&ceph_cache_netfs);
>> +#endif
>>  }
>>
>>
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 8696be2..2980337 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -16,6 +16,10 @@
>>
>>  #include <linux/ceph/libceph.h>
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> +#include <linux/fscache.h>
>> +#endif
>> +
>>  /* f_type in struct statfs */
>>  #define CEPH_SUPER_MAGIC 0x00c36400
>>
>> @@ -90,6 +94,10 @@ struct ceph_fs_client {
>>   struct dentry *debugfs_bdi;
>>   struct dentry *debugfs_mdsc, *debugfs_mdsmap;
>>  #endif
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + struct fscache_cookie *fscache;
>> +#endif
>>  };
>>
>>
>> @@ -319,6 +327,10 @@ struct ceph_inode_info {
>>
>>   struct work_struct i_vmtruncate_work;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + struct fscache_cookie *fscache;
>> +#endif
>> +
>>   struct inode vfs_inode; /* at end */
>>  };
>>
>> --
>> 1.7.9.5
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux