Current ceph uses FSID as primary index key of fscache data. This allows ceph to retain cached data across remount. But this causes problem (kernel opps, fscache does not support sharing data) when a filesystem get mounted (with fscache enabled) several times. The fix is adding a new mount option, which makes ceph use client ID as primary index key. Client ID is unique for each mount. For the old fscache mount option, only allow one fscache instance for each filesystem. Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx> --- fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++----- fs/ceph/super.c | 32 ++++++++++------ fs/ceph/super.h | 5 ++- 3 files changed, 131 insertions(+), 23 deletions(-) diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 4e7421c..b4956b4 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = { .version = 0, }; -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t maxbuf) +static DEFINE_MUTEX(ceph_fscache_fsid_lock); +static LIST_HEAD(ceph_fscache_fsid_list); + +struct ceph_fscache_fsid { + struct list_head list; + struct fscache_cookie *fscache; + struct ceph_fsid fsid; +}; + +static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t maxbuf) { const struct ceph_fs_client* fsc = cookie_netfs_data; uint16_t klen; @@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data, static const struct fscache_cookie_def ceph_fscache_fsid_object_def = { .name = "CEPH.fsid", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = ceph_fscache_session_get_key, + .get_key = ceph_fscache_fsid_get_key, +}; + +static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t maxbuf) +{ + const struct ceph_fs_client* fsc = cookie_netfs_data; + const struct ceph_fsid *fsid = &fsc->client->fsid; + u64 client_id = fsc->client->monc.auth->global_id; + uint16_t fsid_len, key_len; + + fsid_len = sizeof(*fsid); + key_len = fsid_len + sizeof(client_id); + if (key_len > maxbuf) + return 0; + + memcpy(buffer, fsid, fsid_len); + memcpy(buffer + fsid_len, &client_id, sizeof(client_id)); + + return key_len; +} + +static const struct fscache_cookie_def ceph_fscache_client_object_def = { + .name = "CEPH.client", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = ceph_fscache_client_get_key, }; int ceph_fscache_register(void) @@ -67,13 +101,54 @@ void ceph_fscache_unregister(void) int ceph_fscache_register_fs(struct ceph_fs_client* fsc) { - fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index, - &ceph_fscache_fsid_object_def, - fsc, true); - if (!fsc->fscache) - pr_err("Unable to register fsid: %p fscache cookie\n", fsc); + const struct ceph_fsid *fsid = &fsc->client->fsid; + struct ceph_fscache_fsid *ent; + int err = 0; + + if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) { + fsc->fscache = fscache_acquire_cookie( + ceph_cache_netfs.primary_index, + &ceph_fscache_client_object_def, + fsc, true); + if (!fsc->fscache) + pr_err("Unable to register fsid: %p " + "fscache cookie\n", fsc); + } else { + mutex_lock(&ceph_fscache_fsid_lock); + list_for_each_entry(ent, &ceph_fscache_fsid_list, list) { + if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) { + pr_err("fscache cookie already registered " + "for fsid %pU\n", fsid); + pr_err(" use tmpfsc mount option instead\n"); + err = -EBUSY; + goto out_unlock; + } + } - return 0; + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) { + err = -ENOMEM; + goto out_unlock; + } + + fsc->fscache = fscache_acquire_cookie( + ceph_cache_netfs.primary_index, + &ceph_fscache_fsid_object_def, + fsc, true); + + if (fsc->fscache) { + memcpy(&ent->fsid, fsid, sizeof(*fsid)); + ent->fscache = fsc->fscache; + list_add_tail(&ent->list, &ceph_fscache_fsid_list); + } else { + kfree(ent); + pr_err("Unable to register fsid: %p " + "fscache cookie\n", fsc); + } +out_unlock: + mutex_unlock(&ceph_fscache_fsid_lock); + } + return err; } static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data, @@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page) void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc) { - fscache_relinquish_cookie(fsc->fscache, 0); + if (fscache_cookie_valid(fsc->fscache)) { + if (fsc->fscache->def == &ceph_fscache_fsid_object_def) { + const struct ceph_fsid *fsid = &fsc->client->fsid; + struct ceph_fscache_fsid *ent, *found = NULL; + + mutex_lock(&ceph_fscache_fsid_lock); + list_for_each_entry(ent, &ceph_fscache_fsid_list, list) { + if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) { + found = ent; + break; + } + } + if (found) { + WARN_ON_ONCE(found->fscache != fsc->fscache); + list_del(&found->list); + kfree(found); + } else { + WARN_ON_ONCE(true); + } + mutex_unlock(&ceph_fscache_fsid_lock); + } + __fscache_relinquish_cookie(fsc->fscache, 0); + } fsc->fscache = NULL; } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 14e78dd..bb6dd7f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -134,6 +134,7 @@ enum { Opt_ino32, Opt_noino32, Opt_fscache, + Opt_tmpfscache, Opt_nofscache, Opt_poolperm, Opt_nopoolperm, @@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = { {Opt_ino32, "ino32"}, {Opt_noino32, "noino32"}, {Opt_fscache, "fsc"}, + {Opt_tmpfscache, "tmpfsc"}, {Opt_nofscache, "nofsc"}, {Opt_poolperm, "poolperm"}, {Opt_nopoolperm, "nopoolperm"}, @@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private) case Opt_fscache: fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; break; + case Opt_tmpfscache: + fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE | + CEPH_MOUNT_OPT_TMPFSCACHE; + break; case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; break; @@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",noasyncreaddir"); if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) seq_puts(m, ",nodcache"); - if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) - seq_puts(m, ",fsc"); + if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { + if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE) + seq_puts(m, ",tmpfsc"); + else + seq_puts(m, ",fsc"); + } if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) seq_puts(m, ",nopoolperm"); @@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, if (!fsc->wb_pagevec_pool) goto fail_trunc_wq; - /* setup fscache */ - if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) && - (ceph_fscache_register_fs(fsc) != 0)) - goto fail_fscache; - /* caps */ fsc->min_caps = fsopt->max_readdir; return fsc; -fail_fscache: - ceph_fscache_unregister_fs(fsc); fail_trunc_wq: destroy_workqueue(fsc->trunc_wq); fail_pg_inv_wq: @@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) { dout("destroy_fs_client %p\n", fsc); - ceph_fscache_unregister_fs(fsc); - destroy_workqueue(fsc->wb_wq); destroy_workqueue(fsc->pg_inv_wq); destroy_workqueue(fsc->trunc_wq); @@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) if (err < 0) goto out; + /* setup fscache */ + if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { + err = ceph_fscache_register_fs(fsc); + if (err < 0) + goto out; + } + if (!fsc->mount_options->server_path) { path = ""; dout("mount opening path \\t\n"); @@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s) fsc->client->extra_mon_dispatch = NULL; ceph_fs_debugfs_cleanup(fsc); + ceph_fscache_unregister_fs(fsc); + ceph_mdsc_destroy(fsc); destroy_fs_client(fsc); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f8a0aba..21e5562 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -36,8 +36,9 @@ #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ -#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ -#define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ +#define CEPH_MOUNT_OPT_TMPFSCACHE (1<<11) /* use temp fscache */ +#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<12) /* no pool permission check */ +#define CEPH_MOUNT_OPT_MOUNTWAIT (1<<13) /* mount waits if no mds is up */ #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE -- 2.9.4 -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html