An overlay file handle contains a real underlying filesystem file handle and decoding an overlay file handle typically starts by decoding the real file handle. Decoding a real directory file handle can be expensive, and it could be avoided in some cases by looking in the overlay inode cache before decoding a lower overlay file handle. In order to be able to do that, we store a copy of the lower file handle in the overlay inode's i_private field and hash the overlay inode by the value of the lower file handle instead of by the lower inode pointer. A helper ovl_lookup_inode_fh() is introduced to look up an overlay inode by lower file handle. A follow-up patch will use this helper to optimize overlay lower file handle decode for the hot inode cache case. Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> --- fs/overlayfs/inode.c | 80 +++++++++++++++++++++++++++++++++++++++++++++--- fs/overlayfs/namei.c | 2 +- fs/overlayfs/overlayfs.h | 9 ++++++ fs/overlayfs/super.c | 2 ++ 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b6b44cf9ec26..5fef29e2f80f 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -651,12 +651,65 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } +/* + * Overlay inodes are hashed by lower file handle for NFS export, so decode can + * lookup inode cache before doing index lookup and decoding real file handles. + * For pure upper or non-indexed upper or without NFS export, we hash by real + * inode pointer as before NFS export. i_private holds a copy of lower file + * handle or the real inode pointer respectively. 
+ */ +static int ovl_inode_test_fh(struct inode *inode, void *data) +{ + return ovl_test_flag(OVL_I_PRIVATE_FH, inode) && + ovl_fs_equal(inode->i_private, data); +} + +/* Compute hash from real fs uuid and real fs file handle */ +static unsigned long ovl_fh_hash(struct ovl_fh *fh) +{ + /* + * Mixing the uuid with the real file handle is needed for an + * overlay configuration of many lower layers on different fs. + * Other ovl_fh header values are not likely to add random bits. + */ + return full_name_long_hash(NULL, (const char *)&fh->uuid, + fh->len - offsetof(struct ovl_fh, uuid)); +} + +static struct ovl_fh *ovl_real_fh_hash(struct dentry *real, bool is_upper, + unsigned long *pkey) +{ + struct ovl_fh *fh = ovl_encode_real_fh(real, is_upper); + + if (!IS_ERR(fh)) + *pkey = ovl_fh_hash(fh); + + return fh; +} + +/* Lookup overlay inode by real inode file handle */ +struct inode *ovl_lookup_inode_fh(struct super_block *sb, struct ovl_fh *fh) +{ + return ilookup5(sb, ovl_fh_hash(fh), ovl_inode_test_fh, fh); +} + +/* Lookup overlay inode by real upper inode or by lower inode file handle */ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper) { struct inode *inode, *key = d_inode(real); + struct ovl_fh *fh; - inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); + if (is_upper) { + inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); + } else { + fh = ovl_encode_real_fh(real, false); + if (IS_ERR(fh)) + return ERR_CAST(fh); + + inode = ovl_lookup_inode_fh(sb, fh); + kfree(fh); + } if (!inode) return NULL; @@ -709,23 +762,35 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, struct inode *realinode = upperdentry ? 
d_inode(upperdentry) : NULL; struct inode *inode; bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index); + struct ovl_fh *fh = NULL; + unsigned long key; bool is_dir; if (!realinode) realinode = d_inode(lowerdentry); + /* Hash non-upper and indexed by origin fh for NFS export */ + if (sb->s_export_op && bylower) { + fh = ovl_real_fh_hash(lowerdentry, false, &key); + if (IS_ERR(fh)) + return ERR_CAST(fh); + } else if (bylower) { + key = (unsigned long) d_inode(lowerdentry); + } else if (upperdentry) { + key = (unsigned long) d_inode(upperdentry); + } + /* * Copy up origin (lower) may exist for non-indexed upper, but we must * not use lower as hash key if this is a broken hardlink. */ is_dir = S_ISDIR(realinode->i_mode); if (upperdentry || bylower) { - struct inode *key = d_inode(bylower ? lowerdentry : - upperdentry); unsigned int nlink = is_dir ? 1 : realinode->i_nlink; - inode = iget5_locked(sb, (unsigned long) key, - ovl_inode_test, ovl_inode_set, key); + inode = iget5_locked(sb, key, fh ? 
ovl_inode_test_fh : + ovl_inode_test, + ovl_inode_set, fh ?: (void *)key); if (!inode) goto out_nomem; if (!(inode->i_state & I_NEW)) { @@ -742,6 +807,10 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, dput(upperdentry); goto out; + } else if (fh) { + /* i_private keeps the reference to fh */ + ovl_set_flag(OVL_I_PRIVATE_FH, inode); + fh = NULL; } /* Recalculate nlink for non-dir due to indexing */ @@ -771,6 +840,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, if (inode->i_state & I_NEW) unlock_new_inode(inode); out: + kfree(fh); return inode; out_nomem: diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 287938911abe..74ccbe0813cd 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -393,7 +393,7 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name, if (IS_ERR(ofh)) return PTR_ERR(ofh); - if (fh->len != ofh->len || memcmp(fh, ofh, fh->len)) + if (!ovl_fs_equal(fh, ofh)) err = -ESTALE; kfree(ofh); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index dd6c10e5a7db..b94db3f6f4ae 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -35,6 +35,8 @@ enum ovl_inode_flag { /* Non-merge dir that may contain whiteout entries */ OVL_WHITEOUTS, OVL_INDEX, + /* inode->i_private is a file handle copy */ + OVL_I_PRIVATE_FH, }; enum ovl_entry_flag { @@ -83,6 +85,12 @@ struct ovl_fh { u8 fid[0]; /* file identifier */ } __packed; +static inline bool ovl_fs_equal(const struct ovl_fh *fh1, + const struct ovl_fh *fh2) +{ + return fh1->len == fh2->len && !memcmp(fh1, fh2, fh1->len); +} + static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) { int err = vfs_rmdir(dir, dentry); @@ -326,6 +334,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); +struct inode 
*ovl_lookup_inode_fh(struct super_block *sb, struct ovl_fh *fh); struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper); struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 7c24619ae7fc..d17dffc611cc 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -220,6 +220,8 @@ static void ovl_destroy_inode(struct inode *inode) kfree(oi->redirect); ovl_dir_cache_free(inode); mutex_destroy(&oi->lock); + if (ovl_test_flag(OVL_I_PRIVATE_FH, inode)) + kfree(inode->i_private); call_rcu(&inode->i_rcu, ovl_i_callback); } -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html