On Mon, 2019-09-09 at 22:43 -0400, simon gao wrote: > In larger clusters (hundreds of millions of files), we currently have to pin > each directory to a fixed MDS. Some client ops use USE_ANY_MDS mode > to choose an MDS, which may result in requests being sent to a non-auth MDS > and then forwarded to the auth MDS. > This option reduces forwarded ops by always sending requests to the auth MDS. > --- > fs/ceph/mds_client.c | 7 ++++++- > fs/ceph/super.c | 7 +++++++ > fs/ceph/super.h | 1 + > 3 files changed, 14 insertions(+), 1 deletion(-) > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 920e9f0..aca4490 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -878,6 +878,7 @@ static struct inode *get_nonsnap_parent(struct dentry *dentry) > static int __choose_mds(struct ceph_mds_client *mdsc, > struct ceph_mds_request *req) > { > + struct ceph_mount_options *ma = mdsc->fsc->mount_options; > struct inode *inode; > struct ceph_inode_info *ci; > struct ceph_cap *cap; > @@ -900,7 +901,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, > > if (mode == USE_RANDOM_MDS) > goto random; > - > + // force to send the req to auth mds > + if (ma->flags & CEPH_MOUNT_OPT_ALWAYS_AUTH && mode != USE_AUTH_MDS){ > + dout("change mode %d => USE_AUTH_MDS", mode); > + mode = USE_AUTH_MDS; > + } > inode = NULL; > if (req->r_inode) { > if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) { > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > index ab4868c..1e81ebc 100644 > --- a/fs/ceph/super.c > +++ b/fs/ceph/super.c > @@ -169,6 +169,7 @@ enum { > Opt_noquotadf, > Opt_copyfrom, > Opt_nocopyfrom, > + Opt_always_auth, > }; > > static match_table_t fsopt_tokens = { > @@ -210,6 +211,7 @@ enum { > {Opt_noquotadf, "noquotadf"}, > {Opt_copyfrom, "copyfrom"}, > {Opt_nocopyfrom, "nocopyfrom"}, > + {Opt_always_auth, "always_auth"}, > {-1, NULL} > }; > > @@ -381,6 +383,9 @@ static int parse_fsopt_token(char *c, void *private) > case Opt_noacl: > fsopt->sb_flags &= ~SB_POSIXACL; > break; > + case
Opt_always_auth: > + fsopt->flags |= CEPH_MOUNT_OPT_ALWAYS_AUTH; > + break; > default: > BUG_ON(token); > } > @@ -563,6 +568,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) > seq_puts(m, ",nopoolperm"); > if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) > seq_puts(m, ",noquotadf"); > + if (fsopt->flags & CEPH_MOUNT_OPT_ALWAYS_AUTH) > + seq_puts(m, ",always_auth"); > > #ifdef CONFIG_CEPH_FS_POSIX_ACL > if (fsopt->sb_flags & SB_POSIXACL) > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 6b9f1ee..65f6423 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -41,6 +41,7 @@ > #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ > #define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */ > #define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */ > +#define CEPH_MOUNT_OPT_ALWAYS_AUTH (1<<15) /* send op to auth mds, not to replicative mds */ > > #define CEPH_MOUNT_OPT_DEFAULT \ > (CEPH_MOUNT_OPT_DCACHE | \ I've no particular objection here, but I'd prefer Greg's ack before we merge it, since he raised earlier concerns. If we are going to take it, then this will need to be rebased on top of the mount API conversion that's currently in the ceph-client/testing branch. -- Jeff Layton <jlayton@xxxxxxxxxx>