On Fri, 2020-05-29 at 17:19 +0200, Ilya Dryomov wrote: > Expose balanced and localized reads through read_policy=balance > and read_policy=localize. The default is to read from primary. > > Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx> > --- > include/linux/ceph/libceph.h | 2 ++ > net/ceph/ceph_common.c | 26 ++++++++++++++++++++++++++ > net/ceph/osd_client.c | 5 ++++- > 3 files changed, 32 insertions(+), 1 deletion(-) > > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h > index 4733959f1ec7..0a9f807ceda6 100644 > --- a/include/linux/ceph/libceph.h > +++ b/include/linux/ceph/libceph.h > @@ -52,6 +52,8 @@ struct ceph_options { > unsigned long osd_idle_ttl; /* jiffies */ > unsigned long osd_keepalive_timeout; /* jiffies */ > unsigned long osd_request_timeout; /* jiffies */ > + unsigned int osd_req_flags; /* CEPH_OSD_FLAG_*, applied to > + each OSD request */ > > /* > * any type that can't be simply compared or doesn't need > diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c > index 6d495685ee03..1a834cb0d04d 100644 > --- a/net/ceph/ceph_common.c > +++ b/net/ceph/ceph_common.c > @@ -265,6 +265,7 @@ enum { > Opt_key, > Opt_ip, > Opt_crush_location, > + Opt_read_policy, > /* string args above */ > Opt_share, > Opt_crc, > @@ -274,6 +275,17 @@ enum { > Opt_abort_on_full, > }; > > +enum { > + Opt_read_policy_balance, > + Opt_read_policy_localize, > +}; > + > +static const struct constant_table ceph_param_read_policy[] = { > + {"balance", Opt_read_policy_balance}, > + {"localize", Opt_read_policy_localize}, > + {} > +}; > + > static const struct fs_parameter_spec ceph_parameters[] = { > fsparam_flag ("abort_on_full", Opt_abort_on_full), > fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures), > @@ -290,6 +302,8 @@ static const struct fs_parameter_spec ceph_parameters[] = { > fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout), > __fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout, > fs_param_deprecated, NULL), > + fsparam_enum ("read_policy", Opt_read_policy, > + ceph_param_read_policy), > fsparam_string ("secret", Opt_secret), > fsparam_flag_no ("share", Opt_share), > fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay), > @@ -470,6 +484,18 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, > return err; > } > break; > + case Opt_read_policy: > + switch (result.uint_32) { > + case Opt_read_policy_balance: > + opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS; > + break; > + case Opt_read_policy_localize: > + opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; > + break; > + default: > + BUG(); > + } > + break; Suppose I specify "-o read_policy=balance,read_policy=localize". Principle of least surprise says "last one wins", but you'll end up with both flags set here, and I think the final result would still be "balance". I think it'd probably be best to rework this so that the last option specified is what you get. I also think you want a way to explicitly set it back to default behavior (read_policy=primary ?), as it's not uncommon for people to specify mount options in fstab but then append to them on the command line. e.g.: # mount /mnt/cephfs -o read_policy=primary ...when fstab already has read_policy=balance. > case Opt_osdtimeout: > warn_plog(&log, "Ignoring osdtimeout"); > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c > index 15c3afa8089b..da7046db9fbe 100644 > --- a/net/ceph/osd_client.c > +++ b/net/ceph/osd_client.c > @@ -2425,11 +2425,14 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) > > static void account_request(struct ceph_osd_request *req) > { > + struct ceph_osd_client *osdc = req->r_osdc; > + > WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); > WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); > > req->r_flags |= CEPH_OSD_FLAG_ONDISK; > - atomic_inc(&req->r_osdc->num_requests); > + req->r_flags |= osdc->client->options->osd_req_flags; > + atomic_inc(&osdc->num_requests); > > req->r_start_stamp = jiffies; > req->r_start_latency = ktime_get(); -- Jeff Layton <jlayton@xxxxxxxxxx>