Re: [PATCH] libceph: move away from global osd_req_flags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ilya Dryomov <idryomov@xxxxxxxxx> writes:

> osd_req_flags is overly general and doesn't suit its only user
> (read_from_replica option) well:
>
> - applying osd_req_flags in account_request() affects all OSD
>   requests, including linger (i.e. watch and notify).  However,
>   linger requests should always go to the primary even though
>   some of them are reads (e.g. notify has side effects but it
>   is a read because it doesn't result in mutation on the OSDs).
>
> - calls to class methods that are reads are allowed to go to
>   the replica, but most such calls issued for "rbd map" and/or
>   exclusive lock transitions are requested to be resent to the
>   primary via EAGAIN, doubling the latency.
>
> Get rid of global osd_req_flags and set read_from_replica flag
> only on specific OSD requests instead.
>
> Fixes: 8ad44d5e0d1e ("libceph: read_from_replica option")

Since this isn't yet in mainline, can't you simply merge these two
patches?

> Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx>
> ---
>  drivers/block/rbd.c          |  4 +++-
>  include/linux/ceph/libceph.h |  4 ++--
>  net/ceph/ceph_common.c       | 14 ++++++--------
>  net/ceph/osd_client.c        |  3 +--
>  4 files changed, 12 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index e02089d550a4..b2bb2f10562a 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -1451,8 +1451,10 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
>  static void rbd_osd_format_read(struct ceph_osd_request *osd_req)
>  {
>  	struct rbd_obj_request *obj_request = osd_req->r_priv;
> +	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
> +	struct ceph_options *opt = rbd_dev->rbd_client->client->options;
>  
> -	osd_req->r_flags = CEPH_OSD_FLAG_READ;
> +	osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica;
>  	osd_req->r_snapid = obj_request->img_request->snap_id;
>  }
>  
> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> index 2247e71beb83..e5ed1c541e7f 100644
> --- a/include/linux/ceph/libceph.h
> +++ b/include/linux/ceph/libceph.h
> @@ -52,8 +52,7 @@ struct ceph_options {
>  	unsigned long osd_idle_ttl;		/* jiffies */
>  	unsigned long osd_keepalive_timeout;	/* jiffies */
>  	unsigned long osd_request_timeout;	/* jiffies */
> -
> -	u32 osd_req_flags;  /* CEPH_OSD_FLAG_*, applied to each OSD request */
> +	u32 read_from_replica;  /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
>  
>  	/*
>  	 * any type that can't be simply compared or doesn't need
> @@ -76,6 +75,7 @@ struct ceph_options {
>  #define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
>  #define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
>  #define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0  /* no timeout */
> +#define CEPH_READ_FROM_REPLICA_DEFAULT	0  /* read from primary */
>  
>  #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
>  #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index 9bab3e9a039b..b7705e91ae9a 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -333,6 +333,7 @@ struct ceph_options *ceph_alloc_options(void)
>  	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
>  	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
>  	opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
> +	opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
>  	return opt;
>  }
>  EXPORT_SYMBOL(ceph_alloc_options);
> @@ -491,16 +492,13 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
>  	case Opt_read_from_replica:
>  		switch (result.uint_32) {
>  		case Opt_read_from_replica_no:
> -			opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS |
> -						CEPH_OSD_FLAG_LOCALIZE_READS);
> +			opt->read_from_replica = 0;

CEPH_READ_FROM_REPLICA_DEFAULT?

>  			break;
>  		case Opt_read_from_replica_balance:
> -			opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS;
> -			opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS;
> +			opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS;
>  			break;
>  		case Opt_read_from_replica_localize:
> -			opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
> -			opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS;
> +			opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS;
>  			break;
>  		default:
>  			BUG();
> @@ -614,9 +612,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
>  		}
>  		seq_putc(m, ',');
>  	}
> -	if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) {
> +	if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) {
>  		seq_puts(m, "read_from_replica=balance,");
> -	} else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) {
> +	} else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
>  		seq_puts(m, "read_from_replica=localize,");
>  	}
>  
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 4fea3c33af2a..b151fe163821 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -1117,10 +1117,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
>  				       truncate_size, truncate_seq);
>  	}
>  
> -	req->r_flags = flags;
>  	req->r_base_oloc.pool = layout->pool_id;
>  	req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
>  	ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum);
> +	req->r_flags = flags | osdc->client->options->read_from_replica;
>  
>  	req->r_snapid = vino.snap;
>  	if (flags & CEPH_OSD_FLAG_WRITE)
> @@ -2437,7 +2437,6 @@ static void account_request(struct ceph_osd_request *req)
>  	WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
>  
>  	req->r_flags |= CEPH_OSD_FLAG_ONDISK;
> -	req->r_flags |= osdc->client->options->osd_req_flags;
>  	atomic_inc(&osdc->num_requests);
>  
>  	req->r_start_stamp = jiffies;

Cheers,
-- 
Luis



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux