Re: New read-balancing patch for dm-raid1.c

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jon,

the "if (ms->read_count_reset)" should read "if (!ms->read_count_reset)"
to disable read balancing and the 2 while loops could be merged into one.

Heinz

On Mon, Oct 09, 2006 at 11:13:59AM -0500, Jonathan Brassow wrote:
> Looking for some opinions/comments on this read balancing patch.  (Basic
> implementation details in patch header.)
> 
> I haven't gone to too much trouble to make the read balancing code
> generic, since there is only one policy right now.  In the future, I
> will likely have to swap out the three fields in the mirror set
> structure for a void pointer that will contain data for various other
> read balancing policies.  I have, however, constructed the new table
> line arguments in such a way that the read-balancing arguments are easy
> to identify and pass to an initialization function without knowing too
> much about the actual arguments.
> 
> One thing I'm not all that fond of is the order that the mirror is set
> up.  The read-balancing arguments are parsed before 'alloc_context' is
> called - meaning that new read-balancing structures would be allocated
> first, then linked in later... and read-balancing is set up before the
> default mirror device is chosen.  I don't really care for
> 'alloc_context' setting up 'ms->read_mirror', while other fields are
> initialized after 'alloc_context'.  I don't think this is too much of a
> problem though.
> 
>  brassow
> 
> This patch adds read balancing.  The round-robin method is the first
> to be implemented, but provisions are made for others to be implemented
> in the future.
> 
> The allowable mirror table arguments has been expanded.  It is now
> as follows:
> 
>        <start> <length> mirror \
>        <log-type> <# log params> <log params> \
> *new*  [readbalance <# rb params> <rb params>] \
>        <# mirrors> <device1> <offset1> ... <deviceN> <offsetN>
> 
> The new read balancing arguments are optional, and the only
> currently valid read balancing arguments are:
>        readbalance 2 roundrobin <count>
> Where 'count' is the number of I/Os that go to a device before
> switching to the next device.
> 
> 'struct mirror *choose_mirror(struct mirror_set *ms)' is the
> function that chooses the read mirror based on read balancing
> policy.  It should only be called when the region of the
> mirror being read from is known to be in-sync.  'choose_mirror'
> will avoid selecting devices with error_counts > 0 - returning
> NULL if no devices are available.
> 
> Index: linux-2.6.18/drivers/md/dm-raid1.c
> ===================================================================
> --- linux-2.6.18.orig/drivers/md/dm-raid1.c	2006-10-05 13:38:27.000000000 -0500
> +++ linux-2.6.18/drivers/md/dm-raid1.c	2006-10-09 10:31:47.000000000 -0500
> @@ -135,6 +135,9 @@ struct mirror_set {
>  	struct mirror *default_mirror;	/* Default mirror */
>  
>  	unsigned int nr_mirrors;
> +	unsigned int read_count_reset; /* number of reads before switching */
> +	atomic_t read_count;      /* Read counter for read balancing */
> +	struct mirror *read_mirror; /* Last mirror read. */
>  	struct mirror mirror[0];
>  };
>  
> @@ -686,10 +689,59 @@ static void do_recovery(struct mirror_se
>  /*-----------------------------------------------------------------
>   * Reads
>   *---------------------------------------------------------------*/
> -static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
> +
> +/* choose_mirror
> + * @ms: the mirror set
> + *
> + * This function is used for read balancing.
> + *
> + * Returns: chosen mirror, or NULL on failure
> + */
> +static struct mirror *choose_mirror(struct mirror_set *ms)
>  {
> -	/* FIXME: add read balancing */
> -	return ms->default_mirror;
> +	unsigned int i;
> +	struct mirror *start_mirror = ms->read_mirror;
> +
> +	/*
> +	 * If 'read_count_reset' is zero here, then read-balancing
> +	 * is disabled.
> +	 */
> +	if (ms->read_count_reset)) {
> +		do {
> +			if (likely(!atomic_read(&ms->read_mirror->error_count)))
> +				goto use_mirror;
> +
> +			if (ms->read_mirror-- == ms->mirror)
> +				ms->read_mirror += ms->nr_mirrors;
> +		} while (ms->read_mirror != start_mirror);
> +		return NULL;
> +	}
> +
> +	/*
> +	 * Perform ms->read_count_reset reads on each working mirror then
> +	 * advance to the next one.  start_mirror stores
> +	 * the first we tried, so we know when we're done.
> +	 */
> +	do {
> +		if (likely(!atomic_read(&ms->read_mirror->error_count)) &&
> +		    !atomic_dec_and_test(&ms->read_count))
> +			goto use_mirror;
> +
> +		atomic_set(&ms->read_count, ms->read_count_reset);
> +
> +		if (ms->read_mirror-- == ms->mirror)
> +			ms->read_mirror += ms->nr_mirrors;
> +	} while (ms->read_mirror != start_mirror);
> +
> +	/*
> +	 * We've rejected every mirror.
> +	 * Confirm the start_mirror can be used.
> +	 */
> +	if (unlikely(atomic_read(&ms->read_mirror->error_count)))
> +		return NULL;
> +
> +use_mirror:
> +	return ms->read_mirror;
>  }
>  
>  /*
> @@ -714,7 +766,7 @@ static void do_reads(struct mirror_set *
>  		 * We can only read balance if the region is in sync.
>  		 */
>  		if (rh_in_sync(&ms->rh, region, 0))
> -			m = choose_mirror(ms, bio->bi_sector);
> +			m = choose_mirror(ms);
>  		else
>  			m = ms->default_mirror;
>  
> @@ -907,6 +959,7 @@ static struct mirror_set *alloc_context(
>  	ms->nr_mirrors = nr_mirrors;
>  	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
>  	ms->in_sync = 0;
> +	ms->read_mirror = &ms->mirror[DEFAULT_MIRROR];
>  	ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
>  
>  	if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
> @@ -1028,6 +1081,7 @@ static struct dirty_log *create_dirty_lo
>  static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  {
>  	int r;
> +	unsigned int read_count_reset = 0, read_balance_args;
>  	unsigned int nr_mirrors, m, args_used;
>  	struct mirror_set *ms;
>  	struct dirty_log *dl;
> @@ -1039,6 +1093,29 @@ static int mirror_ctr(struct dm_target *
>  	argv += args_used;
>  	argc -= args_used;
>  
> +	if (!argc) {
> +		ti->error = "Invalid number of arguments";
> +		dm_destroy_dirty_log(dl);
> +		return -EINVAL;
> +	} else if (!strcmp("readbalance", argv[0]) &&
> +		   (sscanf(argv[1], "%u", &read_balance_args) == 1)) {
> +		/*
> +		 * When there is more than one read-balancing policy,
> +		 * we will push this next if statement into an
> +		 * initialization function.
> +		 */
> +		if ((read_balance_args == 2) &&
> +		    !strcmp("roundrobin", argv[2]) &&
> +		    (sscanf(argv[3], "%u", &read_count_reset) == 1)) {
> +			argv += 4;
> +			argc -= 4;
> +		} else {
> +			ti->error = "Invalid read-balancing arguments";
> +			dm_destroy_dirty_log(dl);
> +			return -EINVAL;
> +		}
> +	}
> +
>  	if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 ||
>  	    nr_mirrors < 2 || nr_mirrors > KCOPYD_MAX_REGIONS + 1) {
>  		ti->error = "Invalid number of mirrors";
> @@ -1060,6 +1137,8 @@ static int mirror_ctr(struct dm_target *
>  		return -ENOMEM;
>  	}
>  
> +	ms->read_count_reset = read_count_reset;
> +
>  	/* Get the mirror parameter sets */
>  	for (m = 0; m < nr_mirrors; m++) {
>  		r = get_mirror(ms, ti, m, argv);
> @@ -1147,7 +1226,7 @@ static int mirror_map(struct dm_target *
>  		return 0;
>  	}
>  
> -	m = choose_mirror(ms, bio->bi_sector);
> +	m = choose_mirror(ms);
>  	if (!m)
>  		return -EIO;

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

Heinz Mauelshagen                                 Red Hat GmbH
Consulting Development Engineer                   Am Sonnenhang 11
Storage Development                               56242 Marienrachdorf
                                                  Germany
Mauelshagen@xxxxxxxxxx                            PHONE +49  171 7803392
                                                  FAX   +49 2626 924446
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel

[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux