Jon, the "if (ms->read_count_reset)" should read "if (!ms->read_count_reset)" to disable read balancing and the 2 while loops could be merged into one. Heinz On Mon, Oct 09, 2006 at 11:13:59AM -0500, Jonathan Brassow wrote: > Looking for some opinions/comments on this read balancing patch. (Basic > implementation details in patch header.) > > I haven't gone to too much trouble to make the read balancing code > generic, since there is only one policy right now. In the future, I > will likely have to swap out the three fields in the mirror set > structure for a void pointer that will contain data for various other > read balancing policies. I have, however, constructed the new table > line arguments in such a way that the read-balancing arguments are easy > to identify and pass to an initialization function without knowing too > much about the actual arguments. > > One thing I'm not all that fond of is the order that the mirror is set > up. The read-balancing arguments are parsed before 'alloc_context' is > called - meaning that new read-balancing structures would be allocated > first, then linked in later... and read-balancing is set up before the > default mirror device is chosen. I don't really care for > 'alloc_context' setting up 'ms->read_mirror', while other fields are > initialized after 'alloc_context'. I don't think this is too much of a > problem though. > > brassow > > This patch adds read balancing. The round-robin method is the first > to be implemented, but provisions are made for others to be implemented > in the future. > > The allowable mirror table arguments has been expanded. It is now > as follows: > > <start> <length> mirror \ > <log-type> <# log params> <log params> \ > *new* [readbalance <# rb params> <rb params>] \ > <# mirrors> <device1> <offset1> ... <deviceN> <offsetN> > > The new read balancing arguments are optional, and the only > currently valid read balancing arguments are: > readbalance 2 roundrobin <count> > Where 'count' is the number of I/Os that go to a device before > switching to the next device. > > 'struct mirror *choose_mirror(struct mirror_set *ms)' is the > function that chooses the read mirror based on read balancing > policy. It should only be called when the region of the > mirror being read from is known to be in-sync. 'choose_mirror' > will avoid selecting devices with error_counts > 0 - returning > NULL if no devices are available. > > Index: linux-2.6.18/drivers/md/dm-raid1.c > =================================================================== > --- linux-2.6.18.orig/drivers/md/dm-raid1.c 2006-10-05 13:38:27.000000000 -0500 > +++ linux-2.6.18/drivers/md/dm-raid1.c 2006-10-09 10:31:47.000000000 -0500 > @@ -135,6 +135,9 @@ struct mirror_set { > struct mirror *default_mirror; /* Default mirror */ > > unsigned int nr_mirrors; > + unsigned int read_count_reset; /* number of reads before switching */ > + atomic_t read_count; /* Read counter for read balancing */ > + struct mirror *read_mirror; /* Last mirror read. */ > struct mirror mirror[0]; > }; > > @@ -686,10 +689,59 @@ static void do_recovery(struct mirror_se > /*----------------------------------------------------------------- > * Reads > *---------------------------------------------------------------*/ > -static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector) > + > +/* choose_mirror > + * @ms: the mirror set > + * > + * This function is used for read balancing. > + * > + * Returns: chosen mirror, or NULL on failure > + */ > +static struct mirror *choose_mirror(struct mirror_set *ms) > { > - /* FIXME: add read balancing */ > - return ms->default_mirror; > + unsigned int i; > + struct mirror *start_mirror = ms->read_mirror; > + > + /* > + * If 'read_count_reset' is zero here, then read-balancing > + * is disabled. > + */ > + if (ms->read_count_reset)) { > + do { > + if (likely(!atomic_read(&ms->read_mirror->error_count))) > + goto use_mirror; > + > + if (ms->read_mirror-- == ms->mirror) > + ms->read_mirror += ms->nr_mirrors; > + } while (ms->read_mirror != start_mirror); > + return NULL; > + } > + > + /* > + * Perform ms->read_count_reset reads on each working mirror then > + * advance to the next one. start_mirror stores > + * the first we tried, so we know when we're done. > + */ > + do { > + if (likely(!atomic_read(&ms->read_mirror->error_count)) && > + !atomic_dec_and_test(&ms->read_count)) > + goto use_mirror; > + > + atomic_set(&ms->read_count, ms->read_count_reset); > + > + if (ms->read_mirror-- == ms->mirror) > + ms->read_mirror += ms->nr_mirrors; > + } while (ms->read_mirror != start_mirror); > + > + /* > + * We've rejected every mirror. > + * Confirm the start_mirror can be used. > + */ > + if (unlikely(atomic_read(&ms->read_mirror->error_count))) > + return NULL; > + > +use_mirror: > + return ms->read_mirror; > } > > /* > @@ -714,7 +766,7 @@ static void do_reads(struct mirror_set * > * We can only read balance if the region is in sync. > */ > if (rh_in_sync(&ms->rh, region, 0)) > - m = choose_mirror(ms, bio->bi_sector); > + m = choose_mirror(ms); > else > m = ms->default_mirror; > > @@ -907,6 +959,7 @@ static struct mirror_set *alloc_context( > ms->nr_mirrors = nr_mirrors; > ms->nr_regions = dm_sector_div_up(ti->len, region_size); > ms->in_sync = 0; > + ms->read_mirror = &ms->mirror[DEFAULT_MIRROR]; > ms->default_mirror = &ms->mirror[DEFAULT_MIRROR]; > > if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { > @@ -1028,6 +1081,7 @@ static struct dirty_log *create_dirty_lo > static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) > { > int r; > + unsigned int read_count_reset = 0, read_balance_args; > unsigned int nr_mirrors, m, args_used; > struct mirror_set *ms; > struct dirty_log *dl; > @@ -1039,6 +1093,29 @@ static int mirror_ctr(struct dm_target * > argv += args_used; > argc -= args_used; > > + if (!argc) { > + ti->error = "Invalid number of arguments"; > + dm_destroy_dirty_log(dl); > + return -EINVAL; > + } else if (!strcmp("readbalance", argv[0]) && > + (sscanf(argv[1], "%u", &read_balance_args) == 1)) { > + /* > + * When there is more than one read-balancing policy, > + * we will push this next if statement into an > + * initialization function. > + */ > + if ((read_balance_args == 2) && > + !strcmp("roundrobin", argv[2]) && > + (sscanf(argv[3], "%u", &read_count_reset) == 1)) { > + argv += 4; > + argc -= 4; > + } else { > + ti->error = "Invalid read-balancing arguments"; > + dm_destroy_dirty_log(dl); > + return -EINVAL; > + } > + } > + > if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || > nr_mirrors < 2 || nr_mirrors > KCOPYD_MAX_REGIONS + 1) { > ti->error = "Invalid number of mirrors"; > @@ -1060,6 +1137,8 @@ static int mirror_ctr(struct dm_target * > return -ENOMEM; > } > > + ms->read_count_reset = read_count_reset; > + > /* Get the mirror parameter sets */ > for (m = 0; m < nr_mirrors; m++) { > r = get_mirror(ms, ti, m, argv); > @@ -1147,7 +1226,7 @@ static int mirror_map(struct dm_target * > return 0; > } > > - m = choose_mirror(ms, bio->bi_sector); > + m = choose_mirror(ms); > if (!m) > return -EIO; =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- Heinz Mauelshagen Red Hat GmbH Consulting Development Engineer Am Sonnenhang 11 Storage Development 56242 Marienrachdorf Germany Mauelshagen@xxxxxxxxxx PHONE +49 171 7803392 FAX +49 2626 924446 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel