On Mon, Jun 18, 2012 at 6:59 PM, Shaohua Li <shli@xxxxxxxxxx> wrote: > Raid5 overrides bio->bi_phys_segments, accessing it is with device_lock hold, > which is unnecessary, We can make it lockless actually. > > Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx> > --- > drivers/md/raid5.c | 58 +++++++++++++++++++++++++++++------------------------ > 1 file changed, 32 insertions(+), 26 deletions(-) > > Index: linux/drivers/md/raid5.c > =================================================================== > --- linux.orig/drivers/md/raid5.c 2012-06-08 11:48:25.652618012 +0800 > +++ linux/drivers/md/raid5.c 2012-06-08 13:15:31.458919391 +0800 > @@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(st > * We maintain a biased count of active stripes in the bottom 16 bits of > * bi_phys_segments, and a count of processed stripes in the upper 16 bits > */ > -static inline int raid5_bi_phys_segments(struct bio *bio) > +static inline int raid5_bi_processed_stripes(struct bio *bio) > { > - return bio->bi_phys_segments & 0xffff; > + atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; > + return (atomic_read(segments) >> 16) & 0xffff; > } > > -static inline int raid5_bi_hw_segments(struct bio *bio) > +static inline int raid5_dec_bi_active_stripes(struct bio *bio) > { > - return (bio->bi_phys_segments >> 16) & 0xffff; > + atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; > + return atomic_sub_return(1, segments) & 0xffff; > } > > -static inline int raid5_dec_bi_phys_segments(struct bio *bio) > +static inline void raid5_inc_bi_active_stripes(struct bio *bio) > { > - --bio->bi_phys_segments; > - return raid5_bi_phys_segments(bio); > + atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; > + atomic_inc(segments); > } > > -static inline int raid5_dec_bi_hw_segments(struct bio *bio) > +static inline void raid5_set_bi_processed_stripes(struct bio *bio, > + unsigned int cnt) > { > - unsigned short val = raid5_bi_hw_segments(bio); > + atomic_t *segments = (atomic_t 
*)&bio->bi_phys_segments; > + int old, new; > > - --val; > - bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); > - return val; > + do { > + old = atomic_read(segments); > + new = (old & 0xffff) | (cnt << 16); > + } while (atomic_cmpxchg(segments, old, new) != old); > } > > -static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) > +static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt) > { > - bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); > + atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; > + atomic_set(segments, cnt); > } > > /* Find first data disk in a raid6 stripe */ > @@ -767,7 +773,7 @@ static void ops_complete_biofill(void *s > while (rbi && rbi->bi_sector < > dev->sector + STRIPE_SECTORS) { > rbi2 = r5_next_bio(rbi, dev->sector); > - if (!raid5_dec_bi_phys_segments(rbi)) { > + if (!raid5_dec_bi_active_stripes(rbi)) { > rbi->bi_next = return_bi; > return_bi = rbi; > } > @@ -2354,7 +2360,7 @@ static int add_stripe_bio(struct stripe_ > if (*bip) > bi->bi_next = *bip; > *bip = bi; > - bi->bi_phys_segments++; > + raid5_inc_bi_active_stripes(bi); Now that device_lock does not globally sync bio ingress/egress, would it be worth having a comment here that says this ordering of adding bi to the to{read|write} list is safe because make_request holds a 'segments' reference count until the bio has been added to all affected stripes, and that within a given stripe bios are still synced via stripe_lock? -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html