Re: [PATCH 2/4] md/r5cache: read data into orig_page for prexor of cached data

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 12, 2017 at 05:22:41PM -0800, Song Liu wrote:
> With write back cache, we use orig_page to do prexor. This patch
> makes sure we read data into orig_page for it.
> 
> Flag R5_OrigPageUPTDODATE is added to show whether orig_page
> has the latest data from raid disk.
> 
> We introduce a helper function uptodate_for_rmw() to simplify
> a couple of conditions in handle_stripe_dirtying().

Applied patches 2 and 3.

Thanks,
Shaohua
> 
> Signed-off-by: Song Liu <songliubraving@xxxxxx>
> ---
>  drivers/md/raid5-cache.c |  2 ++
>  drivers/md/raid5.c       | 44 +++++++++++++++++++++++++++++++++++---------
>  drivers/md/raid5.h       |  5 +++++
>  3 files changed, 42 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
> index 2bbc38b..248fede 100644
> --- a/drivers/md/raid5-cache.c
> +++ b/drivers/md/raid5-cache.c
> @@ -2459,6 +2459,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
>  			struct page *p = sh->dev[i].orig_page;
>  
>  			sh->dev[i].orig_page = sh->dev[i].page;
> +			clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
> +
>  			if (!using_disk_info_extra_page)
>  				put_page(p);
>  		}
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 0d2082d..3e75289 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -1056,7 +1056,17 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
>  
>  			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
>  				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
> -			sh->dev[i].vec.bv_page = sh->dev[i].page;
> +
> +			if (!op_is_write(op) &&
> +			    test_bit(R5_InJournal, &sh->dev[i].flags))
> +				/*
> +				 * issuing read for a page in journal, this
> +				 * must be preparing for prexor in rmw; read
> +				 * the data into orig_page
> +				 */
> +				sh->dev[i].vec.bv_page = sh->dev[i].orig_page;
> +			else
> +				sh->dev[i].vec.bv_page = sh->dev[i].page;
>  			bi->bi_vcnt = 1;
>  			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
>  			bi->bi_io_vec[0].bv_offset = 0;
> @@ -2421,6 +2431,13 @@ static void raid5_end_read_request(struct bio * bi)
>  		} else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
>  			clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
>  
> +		if (test_bit(R5_InJournal, &sh->dev[i].flags))
> +			/*
> +			 * end read for a page in journal, this
> +			 * must be preparing for prexor in rmw
> +			 */
> +			set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
> +
>  		if (atomic_read(&rdev->read_errors))
>  			atomic_set(&rdev->read_errors, 0);
>  	} else {
> @@ -3635,6 +3652,21 @@ static void handle_stripe_clean_event(struct r5conf *conf,
>  		break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
>  }
>  
> +/*
> + * For RMW in write back cache, we need extra page in prexor to store the
> + * old data. This page is stored in dev->orig_page.
> + *
> + * This function checks whether we have data for prexor. The exact logic
> + * is:
> + *       R5_UPTODATE && (!R5_InJournal || R5_OrigPageUPTDODATE)
> + */
> +static inline bool uptodate_for_rmw(struct r5dev *dev)
> +{
> +	return (test_bit(R5_UPTODATE, &dev->flags)) &&
> +		(!test_bit(R5_InJournal, &dev->flags) ||
> +		 test_bit(R5_OrigPageUPTDODATE, &dev->flags));
> +}
> +
>  static int handle_stripe_dirtying(struct r5conf *conf,
>  				  struct stripe_head *sh,
>  				  struct stripe_head_state *s,
> @@ -3666,9 +3698,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
>  		if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
>  		     test_bit(R5_InJournal, &dev->flags)) &&
>  		    !test_bit(R5_LOCKED, &dev->flags) &&
> -		    !((test_bit(R5_UPTODATE, &dev->flags) &&
> -		       (!test_bit(R5_InJournal, &dev->flags) ||
> -			dev->page != dev->orig_page)) ||
> +		    !(uptodate_for_rmw(dev) ||
>  		      test_bit(R5_Wantcompute, &dev->flags))) {
>  			if (test_bit(R5_Insync, &dev->flags))
>  				rmw++;
> @@ -3680,7 +3710,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
>  		    i != sh->pd_idx && i != sh->qd_idx &&
>  		    !test_bit(R5_LOCKED, &dev->flags) &&
>  		    !(test_bit(R5_UPTODATE, &dev->flags) ||
> -		      test_bit(R5_InJournal, &dev->flags) ||
>  		      test_bit(R5_Wantcompute, &dev->flags))) {
>  			if (test_bit(R5_Insync, &dev->flags))
>  				rcw++;
> @@ -3734,9 +3763,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
>  			     i == sh->pd_idx || i == sh->qd_idx ||
>  			     test_bit(R5_InJournal, &dev->flags)) &&
>  			    !test_bit(R5_LOCKED, &dev->flags) &&
> -			    !((test_bit(R5_UPTODATE, &dev->flags) &&
> -			       (!test_bit(R5_InJournal, &dev->flags) ||
> -				dev->page != dev->orig_page)) ||
> +			    !(uptodate_for_rmw(dev) ||
>  			      test_bit(R5_Wantcompute, &dev->flags)) &&
>  			    test_bit(R5_Insync, &dev->flags)) {
>  				if (test_bit(STRIPE_PREREAD_ACTIVE,
> @@ -3763,7 +3790,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
>  			    i != sh->pd_idx && i != sh->qd_idx &&
>  			    !test_bit(R5_LOCKED, &dev->flags) &&
>  			    !(test_bit(R5_UPTODATE, &dev->flags) ||
> -			      test_bit(R5_InJournal, &dev->flags) ||
>  			      test_bit(R5_Wantcompute, &dev->flags))) {
>  				rcw++;
>  				if (test_bit(R5_Insync, &dev->flags) &&
> diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
> index c582086..50855ad 100644
> --- a/drivers/md/raid5.h
> +++ b/drivers/md/raid5.h
> @@ -322,6 +322,11 @@ enum r5dev_flags {
>  			 * data and parity being written are in the journal
>  			 * device
>  			 */
> +	R5_OrigPageUPTDODATE,	/* with write back cache, we read old data into
> +				 * dev->orig_page for prexor. When this flag is
> +				 * set, orig_page contains latest data in the
> +				 * raid disk.
> +				 */
>  };
>  
>  /*
> -- 
> 2.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux