Hi Jackie, This patch has a lot of good fixes. Thanks for reviewing the code and proposing the fixes. Could you explain a little more why we need write an empty block before calling r5c_recovery_rewrite_data_only_stripes()? Thanks, Song > On Nov 25, 2016, at 3:39 AM, Jackie Liu <liuyun01@xxxxxxxxxx> wrote: > > Hi Song. > > There is a doubt for r5l_recovery_log. I think we need write an empty block first, > then call r5c_recovery_rewrite_data_only_stripes functions. this empty > block will be mark as the last_checkpoint. when the CACHING block is rewritten, > the superblock should be update this time. at the same time, we cann't be > released the stripe_head at the front, it also be used in > r5c_recovery_rewrite_data_only_stripes. > > here is the patch > > diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c > index 5f817bd..fad1808 100644 > --- a/drivers/md/raid5-cache.c > +++ b/drivers/md/raid5-cache.c > @@ -67,7 +67,7 @@ static char *r5c_journal_mode_str[] = {"write-through", > /* > * raid5 cache state machine > * > - * With rhe RAID cache, each stripe works in two phases: > + * With the RAID cache, each stripe works in two phases: > * - caching phase > * - writing-out phase > * > @@ -1674,7 +1674,6 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf, > > static struct stripe_head * > r5c_recovery_alloc_stripe(struct r5conf *conf, > - struct list_head *recovery_list, > sector_t stripe_sect, > sector_t log_start) > { > @@ -1855,8 +1854,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, > stripe_sect); > > if (!sh) { > - sh = r5c_recovery_alloc_stripe(conf, cached_stripe_list, > - stripe_sect, ctx->pos); > + sh = r5c_recovery_alloc_stripe(conf, stripe_sect, > + ctx->pos); > /* > * cannot get stripe from raid5_get_active_stripe > * try replay some stripes > @@ -1865,8 +1864,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, > r5c_recovery_replay_stripes( > cached_stripe_list, ctx); > sh = r5c_recovery_alloc_stripe( > - conf, cached_stripe_list, > - stripe_sect, ctx->pos); > + conf, stripe_sect, ctx->pos); > } > if (!sh) { > pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", > @@ -1875,8 +1873,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, > raid5_set_cache_size(mddev, > conf->min_nr_stripes * 2); > sh = r5c_recovery_alloc_stripe( > - conf, cached_stripe_list, stripe_sect, > - ctx->pos); > + conf, stripe_sect, ctx->pos); > } > if (!sh) { > pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", > @@ -1986,8 +1983,6 @@ static int r5c_recovery_flush_log(struct r5l_log *log, > list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { > WARN_ON(!test_bit(STRIPE_R5C_CACHING, &sh->state)); > r5c_recovery_load_one_stripe(log, sh); > - list_del_init(&sh->lru); > - raid5_release_stripe(sh); > ctx->data_only_stripes++; > } > > @@ -2078,7 +2073,6 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, > return -ENOMEM; > } > > - ctx->seq += 10; > list_for_each_entry(sh, &ctx->cached_list, lru) { > struct r5l_meta_block *mb; > int i; > @@ -2090,7 +2084,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, > ctx->pos, ctx->seq); > mb = page_address(page); > offset = le32_to_cpu(mb->meta_size); > - write_pos = ctx->pos + BLOCK_SECTORS; > + write_pos = r5l_ring_add(log, ctx->pos, BLOCK_SECTORS); > > for (i = sh->disks; i--; ) { > struct r5dev *dev = &sh->dev[i]; > @@ -2125,6 +2119,9 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, > sh->log_start = ctx->pos; > ctx->pos = write_pos; > ctx->seq += 1; > + > + list_del_init(&sh->lru); > + raid5_release_stripe(sh); > } > __free_page(page); > return 0; > @@ -2135,6 +2132,7 @@ static int r5l_recovery_log(struct r5l_log *log) > struct mddev *mddev = log->rdev->mddev; > struct r5l_recovery_ctx ctx; > int ret; > + sector_t pos; > > ctx.pos = log->last_checkpoint; > ctx.seq = log->last_cp_seq; > @@ -2152,6 +2150,10 @@ static int r5l_recovery_log(struct r5l_log *log) > if (ret) > return ret; > > + pos = ctx.pos; > + r5l_log_write_empty_meta_block(log, ctx.pos, (ctx.seq += 10)); > + ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); > + > if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) > pr_debug("md/raid:%s: starting from clean shutdown\n", > mdname(mddev)); > @@ -2170,9 +2172,9 @@ static int r5l_recovery_log(struct r5l_log *log) > > log->log_start = ctx.pos; > log->next_checkpoint = ctx.pos; > + log->last_checkpoint = pos; > log->seq = ctx.seq; > - r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq); > - r5l_write_super(log, ctx.pos); > + r5l_write_super(log, pos); > return 0; > } > > -- > 2.7.4 > > Thanks. > Jackie ?韬{.n?????%??檩??w?{.n???{炳盯w???塄}?财??j:+v??????2??璀??摺?囤??z夸z罐?+?????w棹f