On Fri, Apr 20, 2018 at 9:49 PM, Javier Gonzalez <javier@xxxxxxxxxxxx> wrote: >> On 19 Apr 2018, at 09.39, Hans Holmberg <hans.ml.holmberg@xxxxxxxxxxxxx> wrote: >> >> From: Hans Holmberg <hans.holmberg@xxxxxxxxxxxx> >> >> Write failures should not happen under normal circumstances, >> so in order to bring the chunk back into a known state as soon >> as possible, evacuate all the valid data out of the line and let the >> fw judge if the block can be written to in the next reset cycle. >> >> Do this by introducing a new gc list for lines with failed writes, >> and ensure that the rate limiter allocates a small portion of >> the write bandwidth to get the job done. >> >> The lba list is saved in memory for use during gc as we >> cannot guarantee that the emeta data is readable if a write >> error occurred. >> >> Signed-off-by: Hans Holmberg <hans.holmberg@xxxxxxxxxxxx> >> --- >> drivers/lightnvm/pblk-core.c | 43 +++++++++++++++++++++-- >> drivers/lightnvm/pblk-gc.c | 79 +++++++++++++++++++++++++++---------------- >> drivers/lightnvm/pblk-init.c | 39 ++++++++++++++------- >> drivers/lightnvm/pblk-rl.c | 29 +++++++++++++--- >> drivers/lightnvm/pblk-sysfs.c | 15 ++++++-- >> drivers/lightnvm/pblk-write.c | 2 ++ >> drivers/lightnvm/pblk.h | 25 +++++++++++--- >> 7 files changed, 178 insertions(+), 54 deletions(-) >> >> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c >> index 7762e89..f6135e4 100644 >> --- a/drivers/lightnvm/pblk-core.c >> +++ b/drivers/lightnvm/pblk-core.c >> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) >> >> lockdep_assert_held(&line->lock); >> >> - if (!vsc) { >> + if (line->w_err_gc->has_write_err) { >> + if (line->gc_group != PBLK_LINEGC_WERR) { >> + line->gc_group = PBLK_LINEGC_WERR; >> + move_list = &l_mg->gc_werr_list; >> + pblk_rl_werr_line_in(&pblk->rl); >> + } >> + } else if (!vsc) { >> if (line->gc_group != PBLK_LINEGC_FULL) { >> line->gc_group = PBLK_LINEGC_FULL; >>
move_list = &l_mg->gc_full_list; >> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) >> line->state = PBLK_LINESTATE_FREE; >> line->gc_group = PBLK_LINEGC_NONE; >> pblk_line_free(line); >> - spin_unlock(&line->lock); >> >> + if (line->w_err_gc->has_write_err) { >> + pblk_rl_werr_line_out(&pblk->rl); >> + line->w_err_gc->has_write_err = 0; >> + } >> + >> + spin_unlock(&line->lock); >> atomic_dec(&gc->pipeline_gc); >> >> spin_lock(&l_mg->free_lock); >> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) >> >> spin_lock(&l_mg->close_lock); >> spin_lock(&line->lock); >> + >> + /* Update the in-memory start address for emeta, in case it has >> + * shifted due to write errors >> + */ >> + if (line->emeta_ssec != line->cur_sec) >> + line->emeta_ssec = line->cur_sec; >> + >> list_add_tail(&line->list, &l_mg->emeta_list); >> spin_unlock(&line->lock); >> spin_unlock(&l_mg->close_lock); >> >> pblk_line_should_sync_meta(pblk); >> + >> + >> +} >> + >> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) >> +{ >> + struct pblk_line_meta *lm = &pblk->lm; >> + unsigned int lba_list_size = lm->emeta_len[2]; >> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; >> + struct pblk_emeta *emeta = line->emeta; >> + >> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL); >> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf), >> + lba_list_size); >> } >> >> void pblk_line_close_ws(struct work_struct *work) >> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work) >> ws); >> struct pblk *pblk = line_ws->pblk; >> struct pblk_line *line = line_ws->line; >> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; >> + >> + /* Write errors makes the emeta start address stored in smeta invalid, >> + * so keep a copy of the lba list until we've gc'd the line >> + */ >> + if (w_err_gc->has_write_err) >> + pblk_save_lba_list(pblk, line); >> >> pblk_line_close(pblk, 
line); >> mempool_free(line_ws, pblk->gen_ws_pool); >> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c >> index b0cc277..62f0548 100644 >> --- a/drivers/lightnvm/pblk-gc.c >> +++ b/drivers/lightnvm/pblk-gc.c >> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) >> struct pblk_line_mgmt *l_mg = &pblk->l_mg; >> struct pblk_line_meta *lm = &pblk->lm; >> struct pblk_gc *gc = &pblk->gc; >> - struct line_emeta *emeta_buf; >> + struct line_emeta *emeta_buf = NULL; >> struct pblk_line_ws *gc_rq_ws; >> struct pblk_gc_rq *gc_rq; >> - __le64 *lba_list; >> + __le64 *lba_list = NULL; >> unsigned long *invalid_bitmap; >> int sec_left, nr_secs, bit; >> int ret; >> @@ -150,34 +150,42 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) >> if (!invalid_bitmap) >> goto fail_free_ws; >> >> - emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type, >> - GFP_KERNEL); >> - if (!emeta_buf) { >> - pr_err("pblk: cannot use GC emeta\n"); >> - goto fail_free_bitmap; >> - } >> - >> - ret = pblk_line_read_emeta(pblk, line, emeta_buf); >> - if (ret) { >> - pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret); >> - goto fail_free_emeta; >> - } >> + if (line->w_err_gc->has_write_err) { >> + lba_list = line->w_err_gc->lba_list; >> + } else { >> + emeta_buf = pblk_malloc(lm->emeta_len[0], >> + l_mg->emeta_alloc_type, GFP_KERNEL); >> + if (!emeta_buf) { >> + pr_err("pblk: cannot use GC emeta\n"); >> + goto fail_free_bitmap; >> + } >> >> - /* If this read fails, it means that emeta is corrupted. For now, leave >> - * the line untouched. TODO: Implement a recovery routine that scans and >> - * moves all sectors on the line. 
>> - */ >> + ret = pblk_line_read_emeta(pblk, line, emeta_buf); >> + if (ret) { >> + pr_err("pblk: line %d read emeta failed (%d)\n", >> + line->id, ret); >> + goto fail_free_emeta; >> + } >> >> - ret = pblk_recov_check_emeta(pblk, emeta_buf); >> - if (ret) { >> - pr_err("pblk: inconsistent emeta (line %d)\n", line->id); >> - goto fail_free_emeta; >> - } >> + /* If this read fails, it means that emeta is corrupted. >> + * For now, leave the line untouched. >> + * TODO: Implement a recovery routine that scans and moves >> + * all sectors on the line., >> + */ >> + >> + ret = pblk_recov_check_emeta(pblk, emeta_buf); >> + if (ret) { >> + pr_err("pblk: inconsistent emeta (line %d)\n", >> + line->id); >> + goto fail_free_emeta; >> + } >> >> - lba_list = emeta_to_lbas(pblk, emeta_buf); >> - if (!lba_list) { >> - pr_err("pblk: could not interpret emeta (line %d)\n", line->id); >> - goto fail_free_emeta; >> + lba_list = emeta_to_lbas(pblk, emeta_buf); >> + if (!lba_list) { >> + pr_err("pblk: could not interpret emeta (line %d)\n", >> + line->id); >> + goto fail_free_emeta; >> + } >> } > > > would it be an idea to move all the logic above to a different > function returning lba_list? This way, we do not have an extra indent > for a single line use case. Yes, indeed it would. I'll refactor this into a separate function.
> >> >> spin_lock(&line->lock); >> @@ -240,7 +248,12 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) >> goto next_rq; >> >> out: >> - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); >> + if (line->w_err_gc->has_write_err) { >> + kfree(lba_list); >> + line->w_err_gc->lba_list = NULL; >> + } else >> + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); >> + >> kfree(line_ws); >> kfree(invalid_bitmap); >> >> @@ -252,7 +265,11 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) >> fail_free_gc_rq: >> kfree(gc_rq); >> fail_free_emeta: >> - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); >> + if (line->w_err_gc->has_write_err) { >> + kfree(lba_list); >> + line->w_err_gc->lba_list = NULL; >> + } else >> + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); > > Can you open/close brackets here too? I'll tidy this up along with the refactoring mentioned above. > >> fail_free_bitmap: >> kfree(invalid_bitmap); >> fail_free_ws: >> @@ -349,12 +366,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk, >> static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) >> { >> unsigned int nr_blocks_free, nr_blocks_need; >> + unsigned int werr_lines = atomic_read(&rl->werr_lines); >> >> nr_blocks_need = pblk_rl_high_thrs(rl); >> nr_blocks_free = pblk_rl_nr_free_blks(rl); >> >> /* This is not critical, no need to take lock here */ >> - return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)); >> + return ((werr_lines > 0) || >> + ((gc->gc_active) && (nr_blocks_need > nr_blocks_free))); >> } >> >> void pblk_gc_free_full_lines(struct pblk *pblk) >> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c >> index 6f06727..092e361 100644 >> --- a/drivers/lightnvm/pblk-init.c >> +++ b/drivers/lightnvm/pblk-init.c >> @@ -495,9 +495,14 @@ static void pblk_line_mg_free(struct pblk *pblk) >> >> static void pblk_line_meta_free(struct pblk_line *line) >> { >> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; >> + >> 
kfree(line->blk_bitmap); >> kfree(line->erase_bitmap); >> kfree(line->chks); >> + >> + kfree(w_err_gc->lba_list); >> + kfree(w_err_gc); >> } >> >> static void pblk_lines_free(struct pblk *pblk) >> @@ -813,20 +818,28 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line) >> return -ENOMEM; >> >> line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); >> - if (!line->erase_bitmap) { >> - kfree(line->blk_bitmap); >> - return -ENOMEM; >> - } >> + if (!line->erase_bitmap) >> + goto free_blk_bitmap; >> + >> >> line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta), >> GFP_KERNEL); >> - if (!line->chks) { >> - kfree(line->erase_bitmap); >> - kfree(line->blk_bitmap); >> - return -ENOMEM; >> - } >> + if (!line->chks) >> + goto free_erase_bitmap; >> + >> + line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL); >> + if (!line->w_err_gc) >> + goto free_chks; >> >> return 0; >> + >> +free_chks: >> + kfree(line->chks); >> +free_erase_bitmap: >> + kfree(line->erase_bitmap); >> +free_blk_bitmap: >> + kfree(line->blk_bitmap); >> + return -ENOMEM; >> } >> >> static int pblk_line_mg_init(struct pblk *pblk) >> @@ -851,12 +864,14 @@ static int pblk_line_mg_init(struct pblk *pblk) >> INIT_LIST_HEAD(&l_mg->gc_mid_list); >> INIT_LIST_HEAD(&l_mg->gc_low_list); >> INIT_LIST_HEAD(&l_mg->gc_empty_list); >> + INIT_LIST_HEAD(&l_mg->gc_werr_list); >> >> INIT_LIST_HEAD(&l_mg->emeta_list); >> >> - l_mg->gc_lists[0] = &l_mg->gc_high_list; >> - l_mg->gc_lists[1] = &l_mg->gc_mid_list; >> - l_mg->gc_lists[2] = &l_mg->gc_low_list; >> + l_mg->gc_lists[0] = &l_mg->gc_werr_list; >> + l_mg->gc_lists[1] = &l_mg->gc_high_list; >> + l_mg->gc_lists[2] = &l_mg->gc_mid_list; >> + l_mg->gc_lists[3] = &l_mg->gc_low_list; >> >> spin_lock_init(&l_mg->free_lock); >> spin_lock_init(&l_mg->close_lock); >> diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c >> index 883a711..6a0616a 100644 >> --- a/drivers/lightnvm/pblk-rl.c >> +++ 
b/drivers/lightnvm/pblk-rl.c >> @@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) >> pblk_rl_kick_u_timer(rl); >> } >> >> +void pblk_rl_werr_line_in(struct pblk_rl *rl) >> +{ >> + atomic_inc(&rl->werr_lines); >> +} >> + >> +void pblk_rl_werr_line_out(struct pblk_rl *rl) >> +{ >> + atomic_dec(&rl->werr_lines); >> +} >> + >> void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries) >> { >> atomic_add(nr_entries, &rl->rb_gc_cnt); >> @@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, >> { >> struct pblk *pblk = container_of(rl, struct pblk, rl); >> int max = rl->rb_budget; >> + int werr_gc_needed = atomic_read(&rl->werr_lines); >> >> if (free_blocks >= rl->high) { >> - rl->rb_user_max = max; >> - rl->rb_gc_max = 0; >> - rl->rb_state = PBLK_RL_HIGH; >> + if (werr_gc_needed) { >> + /* Allocate a small budget for recovering >> + * lines with write errors >> + */ >> + rl->rb_gc_max = 1 << rl->rb_windows_pw; >> + rl->rb_user_max = max - rl->rb_gc_max; >> + rl->rb_state = PBLK_RL_WERR; >> + } else { >> + rl->rb_user_max = max; >> + rl->rb_gc_max = 0; >> + rl->rb_state = PBLK_RL_OFF; >> + } >> } else if (free_blocks < rl->high) { >> int shift = rl->high_pw - rl->rb_windows_pw; >> int user_windows = free_blocks >> shift; >> @@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, >> rl->rb_state = PBLK_RL_LOW; >> } >> >> - if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW)) >> + if (rl->rb_state != PBLK_RL_OFF) >> pblk_gc_should_start(pblk); >> else >> pblk_gc_should_stop(pblk); >> @@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) >> atomic_set(&rl->rb_user_cnt, 0); >> atomic_set(&rl->rb_gc_cnt, 0); >> atomic_set(&rl->rb_space, -1); >> + atomic_set(&rl->werr_lines, 0); >> >> timer_setup(&rl->u_timer, pblk_rl_u_timer, 0); >> >> diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c >> index e61909a..88a0a7c 100644 >> --- a/drivers/lightnvm/pblk-sysfs.c >> +++ 
b/drivers/lightnvm/pblk-sysfs.c >> @@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) >> int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0; >> int d_line_cnt = 0, l_line_cnt = 0; >> int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; >> + int gc_werr = 0; >> + >> int bad = 0, cor = 0; >> int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; >> int map_weight = 0, meta_weight = 0; >> @@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) >> gc_empty++; >> } >> >> + list_for_each_entry(line, &l_mg->gc_werr_list, list) { >> + if (line->type == PBLK_LINETYPE_DATA) >> + d_line_cnt++; >> + else if (line->type == PBLK_LINETYPE_LOG) >> + l_line_cnt++; >> + closed_line_cnt++; >> + gc_werr++; >> + } >> + >> list_for_each_entry(line, &l_mg->bad_list, list) >> bad++; >> list_for_each_entry(line, &l_mg->corrupt_list, list) >> @@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) >> l_mg->nr_lines); >> >> sz += snprintf(page + sz, PAGE_SIZE - sz, >> - "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n", >> - gc_full, gc_high, gc_mid, gc_low, gc_empty, >> + "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n", >> + gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr, >> atomic_read(&pblk->gc.read_inflight_gc)); >> >> sz += snprintf(page + sz, PAGE_SIZE - sz, >> diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c >> index ab45157..3b6bead 100644 >> --- a/drivers/lightnvm/pblk-write.c >> +++ b/drivers/lightnvm/pblk-write.c >> @@ -136,6 +136,7 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) >> } >> } >> >> + line->w_err_gc->has_write_err = 1; >> spin_unlock(&line->lock); >> } >> >> @@ -277,6 +278,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) >> if (rqd->error) { >> pblk_log_write_err(pblk, rqd); >> pr_err("pblk: metadata I/O failed. 
Line %d\n", line->id); >> + line->w_err_gc->has_write_err = 1; >> } >> >> sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); >> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h >> index cff6aea..a4e55d8 100644 >> --- a/drivers/lightnvm/pblk.h >> +++ b/drivers/lightnvm/pblk.h >> @@ -89,12 +89,14 @@ struct pblk_sec_meta { >> /* The number of GC lists and the rate-limiter states go together. This way the >> * rate-limiter can dictate how much GC is needed based on resource utilization. >> */ >> -#define PBLK_GC_NR_LISTS 3 >> +#define PBLK_GC_NR_LISTS 4 >> >> enum { >> - PBLK_RL_HIGH = 1, >> - PBLK_RL_MID = 2, >> - PBLK_RL_LOW = 3, >> + PBLK_RL_OFF = 0, >> + PBLK_RL_WERR = 1, >> + PBLK_RL_HIGH = 2, >> + PBLK_RL_MID = 3, >> + PBLK_RL_LOW = 4 >> }; >> >> #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) >> @@ -278,6 +280,8 @@ struct pblk_rl { >> int rb_user_active; >> int rb_gc_active; >> >> + atomic_t werr_lines; /* Number of write error lines that needs gc */ >> + >> struct timer_list u_timer; >> >> unsigned long long nr_secs; >> @@ -311,6 +315,7 @@ enum { >> PBLK_LINEGC_MID = 23, >> PBLK_LINEGC_HIGH = 24, >> PBLK_LINEGC_FULL = 25, >> + PBLK_LINEGC_WERR = 26 >> }; >> >> #define PBLK_MAGIC 0x70626c6b /*pblk*/ >> @@ -412,6 +417,11 @@ struct pblk_smeta { >> struct line_smeta *buf; /* smeta buffer in persistent format */ >> }; >> >> +struct pblk_w_err_gc { >> + int has_write_err; >> + __le64 *lba_list; >> +}; >> + >> struct pblk_line { >> struct pblk *pblk; >> unsigned int id; /* Line number corresponds to the >> @@ -457,6 +467,8 @@ struct pblk_line { >> >> struct kref ref; /* Write buffer L2P references */ >> >> + struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */ >> + >> spinlock_t lock; /* Necessary for invalid_bitmap only */ >> }; >> >> @@ -488,6 +500,8 @@ struct pblk_line_mgmt { >> struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ >> struct list_head gc_low_list; /* Full lines ready to 
GC, low isc */ >> >> + struct list_head gc_werr_list; /* Write err recovery list */ >> + >> struct list_head gc_full_list; /* Full lines ready to GC, no valid */ >> struct list_head gc_empty_list; /* Full lines close, all valid */ >> >> @@ -894,6 +908,9 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, >> bool used); >> int pblk_rl_is_limit(struct pblk_rl *rl); >> >> +void pblk_rl_werr_line_in(struct pblk_rl *rl); >> +void pblk_rl_werr_line_out(struct pblk_rl *rl); >> + >> /* >> * pblk sysfs >> */ >> -- >> 2.7.4 > > Otherwise, it looks good to me Great, thanks. > > Javier