Re: [PATCH v2 13/16] lightnvm: pblk: store multiple copies of smeta

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> On 22 Mar 2019, at 22.48, Igor Konopko <igor.j.konopko@xxxxxxxxx> wrote:
> 
> Currently there is only one copy of smeta stored per line in pblk. This
> is risky, because in case of a read error on such a chunk, we lose
> all the data from the whole line, which leads to silent data corruption.
> 
> This patch changes this behaviour and allows storing more than one
> copy of the smeta (specified by a module parameter) in order to provide
> higher reliability, by storing mirrored copies of the smeta struct and
> providing the possibility to fail over to another copy of that struct in
> case of a read error. Such an approach ensures that copies of these
> critical structures will be stored on different dies, and thus the
> predicted UBER is multiple times higher.
> 
> Signed-off-by: Igor Konopko <igor.j.konopko@xxxxxxxxx>
> ---
> drivers/lightnvm/pblk-core.c     | 124 ++++++++++++++++++++++++++++++++-------
> drivers/lightnvm/pblk-init.c     |  23 ++++++--
> drivers/lightnvm/pblk-recovery.c |  14 +++--
> drivers/lightnvm/pblk-rl.c       |   3 +-
> drivers/lightnvm/pblk.h          |   1 +
> 5 files changed, 132 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 06ac3f0..9d9a9e2 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -720,13 +720,14 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
> 	return bit * geo->ws_opt;
> }
> 
> -int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
> +static int pblk_line_smeta_read_copy(struct pblk *pblk,
> +				     struct pblk_line *line, u64 paddr)
> {
> 	struct nvm_tgt_dev *dev = pblk->dev;
> +	struct nvm_geo *geo = &dev->geo;
> 	struct pblk_line_meta *lm = &pblk->lm;
> 	struct bio *bio;
> 	struct nvm_rq rqd;
> -	u64 paddr = pblk_line_smeta_start(pblk, line);
> 	int i, ret;
> 
> 	memset(&rqd, 0, sizeof(struct nvm_rq));
> @@ -749,8 +750,20 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
> 	rqd.nr_ppas = lm->smeta_sec;
> 	rqd.is_seq = 1;
> 
> -	for (i = 0; i < lm->smeta_sec; i++, paddr++)
> -		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
> +	for (i = 0; i < rqd.nr_ppas; i++, paddr++) {
> +		struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line->id);
> +		int pos = pblk_ppa_to_pos(geo, ppa);
> +
> +		while (test_bit(pos, line->blk_bitmap)) {
> +			paddr += pblk->min_write_pgs;
> +			ppa = addr_to_gen_ppa(pblk, paddr, line->id);
> +			pos = pblk_ppa_to_pos(geo, ppa);
> +		}
> +
> +		rqd.ppa_list[i] = ppa;
> +		pblk_get_meta(pblk, rqd.meta_list, i)->lba =
> +				  cpu_to_le64(ADDR_EMPTY);
> +	}
> 
> 	ret = pblk_submit_io_sync(pblk, &rqd);
> 	if (ret) {
> @@ -771,16 +784,63 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
> 	return ret;
> }
> 
> -static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
> -				 u64 paddr)
> +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
> +{
> +	struct pblk_line_meta *lm = &pblk->lm;
> +	int i, ret = 0;
> +	u64 paddr = pblk_line_smeta_start(pblk, line);
> +
> +	for (i = 0; i < lm->smeta_copies; i++) {
> +		ret = pblk_line_smeta_read_copy(pblk, line,
> +						paddr + (i * lm->smeta_sec));
> +		if (!ret) {
> +			/*
> +			 * Just one successfully read copy of smeta is
> +			 * enough for us for recovery, don't need to
> +			 * read another one.
> +			 */
> +			return ret;
> +		}
> +	}
> +	return ret;
> +}
> +
> +static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line)
> {
> 	struct nvm_tgt_dev *dev = pblk->dev;
> +	struct nvm_geo *geo = &dev->geo;
> 	struct pblk_line_meta *lm = &pblk->lm;
> 	struct bio *bio;
> 	struct nvm_rq rqd;
> 	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
> 	__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
> -	int i, ret;
> +	u64 paddr = 0;
> +	int smeta_wr_len = lm->smeta_len;
> +	int smeta_wr_sec = lm->smeta_sec;
> +	int i, ret, rq_writes;
> +
> +	/*
> +	 * Check if we can write all the smeta copies with
> +	 * a single write command.
> +	 * If yes -> copy smeta sector into multiple copies
> +	 * in buffer to write.
> +	 * If no -> issue writes one by one using the same
> +	 * buffer space.
> +	 * Only if all the copies are written correctly
> +	 * we are treating this line as valid for proper
> +	 * UBER reliability.
> +	 */
> +	if (lm->smeta_sec * lm->smeta_copies > pblk->max_write_pgs) {
> +		rq_writes = lm->smeta_copies;
> +	} else {
> +		rq_writes = 1;
> +		for (i = 1; i < lm->smeta_copies; i++) {
> +			memcpy(line->smeta + i * lm->smeta_len,
> +			       line->smeta, lm->smeta_len);
> +		}
> +		smeta_wr_len *= lm->smeta_copies;
> +		smeta_wr_sec *= lm->smeta_copies;
> +	}
> 
> 	memset(&rqd, 0, sizeof(struct nvm_rq));
> 
> @@ -788,7 +848,8 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
> 	if (ret)
> 		return ret;
> 
> -	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
> +next_rq:
> +	bio = bio_map_kern(dev->q, line->smeta, smeta_wr_len, GFP_KERNEL);
> 	if (IS_ERR(bio)) {
> 		ret = PTR_ERR(bio);
> 		goto clear_rqd;
> @@ -799,15 +860,23 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
> 
> 	rqd.bio = bio;
> 	rqd.opcode = NVM_OP_PWRITE;
> -	rqd.nr_ppas = lm->smeta_sec;
> +	rqd.nr_ppas = smeta_wr_sec;
> 	rqd.is_seq = 1;
> 
> -	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
> -		struct pblk_sec_meta *meta = pblk_get_meta(pblk,
> -							   rqd.meta_list, i);
> +	for (i = 0; i < rqd.nr_ppas; i++, paddr++) {
> +		void *meta_list = rqd.meta_list;
> +		struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line->id);
> +		int pos = pblk_ppa_to_pos(geo, ppa);
> 
> -		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
> -		meta->lba = lba_list[paddr] = addr_empty;
> +		while (test_bit(pos, line->blk_bitmap)) {
> +			paddr += pblk->min_write_pgs;
> +			ppa = addr_to_gen_ppa(pblk, paddr, line->id);
> +			pos = pblk_ppa_to_pos(geo, ppa);
> +		}
> +
> +		rqd.ppa_list[i] = ppa;
> +		pblk_get_meta(pblk, meta_list, i)->lba = addr_empty;
> +		lba_list[paddr] = addr_empty;
> 	}
> 
> 	ret = pblk_submit_io_sync_sem(pblk, &rqd);
> @@ -822,8 +891,13 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
> 	if (rqd.error) {
> 		pblk_log_write_err(pblk, &rqd);
> 		ret = -EIO;
> +		goto clear_rqd;
> 	}
> 
> +	rq_writes--;
> +	if (rq_writes > 0)
> +		goto next_rq;
> +
> clear_rqd:
> 	pblk_free_rqd_meta(pblk, &rqd);
> 	return ret;
> @@ -1020,7 +1094,7 @@ static void pblk_line_setup_metadata(struct pblk_line *line,
> 	line->smeta = l_mg->sline_meta[meta_line];
> 	line->emeta = l_mg->eline_meta[meta_line];
> 
> -	memset(line->smeta, 0, lm->smeta_len);
> +	memset(line->smeta, 0, lm->smeta_len * lm->smeta_copies);
> 	memset(line->emeta->buf, 0, lm->emeta_len[0]);
> 
> 	line->emeta->mem = 0;
> @@ -1147,7 +1221,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
> 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
> 	u64 off;
> 	int bit = -1;
> -	int emeta_secs;
> +	int emeta_secs, smeta_secs;
> 
> 	line->sec_in_line = lm->sec_per_line;
> 
> @@ -1163,13 +1237,19 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
> 	}
> 
> 	/* Mark smeta metadata sectors as bad sectors */
> -	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
> -	off = bit * geo->ws_opt;
> -	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
> -	line->sec_in_line -= lm->smeta_sec;
> -	line->cur_sec = off + lm->smeta_sec;
> +	smeta_secs = lm->smeta_sec * lm->smeta_copies;
> +	bit = -1;
> +	while (smeta_secs) {
> +		bit = find_next_zero_bit(line->blk_bitmap, lm->blk_per_line,
> +					bit + 1);
> +		off = bit * geo->ws_opt;
> +		bitmap_set(line->map_bitmap, off, geo->ws_opt);
> +		line->cur_sec = off + geo->ws_opt;
> +		smeta_secs -= lm->smeta_sec;
> +	}
> +	line->sec_in_line -= (lm->smeta_sec * lm->smeta_copies);
> 
> -	if (init && pblk_line_smeta_write(pblk, line, off)) {
> +	if (init && pblk_line_smeta_write(pblk, line)) {
> 		pblk_debug(pblk, "line smeta I/O failed. Retry\n");
> 		return 0;
> 	}
> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> index 4211cd1..5717ad4 100644
> --- a/drivers/lightnvm/pblk-init.c
> +++ b/drivers/lightnvm/pblk-init.c
> @@ -27,6 +27,11 @@ static unsigned int write_buffer_size;
> module_param(write_buffer_size, uint, 0644);
> MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
> 
> +static unsigned int smeta_copies = 1;
> +
> +module_param(smeta_copies, int, 0644);
> +MODULE_PARM_DESC(smeta_copies, "number of smeta copies");
> +
> struct pblk_global_caches {
> 	struct kmem_cache	*ws;
> 	struct kmem_cache	*rec;
> @@ -867,7 +872,8 @@ static int pblk_line_mg_init(struct pblk *pblk)
> 	 * emeta depends on the number of LUNs allocated to the pblk instance
> 	 */
> 	for (i = 0; i < PBLK_DATA_LINES; i++) {
> -		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
> +		l_mg->sline_meta[i] = kmalloc(lm->smeta_len
> +						* lm->smeta_copies, GFP_KERNEL);
> 		if (!l_mg->sline_meta[i])
> 			goto fail_free_smeta;
> 	}
> @@ -967,6 +973,12 @@ static int pblk_line_meta_init(struct pblk *pblk)
> 	lm->mid_thrs = lm->sec_per_line / 2;
> 	lm->high_thrs = lm->sec_per_line / 4;
> 	lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs;
> +	lm->smeta_copies = smeta_copies;
> +
> +	if (lm->smeta_copies < 1 || lm->smeta_copies > geo->all_luns) {
> +		pblk_err(pblk, "unsupported smeta copies parameter\n");
> +		return -EINVAL;
> +	}
> 
> 	/* Calculate necessary pages for smeta. See comment over struct
> 	 * line_smeta definition
> @@ -998,10 +1010,11 @@ static int pblk_line_meta_init(struct pblk *pblk)
> 
> 	lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0;
> 
> -	lm->min_blk_line = 1;
> -	if (geo->all_luns > 1)
> -		lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
> -					lm->emeta_sec[0], geo->clba);
> +	lm->min_blk_line = lm->smeta_copies;
> +	if (geo->all_luns > lm->smeta_copies) {
> +		lm->min_blk_line += DIV_ROUND_UP((lm->smeta_sec
> +			* lm->smeta_copies) + lm->emeta_sec[0], geo->clba);
> +	}
> 
> 	if (lm->min_blk_line > lm->blk_per_line) {
> 		pblk_err(pblk, "config. not supported. Min. LUN in line:%d\n",
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index 74e5b17..038931d 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -51,7 +51,8 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
> 	if (!lba_list)
> 		return 1;
> 
> -	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
> +	data_start = pblk_line_smeta_start(pblk, line)
> +					+ (lm->smeta_sec * lm->smeta_copies);
> 	data_end = line->emeta_ssec;
> 	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
> 
> @@ -140,7 +141,8 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
> 	if (lm->blk_per_line - nr_bb != valid_chunks)
> 		pblk_err(pblk, "recovery line %d is bad\n", line->id);
> 
> -	pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);
> +	pblk_update_line_wp(pblk, line, written_secs -
> +					(lm->smeta_sec * lm->smeta_copies));
> 
> 	return written_secs;
> }
> @@ -383,12 +385,14 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 	void *data;
> 	dma_addr_t dma_ppa_list, dma_meta_list;
> 	__le64 *lba_list;
> -	u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
> +	u64 paddr = pblk_line_smeta_start(pblk, line) +
> +					(lm->smeta_sec * lm->smeta_copies);
> 	bool padded = false;
> 	int rq_ppas, rq_len;
> 	int i, j;
> 	int ret;
> -	u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
> +	u64 left_ppas = pblk_sec_in_open_line(pblk, line) -
> +					(lm->smeta_sec * lm->smeta_copies);
> 
> 	if (pblk_line_wps_are_unbalanced(pblk, line))
> 		pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
> @@ -722,7 +726,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
> 
> 		line = &pblk->lines[i];
> 
> -		memset(smeta, 0, lm->smeta_len);
> +		memset(smeta, 0, lm->smeta_len * lm->smeta_copies);
> 		line->smeta = smeta;
> 		line->lun_bitmap = ((void *)(smeta_buf)) +
> 						sizeof(struct line_smeta);
> diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
> index b014957..944372c 100644
> --- a/drivers/lightnvm/pblk-rl.c
> +++ b/drivers/lightnvm/pblk-rl.c
> @@ -218,7 +218,8 @@ void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
> 	unsigned int rb_windows;
> 
> 	/* Consider sectors used for metadata */
> -	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
> +	sec_meta = ((lm->smeta_sec * lm->smeta_copies)
> +			+ lm->emeta_sec[0]) * l_mg->nr_free_lines;
> 	blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
> 
> 	rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index 3a84c8a..0999245 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -547,6 +547,7 @@ struct pblk_line_mgmt {
> struct pblk_line_meta {
> 	unsigned int smeta_len;		/* Total length for smeta */
> 	unsigned int smeta_sec;		/* Sectors needed for smeta */
> +	unsigned int smeta_copies;	/* Number of smeta copies */
> 
> 	unsigned int emeta_len[4];	/* Lengths for emeta:
> 					 *  [0]: Total
> --
> 2.9.5

Looks good.


Reviewed-by: Javier González <javier@xxxxxxxxxxx>

Attachment: signature.asc
Description: Message signed with OpenPGP


[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux