> On 17 Sep 2018, at 10.26, Matias Bjørling <mb@xxxxxxxxxxx> wrote:
>
> On 09/11/2018 01:35 PM, Javier González wrote:
>> On the OCSSD 2.0 spec, the device populates the metadata pointer (if
>> provided) when a chunk is reset. Implement this path in pblk and use it
>> for sanity chunk checks.
>> For 1.2, reset the write pointer and the state on core so that the erase
>> path is transparent to pblk wrt OCSSD version.
>> Signed-off-by: Javier González <javier@xxxxxxxxxxxx>
>> ---
>>  drivers/lightnvm/core.c      | 44 ++++++++++++++++++++++++++++++++++++--
>>  drivers/lightnvm/pblk-core.c | 51 +++++++++++++++++++++++++++++++++-----------
>>  2 files changed, 80 insertions(+), 15 deletions(-)
>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>> index efb976a863d2..dceaae4e795f 100644
>> --- a/drivers/lightnvm/core.c
>> +++ b/drivers/lightnvm/core.c
>> @@ -750,9 +750,40 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
>>  }
>>  EXPORT_SYMBOL(nvm_submit_io);
>> +/* Take only addresses in generic format */
>> +static void nvm_set_chunk_state_12(struct nvm_dev *dev, struct nvm_rq *rqd)
>> +{
>> +        struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
>> +        int i;
>> +
>> +        for (i = 0; i < rqd->nr_ppas; i++) {
>> +                struct ppa_addr ppa;
>> +                struct nvm_chk_meta *chunk;
>> +
>> +                chunk = ((struct nvm_chk_meta *)rqd->meta_list) + i;
>> +
>> +                if (rqd->error)
>> +                        chunk->state = NVM_CHK_ST_OFFLINE;
>> +                else
>> +                        chunk->state = NVM_CHK_ST_FREE;
>> +
>> +                chunk->wp = 0;
>> +                chunk->wi = 0;
>> +                chunk->type = NVM_CHK_TP_W_SEQ;
>> +                chunk->cnlb = dev->geo.clba;
>> +
>> +                /* recalculate slba for the chunk */
>> +                ppa = ppa_list[i];
>> +                ppa.g.pg = ppa.g.pl = ppa.g.sec = 0;
>> +
>> +                chunk->slba = generic_to_dev_addr(dev, ppa).ppa;
>> +        }
>> +}
>> +
>>  int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
>>  {
>>          struct nvm_dev *dev = tgt_dev->parent;
>> +        struct nvm_geo *geo = &dev->geo;
>>          int ret;
>>          if (!dev->ops->submit_io_sync)
>> @@ -765,8 +796,12 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
>>          /* In case of error, fail with right address format */
>>          ret = dev->ops->submit_io_sync(dev, rqd);
>> +
>>          nvm_rq_dev_to_tgt(tgt_dev, rqd);
>> +        if (geo->version == NVM_OCSSD_SPEC_12 && rqd->opcode == NVM_OP_ERASE)
>> +                nvm_set_chunk_state_12(dev, rqd);
>> +
>>          return ret;
>>  }
>>  EXPORT_SYMBOL(nvm_submit_io_sync);
>> @@ -775,10 +810,15 @@ void nvm_end_io(struct nvm_rq *rqd)
>>  {
>>          struct nvm_tgt_dev *tgt_dev = rqd->dev;
>> -        /* Convert address space */
>> -        if (tgt_dev)
>> +        if (tgt_dev) {
>> +                /* Convert address space */
>>                  nvm_rq_dev_to_tgt(tgt_dev, rqd);
>> +                if (tgt_dev->geo.version == NVM_OCSSD_SPEC_12 &&
>> +                                rqd->opcode == NVM_OP_ERASE)
>> +                        nvm_set_chunk_state_12(tgt_dev->parent, rqd);
>> +        }
>> +
>>          if (rqd->end_io)
>>                  rqd->end_io(rqd);
>>  }
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 417d12b274da..80f0ec756672 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -79,7 +79,7 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
>>  {
>>          struct nvm_tgt_dev *dev = pblk->dev;
>>          struct nvm_geo *geo = &dev->geo;
>> -        struct nvm_chk_meta *chunk;
>> +        struct nvm_chk_meta *chunk, *dev_chunk;
>>          struct pblk_line *line;
>>          int pos;
>> @@ -89,22 +89,39 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
>>          atomic_dec(&line->left_seblks);
>> +        /* pblk submits a single erase per command */
>> +        dev_chunk = rqd->meta_list;
>> +
>> +        if (dev_chunk->slba != chunk->slba || dev_chunk->wp)
>> +                print_chunk(pblk, chunk, "corrupted erase chunk", 0);
>> +
>> +        memcpy(chunk, dev_chunk, sizeof(struct nvm_chk_meta));
>> +
>>          if (rqd->error) {
>>                  trace_pblk_chunk_reset(pblk_disk_name(pblk),
>>                                  &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED);
>> -                chunk->state = NVM_CHK_ST_OFFLINE;
>> +#ifdef CONFIG_NVM_PBLK_DEBUG
>> +                if (chunk->state != NVM_CHK_ST_OFFLINE)
>> +                        print_chunk(pblk, chunk,
>> +                                        "corrupted erase chunk state", 0);
>> +#endif
>>                  pblk_mark_bb(pblk, line, rqd->ppa_addr);
>>          } else {
>>                  trace_pblk_chunk_reset(pblk_disk_name(pblk),
>>                                  &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE);
>> -                chunk->state = NVM_CHK_ST_FREE;
>> +#ifdef CONFIG_NVM_PBLK_DEBUG
>> +                if (chunk->state != NVM_CHK_ST_FREE)
>> +                        print_chunk(pblk, chunk,
>> +                                        "corrupted erase chunk state", 0);
>> +#endif
>>          }
>>          trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr,
>>                                  chunk->state);
>> +        pblk_free_rqd_meta(pblk, rqd);
>>          atomic_dec(&pblk->inflight_io);
>>  }
>> @@ -952,14 +969,16 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
>>          return ret;
>>  }
>> -static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
>> -                            struct ppa_addr ppa)
>> +static int pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
>> +                           struct ppa_addr ppa)
>>  {
>>          rqd->opcode = NVM_OP_ERASE;
>>          rqd->ppa_addr = ppa;
>>          rqd->nr_ppas = 1;
>>          rqd->is_seq = 1;
>>          rqd->bio = NULL;
>> +
>> +        return pblk_alloc_rqd_meta(pblk, rqd);
>>  }
>>  static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
>> @@ -967,10 +986,12 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
>>          struct nvm_rq rqd = {NULL};
>>          int ret;
>> +        ret = pblk_setup_e_rq(pblk, &rqd, ppa);
>> +        if (ret)
>> +                return ret;
>> +
>>          trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa,
>> -                                                PBLK_CHUNK_RESET_START);
>> -
>> -        pblk_setup_e_rq(pblk, &rqd, ppa);
>> +                                PBLK_CHUNK_RESET_START);
>>          /* The write thread schedules erases so that it minimizes disturbances
>>           * with writes. Thus, there is no need to take the LUN semaphore.
>> @@ -1774,11 +1795,15 @@ void pblk_line_put_wq(struct kref *ref)
>>  int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
>>  {
>>          struct nvm_rq *rqd;
>> -        int err;
>> +        int ret;
>>          rqd = pblk_alloc_rqd(pblk, PBLK_ERASE);
>> -        pblk_setup_e_rq(pblk, rqd, ppa);
>> +        ret = pblk_setup_e_rq(pblk, rqd, ppa);
>> +        if (ret) {
>> +                pblk_free_rqd(pblk, rqd, PBLK_ERASE);
>> +                return ret;
>> +        }
>>          rqd->end_io = pblk_end_io_erase;
>>          rqd->private = pblk;
>> @@ -1789,8 +1814,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
>>          /* The write thread schedules erases so that it minimizes disturbances
>>           * with writes. Thus, there is no need to take the LUN semaphore.
>>           */
>> -        err = pblk_submit_io(pblk, rqd);
>> -        if (err) {
>> +        ret = pblk_submit_io(pblk, rqd);
>> +        if (ret) {
>>                  struct nvm_tgt_dev *dev = pblk->dev;
>>                  struct nvm_geo *geo = &dev->geo;
>> @@ -1799,7 +1824,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
>>                                  pblk_ppa_to_pos(geo, ppa));
>>          }
>> -        return err;
>> +        return ret;
>>  }
>>  struct pblk_line *pblk_line_get_data(struct pblk *pblk)
>
> I'll lean towards not taking this patch. The use case adds extra logic,
> overhead, and complexity, and its only use is debugging.
>
> I am not an advocate of defensive coding in the fast path. For example,
> if an SLBA is not equal to the chunk SLBA, the drive is buggy and does
> not behave according to the spec, and therefore it does not warrant the
> checks.
> These types of bugs can be caught in the general drive testing suite.

The use case for this patch is to retrieve the wear-leveling information
and the CNLB, which only the drive knows about. It will also help remove
the per-line bitmap metadata and maintain per-chunk metadata instead,
reusing the chunk structure retrieved on erase. I want to have this in
before pushing the rest of the patches, as they will depend on it.

As for the checks, they are sanity checks that I believe do not hurt, but
I'm OK with removing them (or putting them under debug flags).

Javier
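For illustration, a minimal userspace sketch of the per-chunk bookkeeping
described above: on a chunk reset, the metadata the device reports (state,
wear index, cnlb) is copied into a per-chunk cache instead of being
reconstructed from per-line bitmaps. All struct and function names below
are hypothetical; only the field names mirror those used in the patch.

/*
 * Illustrative sketch only (not from the patch): a per-chunk cache that
 * is refreshed from the metadata a device reports when a chunk is reset.
 * All names here are made up; only the field names (state, wp, wi, cnlb,
 * slba) mirror the ones pblk reads back in the patch.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHK_ST_FREE     (1 << 0)
#define CHK_ST_OFFLINE  (1 << 3)

struct chk_meta {                       /* per-chunk metadata entry */
        uint8_t  state;
        uint8_t  wi;                    /* wear index: only the drive knows it */
        uint64_t slba;                  /* first LBA of the chunk */
        uint64_t cnlb;                  /* usable LBAs in the chunk */
        uint64_t wp;                    /* write pointer, 0 after reset */
};

struct chunk_cache {
        struct chk_meta *chunks;
        unsigned int nr_chunks;
};

/* Refresh one cached entry from what the device returned on reset. */
static int cache_update_on_reset(struct chunk_cache *cache, unsigned int idx,
                                 const struct chk_meta *dev_chunk)
{
        struct chk_meta *chunk;

        if (idx >= cache->nr_chunks)
                return -1;

        chunk = &cache->chunks[idx];

        /* Sanity check: the reply must describe this chunk, freshly reset. */
        if (dev_chunk->slba != chunk->slba || dev_chunk->wp != 0) {
                fprintf(stderr, "chunk %u: unexpected reset metadata\n", idx);
                return -1;
        }

        /* Take state, wear index and cnlb straight from the device. */
        memcpy(chunk, dev_chunk, sizeof(*chunk));
        return 0;
}

int main(void)
{
        struct chk_meta backing[2] = {
                { .state = CHK_ST_OFFLINE, .slba = 0,    .wp = 4096 },
                { .state = CHK_ST_OFFLINE, .slba = 4096, .wp = 4096 },
        };
        struct chunk_cache cache = { .chunks = backing, .nr_chunks = 2 };

        /* Pretend the device just answered a reset of chunk 1. */
        struct chk_meta from_dev = {
                .state = CHK_ST_FREE, .wi = 7, .slba = 4096, .cnlb = 4096,
        };

        if (!cache_update_on_reset(&cache, 1, &from_dev))
                printf("chunk 1: state=%u wi=%u cnlb=%llu\n",
                       backing[1].state, backing[1].wi,
                       (unsigned long long)backing[1].cnlb);
        return 0;
}

In the patch itself, the equivalent copy happens in __pblk_end_io_erase(),
where rqd->meta_list carries the single chunk entry returned for the erase.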