is risky, because in case of a read error on such a chunk we lose all the
data from the whole line, which leads to silent data corruption.

This patch changes this behaviour and stores 2 copies of smeta (the number
can easily be changed to a different value with a kernel parameter) in
order to provide higher reliability: mirrored copies of the smeta struct
are stored, and on a read error it is possible to fail over to another
copy of that struct. Such an approach ensures that copies of this critical
structure are stored on different dies, and thus the predicted UBER is
multiple times higher.
Signed-off-by: Igor Konopko <igor.j.konopko@xxxxxxxxx>
---
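Reviewer note, not part of the commit message: the recovery behaviour
described above amounts to trying each mirrored smeta copy in turn and
stopping at the first clean read. A minimal userspace sketch of that
failover loop follows; read_copy() is a hypothetical stand-in for
pblk_line_smeta_read_copy() and the failure pattern is made up purely
for illustration.

#include <stdio.h>

#define SMETA_COPIES 2

/* Hypothetical per-copy read: returns 0 on success, -1 on a media error. */
static int read_copy(int idx)
{
        return idx == 0 ? -1 : 0;       /* pretend the first copy is unreadable */
}

int main(void)
{
        int i, ret = -1;

        for (i = 0; i < SMETA_COPIES; i++) {
                ret = read_copy(i);
                if (!ret) {
                        printf("recovered smeta from copy %d\n", i);
                        break;  /* one good copy is enough for recovery */
                }
        }

        return ret ? 1 : 0;
}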
drivers/lightnvm/pblk-core.c | 125 ++++++++++++++++++++++++++++++++-------
drivers/lightnvm/pblk-init.c | 23 +++++--
drivers/lightnvm/pblk-recovery.c | 2 +-
drivers/lightnvm/pblk.h | 1 +
4 files changed, 123 insertions(+), 28 deletions(-)
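Also for review only and not part of the patch itself: the write side
decides between one combined write request and per-copy writes (see
pblk_line_smeta_write() in the diff below). A rough userspace sketch of
that decision; all names and constants here are assumptions chosen only
for illustration.

#include <stdio.h>
#include <string.h>

#define SMETA_COPIES    2
#define SMETA_SEC       8       /* total smeta sectors across all copies (example) */
#define MAX_WRITE_PGS   64      /* max sectors per write request (example) */
#define SEC_SIZE        4096

int main(void)
{
        static char buf[SMETA_SEC * SEC_SIZE];  /* shared smeta buffer */
        int copy_len = sizeof(buf) / SMETA_COPIES;
        int rq_writes, i;

        if (SMETA_SEC > MAX_WRITE_PGS) {
                /* Copies do not fit in one request: write them one by one. */
                rq_writes = SMETA_COPIES;
        } else {
                /* Replicate the first copy so a single request covers all. */
                rq_writes = 1;
                for (i = 1; i < SMETA_COPIES; i++)
                        memcpy(buf + i * copy_len, buf, copy_len);
        }

        printf("issuing %d smeta write request(s)\n", rq_writes);
        return 0;
}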
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index a683d1f..4d5cd99 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -720,13 +720,14 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
return bit * geo->ws_opt;
}
-int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
+static int pblk_line_smeta_read_copy(struct pblk *pblk,
+ struct pblk_line *line, u64 paddr)
{
struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct bio *bio;
struct nvm_rq rqd;
- u64 paddr = pblk_line_smeta_start(pblk, line);
int i, ret;
memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -735,7 +736,8 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
if (ret)
return ret;
- bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
+ bio = bio_map_kern(dev->q, line->smeta,
+ lm->smeta_len / lm->smeta_copies, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
goto clear_rqd;
@@ -746,11 +748,23 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
rqd.bio = bio;
rqd.opcode = NVM_OP_PREAD;
- rqd.nr_ppas = lm->smeta_sec;
+ rqd.nr_ppas = lm->smeta_sec / lm->smeta_copies;
rqd.is_seq = 1;
- for (i = 0; i < lm->smeta_sec; i++, paddr++)
- rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+ for (i = 0; i < rqd.nr_ppas; i++, paddr++) {
+ struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line->id);
+ int pos = pblk_ppa_to_pos(geo, ppa);
+
+ while (test_bit(pos, line->blk_bitmap)) {
+ paddr += pblk->min_write_pgs;
+ ppa = addr_to_gen_ppa(pblk, paddr, line->id);
+ pos = pblk_ppa_to_pos(geo, ppa);
+ }
+
+ rqd.ppa_list[i] = ppa;
+ pblk_get_meta(pblk, rqd.meta_list, i)->lba =
+ cpu_to_le64(ADDR_EMPTY);
+ }
ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
@@ -771,16 +785,63 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
return ret;
}
-static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
+int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i, ret = 0, smeta_sec = lm->smeta_sec / lm->smeta_copies;
+ u64 paddr = pblk_line_smeta_start(pblk, line);
+
+ for (i = 0; i < lm->smeta_copies; i++) {
+ ret = pblk_line_smeta_read_copy(pblk, line,
+ paddr + (i * smeta_sec));
+ if (!ret) {
+ /*
+ * One successfully read copy of smeta is
+ * enough for recovery; there is no need to
+ * read another one.
+ */
+ return ret;
+ }
+ }
+ return ret;
+}
+
+static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line)
{
struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct bio *bio;
struct nvm_rq rqd;
__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- int i, ret;
+ u64 paddr = 0;
+ int smeta_cpy_len = lm->smeta_len / lm->smeta_copies;
+ int smeta_cpy_sec = lm->smeta_sec / lm->smeta_copies;
+ int i, ret, rq_writes;
+
+ /*
+ * Check if we can write all the smeta copies with
+ * a single write command.
+ * If yes -> copy smeta sector into multiple copies
+ * in buffer to write.
+ * If no -> issue writes one by one using the same
+ * buffer space.
+ * Only if all the copies are written correctly
+ * do we treat this line as valid for proper
+ * UBER reliability.
+ */
+ if (lm->smeta_sec > pblk->max_write_pgs) {
+ rq_writes = lm->smeta_copies;
+ } else {
+ rq_writes = 1;
+ for (i = 1; i < lm->smeta_copies; i++) {
+ memcpy(line->smeta + i * smeta_cpy_len,
+ line->smeta, smeta_cpy_len);
+ }
+ smeta_cpy_len = lm->smeta_len;
+ smeta_cpy_sec = lm->smeta_sec;
+ }