Re: [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 09.10.2018 11:02, Hans Holmberg wrote:
Hi Igor!

One important thing: this patch breaks the on-disk-storage format so
that needs to be handled(see my comment on this) and  some additional
nitpicks below.

Thanks,
Hans

On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@xxxxxxxxx> wrote:

Currently pblk assumes that size of OOB metadata on drive is always
equal to size of pblk_sec_meta struct. This commit add helpers which will
allow to handle different sizes of OOB metadata on drive.

Signed-off-by: Igor Konopko <igor.j.konopko@xxxxxxxxx>
---
  drivers/lightnvm/pblk-core.c     |  6 ++---
  drivers/lightnvm/pblk-map.c      | 21 ++++++++++------
  drivers/lightnvm/pblk-read.c     | 41 +++++++++++++++++++-------------
  drivers/lightnvm/pblk-recovery.c | 14 ++++++-----
  drivers/lightnvm/pblk.h          | 37 +++++++++++++++++++++++++++-
  5 files changed, 86 insertions(+), 33 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 6944aac43b01..7cb39d84c833 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -743,7 +743,6 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
         rqd.opcode = NVM_OP_PREAD;
         rqd.nr_ppas = lm->smeta_sec;
         rqd.is_seq = 1;
-
         for (i = 0; i < lm->smeta_sec; i++, paddr++)
                 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);

@@ -796,10 +795,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
         rqd.is_seq = 1;

         for (i = 0; i < lm->smeta_sec; i++, paddr++) {
-               struct pblk_sec_meta *meta_list = rqd.meta_list;
+               void *meta_list = rqd.meta_list;

                 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-               meta_list[i].lba = lba_list[paddr] = addr_empty;
+               pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
+               lba_list[paddr] = addr_empty;
         }

         ret = pblk_submit_io_sync_sem(pblk, &rqd);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 6dcbd44e3acb..4c7a9909308e 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -22,7 +22,7 @@
  static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
                               struct ppa_addr *ppa_list,
                               unsigned long *lun_bitmap,
-                             struct pblk_sec_meta *meta_list,
+                             void *meta_list,
                               unsigned int valid_secs)
  {
         struct pblk_line *line = pblk_line_get_data(pblk);
@@ -68,14 +68,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
                         kref_get(&line->ref);
                         w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
                         w_ctx->ppa = ppa_list[i];
-                       meta_list[i].lba = cpu_to_le64(w_ctx->lba);
+                       pblk_set_meta_lba(pblk, meta_list, i, w_ctx->lba);
                         lba_list[paddr] = cpu_to_le64(w_ctx->lba);
                         if (lba_list[paddr] != addr_empty)
                                 line->nr_valid_lbas++;
                         else
                                 atomic64_inc(&pblk->pad_wa);
                 } else {
-                       lba_list[paddr] = meta_list[i].lba = addr_empty;
+                       lba_list[paddr] = addr_empty;
+                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
                         __pblk_map_invalidate(pblk, line, paddr);
                 }
         }
@@ -88,7 +89,7 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
                  unsigned long *lun_bitmap, unsigned int valid_secs,
                  unsigned int off)
  {
-       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       void *meta_list = rqd->meta_list;
         struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
         unsigned int map_secs;
         int min = pblk->min_write_pgs;
@@ -97,7 +98,10 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
         for (i = off; i < rqd->nr_ppas; i += min) {
                 map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
                 if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
-                                       lun_bitmap, &meta_list[i], map_secs)) {
+                                       lun_bitmap,
+                                       pblk_get_meta_buffer(pblk,
+                                                            meta_list, i),
+                                       map_secs)) {
                         bio_put(rqd->bio);
                         pblk_free_rqd(pblk, rqd, PBLK_WRITE);
                         pblk_pipeline_stop(pblk);
@@ -113,7 +117,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
         struct nvm_tgt_dev *dev = pblk->dev;
         struct nvm_geo *geo = &dev->geo;
         struct pblk_line_meta *lm = &pblk->lm;
-       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       void *meta_list = rqd->meta_list;
         struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
         struct pblk_line *e_line, *d_line;
         unsigned int map_secs;
@@ -123,7 +127,10 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
         for (i = 0; i < rqd->nr_ppas; i += min) {
                 map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
                 if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
-                                       lun_bitmap, &meta_list[i], map_secs)) {
+                                       lun_bitmap,
+                                       pblk_get_meta_buffer(pblk,
+                                                            meta_list, i),
+                                       map_secs)) {
                         bio_put(rqd->bio);
                         pblk_free_rqd(pblk, rqd, PBLK_WRITE);
                         pblk_pipeline_stop(pblk);
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 08f6ebd4bc48..6584a2588f61 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
                                  struct bio *bio, sector_t blba,
                                  unsigned long *read_bitmap)
  {
-       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       void *meta_list = rqd->meta_list;
         struct ppa_addr ppas[NVM_MAX_VLBA];
         int nr_secs = rqd->nr_ppas;
         bool advanced_bio = false;
@@ -58,7 +58,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
  retry:
                 if (pblk_ppa_empty(p)) {
                         WARN_ON(test_and_set_bit(i, read_bitmap));
-                       meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
+                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);

                         if (unlikely(!advanced_bio)) {
                                 bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
@@ -78,7 +78,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
                                 goto retry;
                         }
                         WARN_ON(test_and_set_bit(i, read_bitmap));
-                       meta_list[i].lba = cpu_to_le64(lba);
+                       pblk_set_meta_lba(pblk, meta_list, i, lba);
                         advanced_bio = true;
  #ifdef CONFIG_NVM_PBLK_DEBUG
                         atomic_long_inc(&pblk->cache_reads);
@@ -105,12 +105,15 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
  static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
                                 sector_t blba)
  {
-       struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+       void *meta_list = rqd->meta_list;
         int nr_lbas = rqd->nr_ppas;
         int i;

+       if (!pblk_is_oob_meta_supported(pblk))
+               return;
+
         for (i = 0; i < nr_lbas; i++) {
-               u64 lba = le64_to_cpu(meta_lba_list[i].lba);
+               u64 lba = pblk_get_meta_lba(pblk, meta_list, i);

                 if (lba == ADDR_EMPTY)
                         continue;
@@ -134,9 +137,12 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
  static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
                                  u64 *lba_list, int nr_lbas)
  {
-       struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+       void *meta_lba_list = rqd->meta_list;
         int i, j;

+       if (!pblk_is_oob_meta_supported(pblk))
+               return;
+
         for (i = 0, j = 0; i < nr_lbas; i++) {
                 u64 lba = lba_list[i];
                 u64 meta_lba;
@@ -144,7 +150,7 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
                 if (lba == ADDR_EMPTY)
                         continue;

-               meta_lba = le64_to_cpu(meta_lba_list[j].lba);
+               meta_lba = pblk_get_meta_lba(pblk, meta_lba_list, j);

                 if (lba != meta_lba) {
  #ifdef CONFIG_NVM_PBLK_DEBUG
@@ -219,7 +225,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
         struct bio *new_bio = rqd->bio;
         struct bio *bio = pr_ctx->orig_bio;
         struct bio_vec src_bv, dst_bv;
-       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       void *meta_list = rqd->meta_list;
         int bio_init_idx = pr_ctx->bio_init_idx;
         unsigned long *read_bitmap = pr_ctx->bitmap;
         int nr_secs = pr_ctx->orig_nr_secs;
@@ -237,8 +243,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
         }

         for (i = 0; i < nr_secs; i++) {
-               pr_ctx->lba_list_media[i] = meta_list[i].lba;
-               meta_list[i].lba = pr_ctx->lba_list_mem[i];
+               pr_ctx->lba_list_media[i] = pblk_get_meta_lba(pblk,
+                                                       meta_list, i);
+               pblk_set_meta_lba(pblk, meta_list, i,
+                                 pr_ctx->lba_list_mem[i]);
         }

         /* Fill the holes in the original bio */
@@ -250,7 +258,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
                 line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
                 kref_put(&line->ref, pblk_line_put);

-               meta_list[hole].lba = pr_ctx->lba_list_media[i];
+               pblk_set_meta_lba(pblk, meta_list, hole,
+                                 pr_ctx->lba_list_media[i]);

                 src_bv = new_bio->bi_io_vec[i++];
                 dst_bv = bio->bi_io_vec[bio_init_idx + hole];
@@ -286,7 +295,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
                             unsigned long *read_bitmap,
                             int nr_holes)
  {
-       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       void *meta_list = rqd->meta_list;
         struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
         struct pblk_pr_ctx *pr_ctx;
         struct bio *new_bio, *bio = r_ctx->private;
@@ -308,7 +317,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
                 goto fail_free_pages;

         for (i = 0; i < nr_secs; i++)
-               pr_ctx->lba_list_mem[i] = meta_list[i].lba;
+               pr_ctx->lba_list_mem[i] = pblk_get_meta_lba(pblk, meta_list, i);

         new_bio->bi_iter.bi_sector = 0; /* internal bio */
         bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
@@ -373,7 +382,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
  static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
                          sector_t lba, unsigned long *read_bitmap)
  {
-       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       void *meta_list = rqd->meta_list;
         struct ppa_addr ppa;

         pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
@@ -385,7 +394,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
  retry:
         if (pblk_ppa_empty(ppa)) {
                 WARN_ON(test_and_set_bit(0, read_bitmap));
-               meta_list[0].lba = cpu_to_le64(ADDR_EMPTY);
+               pblk_set_meta_lba(pblk, meta_list, 0, ADDR_EMPTY);
                 return;
         }

@@ -399,7 +408,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
                 }

                 WARN_ON(test_and_set_bit(0, read_bitmap));
-               meta_list[0].lba = cpu_to_le64(lba);
+               pblk_set_meta_lba(pblk, meta_list, 0, lba);

  #ifdef CONFIG_NVM_PBLK_DEBUG
                 atomic_long_inc(&pblk->cache_reads);
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 5740b7509bd8..fa63f9fa5ba8 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -124,7 +124,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)

  struct pblk_recov_alloc {
         struct ppa_addr *ppa_list;
-       struct pblk_sec_meta *meta_list;
+       void *meta_list;
         struct nvm_rq *rqd;
         void *data;
         dma_addr_t dma_ppa_list;
@@ -158,7 +158,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
  {
         struct nvm_tgt_dev *dev = pblk->dev;
         struct nvm_geo *geo = &dev->geo;
-       struct pblk_sec_meta *meta_list;
+       void *meta_list;
         struct pblk_pad_rq *pad_rq;
         struct nvm_rq *rqd;
         struct bio *bio;
@@ -242,7 +242,8 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
                         dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);

                         pblk_map_invalidate(pblk, dev_ppa);
-                       lba_list[w_ptr] = meta_list[i].lba = addr_empty;
+                       lba_list[w_ptr] = addr_empty;
+                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
                         rqd->ppa_list[i] = dev_ppa;
                 }
         }
@@ -325,6 +326,7 @@ static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
                         return 1;
                 else if (chunk->wp < line_wp)
                         line_wp = chunk->wp;
+

Please remove the introduced newline

         }

         return 0;
@@ -336,7 +338,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
         struct nvm_tgt_dev *dev = pblk->dev;
         struct nvm_geo *geo = &dev->geo;
         struct ppa_addr *ppa_list;
-       struct pblk_sec_meta *meta_list;
+       void *meta_list;
         struct nvm_rq *rqd;
         struct bio *bio;
         void *data;
@@ -434,7 +436,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
         }

         for (i = 0; i < rqd->nr_ppas; i++) {
-               u64 lba = le64_to_cpu(meta_list[i].lba);
+               u64 lba = pblk_get_meta_lba(pblk, meta_list, i);

                 lba_list[paddr++] = cpu_to_le64(lba);

@@ -463,7 +465,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
         struct nvm_geo *geo = &dev->geo;
         struct nvm_rq *rqd;
         struct ppa_addr *ppa_list;
-       struct pblk_sec_meta *meta_list;
+       void *meta_list;
         struct pblk_recov_alloc p;
         void *data;
         dma_addr_t dma_ppa_list, dma_meta_list;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index aea09879636f..53156b6f99a3 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -87,7 +87,6 @@ enum {
  };

  struct pblk_sec_meta {
-       u64 reserved;
         __le64 lba;
  };

It's nice to reduce the required metadata size, but this silently
breaks pblk the on-disk-storage format. We can't have that.

I suggest breaking out this change as a separate patch that also
increases SMETA_VERSION_MAJOR and instructs the user via the kernel
log to migrate the data manually (or factory reset the decice/pblk
instance).


Sure, for now we can keep 16b of meta. I hadn't thought about breaking on-disk-storage format, so thanks for that comment. I will revert that part for now.



@@ -1366,4 +1365,40 @@ static inline char *pblk_disk_name(struct pblk *pblk)

         return disk->disk_name;
  }
+
+static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+
+       /* Pblk uses OOB meta to store LBA of given physical sector.
+        * The LBA is eventually used in recovery mode and/or for handling
+        * telemetry events (e.g., relocate sector).
+        */
+
+       return (geo->sos >= sizeof(struct pblk_sec_meta));
+}
+
+static inline struct pblk_sec_meta *pblk_get_meta_buffer(struct pblk *pblk,
+                                                        void *meta_ptr,

_ptr is surperflous, we know it's a pointer already.

Sure, makes sense, I can change that.


+                                                        int index)

The name of the function is misleading, maybe pblk_get_sec_meta instead?

+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+
+       return meta_ptr + max_t(int, geo->sos, sizeof(struct pblk_sec_meta))
+               * index;

What is the max_t for? How could pblk_sec_meta ever be bigger than geo->sos?


In patch 5 of that series I'm implementing handling for case when geo->sos is 0, so that's why I use max_t here as preparation for the next patch.

+}
+
+static inline void pblk_set_meta_lba(struct pblk *pblk, void *meta_ptr,
+                                    int index, u64 lba)
+{
+       pblk_get_meta_buffer(pblk, meta_ptr, index)->lba = cpu_to_le64(lba);
+}
+
+static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
+                                   int index)
+{
+       return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
+}

I would prefer having these functions get/set lbas of type __le64
instead, as we always have to update the emeta lba list at the same
time we set the oob metadata, and the emeta lba list is __le64,
It's easy to mess this up and byte-ordering bugs are hard to detect.


Sure, makes sense, I can change that.

  #endif /* PBLK_H_ */
--
2.17.1




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux