Hi On Thu, 2 Jan 2020, Maged Mokhtar wrote: > Any feedback on this patch please. This will definitely not work for persistent memory - it could corrupt data if a crash happens. The CPU can flush data in arbitrary order and it may happen that the seq count is flushed before the pertaining data. As for SSD mode - we could avoid updating the refcount in the superblock, but it wouldn't be much helpful. I.e. normally, commit is done this way: 1. submit data writes 2. submit metadata writes 3. flush disk cache 4. submit the write of superblock with increased seq_count 5. flush disk cache If we wanted to avoid writing the seq_count, we would need to change it to: 1. submit data writes 2. flush disk cache 3. submit metadata writes 4. flush disk cache - i.e. it sill needs two disk cache flushes per one commit request - and it is not much better than the existing solution. Mikulas > On 06/12/2019 21:03, Maged Mokhtar wrote: > > Removes seq_count from super block. Currently the super block gets written > > in each commit to update the seq_count which is just used when the target is > > restarted/resumed. This extra iop has a performance impact on small block > > size writes which do FUA/sync on every request. A 4k sync write currently > > requires 3 write ops: submitted data, metadata + super block seq_count > > update, removal of seq_count update reduces required write ops to 2. > > > > Rebuild of seq_count at start/resumption can be done quickly by looping > > through memory entry metadata within the resume() function. > > > > Signed-off-by: Maged Mokhtar <mmokhtar@xxxxxxxxxxx> > > --- > > drivers/md/dm-writecache.c | 56 ++++++++++++++++++++++++++--------- > > 1 file changed, 42 insertions(+), 14 deletions(-) > > > > --- a/drivers/md/dm-writecache.c 2019-12-06 03:07:53.000000000 -0800 > > +++ b/drivers/md/dm-writecache.c 2019-12-06 09:25:45.000000000 -0800 > > @@ -52,7 +52,8 @@ do { \ > > #endif > > > > #define MEMORY_SUPERBLOCK_MAGIC 0x23489321 > > -#define MEMORY_SUPERBLOCK_VERSION 1 > > +#define MEMORY_SUPERBLOCK_VERSION_1 1 > > +#define MEMORY_SUPERBLOCK_VERSION_2 2 > > > > struct wc_memory_entry { > > __le64 original_sector; > > @@ -67,7 +68,6 @@ struct wc_memory_superblock { > > __le32 block_size; > > __le32 pad; > > __le64 n_blocks; > > - __le64 seq_count; > > }; > > __le64 padding[8]; > > }; > > @@ -380,6 +380,41 @@ static uint64_t read_seq_count(struct dm > > #endif > > } > > > > +static uint64_t read_last_seq_count(struct dm_writecache *wc) > > +{ > > + size_t b; > > + uint64_t last_seq_count = 0; > > + uint64_t seq_count; > > + __le64 empty = cpu_to_le64(-1); > > + > > + if (WC_MODE_PMEM(wc)) { > > + struct wc_memory_entry wme; > > + for (b = 0; b < wc->n_blocks; b++) { > > + BUG_ON(memcpy_mcsafe(&wme, &sb(wc)->entries[b], > > + sizeof(struct wc_memory_entry))); > > + if (wme.seq_count != empty) { > > + seq_count = le64_to_cpu(wme.seq_count); > > + if (last_seq_count < seq_count) > > + last_seq_count = seq_count; > > + } > > + } > > + } > > + else { > > + struct wc_memory_entry *p = &sb(wc)->entries[0]; > > + b = wc->n_blocks; > > + while (0 < b) { > > + if (p->seq_count != empty) { > > + seq_count = le64_to_cpu(p->seq_count); > > + if (last_seq_count < seq_count) > > + last_seq_count = seq_count; > > + } > > + p++; > > + b--; > > + } > > + } > > + return last_seq_count; > > +} > > + > > static void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e) > > { > > #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS > > @@ -730,8 +765,6 @@ static void writecache_flush(struct dm_w > > writecache_wait_for_ios(wc, WRITE); > > > > wc->seq_count++; > > - pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); > > - writecache_flush_region(wc, &sb(wc)->seq_count, sizeof > > sb(wc)->seq_count); > > writecache_commit_flushed(wc); > > > > wc->overwrote_committed = false; > > @@ -876,7 +909,6 @@ static void writecache_resume(struct dm_ > > struct dm_writecache *wc = ti->private; > > size_t b; > > bool need_flush = false; > > - __le64 sb_seq_count; > > int r; > > > > wc_lock(wc); > > @@ -894,12 +926,7 @@ static void writecache_resume(struct dm_ > > } > > wc->freelist_size = 0; > > > > - r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); > > - if (r) { > > - writecache_error(wc, r, "hardware memory error when reading > > superblock: %d", r); > > - sb_seq_count = cpu_to_le64(0); > > - } > > - wc->seq_count = le64_to_cpu(sb_seq_count); > > + wc->seq_count = read_last_seq_count(wc) + 1; > > > > #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS > > for (b = 0; b < wc->n_blocks; b++) { > > @@ -1757,10 +1784,9 @@ static int init_memory(struct dm_writeca > > > > for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++) > > pmem_assign(sb(wc)->padding[b], cpu_to_le64(0)); > > - pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION)); > > + pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION_2)); > > pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size)); > > pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks)); > > - pmem_assign(sb(wc)->seq_count, cpu_to_le64(0)); > > > > for (b = 0; b < wc->n_blocks; b++) > > write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); > > @@ -2159,11 +2185,13 @@ invalid_optional: > > goto bad; > > } > > > > - if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) { > > + if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION_1 && > > + le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION_2) { > > ti->error = "Invalid version in the superblock"; > > r = -EINVAL; > > goto bad; > > } > > + pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION_2)); > > > > if (le32_to_cpu(s.block_size) != wc->block_size) { > > ti->error = "Block size does not match superblock"; > > -- > Maged Mokhtar > CEO PetaSAN > 4 Emad El Deen Kamel > Cairo 11371, Egypt > www.petasan.org > +201006979931 > skype: maged.mokhtar >
-- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel