Re: [PATCH 4/4 v2] persistent-data: reduce lock contention while walking the btree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



fio results (many jobs, large queue depth, 64 core machine, preallocated thin):

upstream:
    READ: bw=303MiB/s (318MB/s), 15.7MiB/s-24.9MiB/s (16.5MB/s-26.1MB/s), io=9093MiB (9535MB), run=30004-30007msec
    WRITE: bw=304MiB/s (318MB/s), 15.7MiB/s-24.8MiB/s (16.5MB/s-26.0MB/s), io=9108MiB (9550MB), run=30004-30007msec

Mikulas' patches:
    READ: bw=524MiB/s (549MB/s), 32.4MiB/s-32.9MiB/s (34.0MB/s-34.5MB/s), io=15.3GiB (16.5GB), run=30001-30002msec
    WRITE: bw=524MiB/s (550MB/s), 32.5MiB/s-32.9MiB/s (34.1MB/s-34.5MB/s), io=15.4GiB (16.5GB), run=30001-30002msec

My patches:
    READ: bw=538MiB/s (564MB/s), 33.4MiB/s-33.7MiB/s (35.1MB/s-35.4MB/s), io=15.8GiB (16.9GB), run=30001-30002msec
    WRITE: bw=539MiB/s (565MB/s), 33.4MiB/s-33.8MiB/s (35.0MB/s-35.4MB/s), io=15.8GiB (16.9GB), run=30001-30002msec

Both our approaches give similar results.  Mine is slightly faster because the changes are all internal to dm-bufio, so everything
benefits from the added concurrency not just the btrees (eg, space maps).

As mentioned on irc I don't like the new interface to dm-bufio.  Having different methods of getting a buffer is ugly, you have to fall back to the other method if it fails, and remember how you eventually got it for unlocking.  Plus we need to change any code to take advantage of it.  Currently dm-bufio is used by dm-thin, dm-cache, dm-era, dm-verity, dm-ebs-target, dm-integrity, and dm-snap-persistent.  We're not going to audit all these and see which can benefit.

On the other hand I don't like the size of my patch (~1200 line diff).  I'll post it when it's complete and we can continue the discussion then.

- Joe


On Wed, Oct 12, 2022 at 7:31 AM Joe Thornber <thornber@xxxxxxxxxx> wrote:
Thanks Mikulas,

I'll test this morning.

- Joe


On Tue, Oct 11, 2022 at 8:10 PM Mikulas Patocka <mpatocka@xxxxxxxxxx> wrote:
Hi

Here I'm sending updated patch 4 that fixes hang on discard. We must not
do the optimization in dm_btree_lookup_next.

Mikulas


From: Mikulas Patocka <mpatocka@xxxxxxxxxx>

This patch reduces lock contention in btree walks. We modify the
functions init_ro_wpin, exit_ro_spine and ro_step so that they use
dm_bufio_lock_read/dm_bufio_get_unlocked/dm_bufio_unlock_read. If
dm_bm_fast_get_block fails, we fallback to normal locking.

When doing tests on pmem and fully provisioned thin volume, it improves
thoughput from 286MiB/s to 442MiB/s.

fio --ioengine=psync --iodepth=1 --rw=randrw --bs=4k --direct=1 --numjobs=12 --time_based --runtime=10 --group_reporting --name=/dev/vg/thin
before:
READ: bw=286MiB/s
WRITE: bw=286MiB/s
after:
READ: bw=442MiB/s
WRITE: bw=442MiB/s

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 drivers/md/persistent-data/dm-block-manager.c       |   32 +++++++++++++++
 drivers/md/persistent-data/dm-block-manager.h       |    6 ++
 drivers/md/persistent-data/dm-btree-internal.h      |    4 +
 drivers/md/persistent-data/dm-btree-spine.c         |   41 ++++++++++++++++++--
 drivers/md/persistent-data/dm-btree.c               |    6 +-
 drivers/md/persistent-data/dm-transaction-manager.c |   17 ++++++++
 drivers/md/persistent-data/dm-transaction-manager.h |    6 ++
 7 files changed, 104 insertions(+), 8 deletions(-)

Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.c
+++ linux-2.6/drivers/md/persistent-data/dm-block-manager.c
@@ -601,6 +601,38 @@ void dm_bm_unlock(struct dm_block *b)
 }
 EXPORT_SYMBOL_GPL(dm_bm_unlock);

+void dm_bm_fast_lock(struct dm_block_manager *bm)
+{
+       dm_bufio_lock_read(bm->bufio);
+}
+
+void dm_bm_fast_unlock(struct dm_block_manager *bm)
+{
+       dm_bufio_unlock_read(bm->bufio);
+}
+
+int dm_bm_fast_get_block(struct dm_block_manager *bm,
+                        dm_block_t b, struct dm_block_validator *v,
+                        struct dm_block **result)
+{
+       int r;
+       struct buffer_aux *aux;
+       void *p;
+
+       p = dm_bufio_get_unlocked(bm->bufio, b, (struct dm_buffer **) result);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+       if (unlikely(!p))
+               return -EWOULDBLOCK;
+
+       aux = dm_bufio_get_aux_data(to_buffer(*result));
+       r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
+       if (unlikely(r))
+               return r;
+
+       return 0;
+}
+
 int dm_bm_flush(struct dm_block_manager *bm)
 {
        if (dm_bm_is_read_only(bm))
Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.h
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.h
+++ linux-2.6/drivers/md/persistent-data/dm-block-manager.h
@@ -96,6 +96,12 @@ int dm_bm_write_lock_zero(struct dm_bloc

 void dm_bm_unlock(struct dm_block *b);

+void dm_bm_fast_lock(struct dm_block_manager *bm);
+void dm_bm_fast_unlock(struct dm_block_manager *bm);
+int dm_bm_fast_get_block(struct dm_block_manager *bm,
+                        dm_block_t b, struct dm_block_validator *v,
+                        struct dm_block **result);
+
 /*
  * It's a common idiom to have a superblock that should be committed last.
  *
Index: linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-btree-internal.h
+++ linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
@@ -64,9 +64,11 @@ struct ro_spine {
        struct dm_btree_info *info;

        struct dm_block *node;
+       bool fast_locked;
+       bool fast_lock_failed;
 };

-void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
+void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool disable_fast_access);
 void exit_ro_spine(struct ro_spine *s);
 int ro_step(struct ro_spine *s, dm_block_t new_child);
 struct btree_node *ro_node(struct ro_spine *s);
Index: linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-btree-spine.c
+++ linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
@@ -118,27 +118,60 @@ void unlock_block(struct dm_btree_info *
        dm_tm_unlock(info->tm, b);
 }

+static void bn_fast_lock(struct dm_btree_info *info)
+{
+       dm_tm_fast_lock(info->tm);
+}
+
+static void bn_fast_unlock(struct dm_btree_info *info)
+{
+       dm_tm_fast_unlock(info->tm);
+}
+
+static int bn_fast_get_block(struct dm_btree_info *info, dm_block_t b,
+                     struct dm_block **result)
+{
+       return dm_tm_fast_get_block(info->tm, b, &btree_node_validator, result);
+}
+
 /*----------------------------------------------------------------*/

-void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info)
+void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool disable_fast_access)
 {
        s->info = info;
        s->node = NULL;
+       s->fast_locked = false;
+       s->fast_lock_failed = disable_fast_access;
 }

 void exit_ro_spine(struct ro_spine *s)
 {
-       if (s->node)
+       if (s->fast_locked)
+               bn_fast_unlock(s->info);
+       else if (s->node)
                unlock_block(s->info, s->node);
 }

 int ro_step(struct ro_spine *s, dm_block_t new_child)
 {
        if (s->node) {
-               unlock_block(s->info, s->node);
+               if (unlikely(!s->fast_locked))
+                       unlock_block(s->info, s->node);
                s->node = NULL;
        }
-
+       if (likely(!s->fast_lock_failed)) {
+               int r;
+               if (!s->fast_locked) {
+                       bn_fast_lock(s->info);
+                       s->fast_locked = true;
+               }
+               r = bn_fast_get_block(s->info, new_child, &s->node);
+               if (likely(!r))
+                       return 0;
+               s->fast_lock_failed = true;
+               s->fast_locked = false;
+               bn_fast_unlock(s->info);
+       }
        return bn_read_lock(s->info, new_child, &s->node);
 }

Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.c
+++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
@@ -348,6 +348,23 @@ void dm_tm_unlock(struct dm_transaction_
 }
 EXPORT_SYMBOL_GPL(dm_tm_unlock);

+void dm_tm_fast_lock(struct dm_transaction_manager *tm)
+{
+       dm_bm_fast_lock(tm->is_clone ? tm->real->bm : tm->bm);
+}
+
+void dm_tm_fast_unlock(struct dm_transaction_manager *tm)
+{
+       dm_bm_fast_unlock(tm->is_clone ? tm->real->bm : tm->bm);
+}
+
+int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
+                        struct dm_block_validator *v,
+                        struct dm_block **blk)
+{
+       return dm_bm_fast_get_block(tm->is_clone ? tm->real->bm : tm->bm, b, v, blk);
+}
+
 void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
 {
        /*
Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.h
+++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
@@ -96,6 +96,12 @@ int dm_tm_read_lock(struct dm_transactio

 void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);

+void dm_tm_fast_lock(struct dm_transaction_manager *tm);
+void dm_tm_fast_unlock(struct dm_transaction_manager *tm);
+int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
+                        struct dm_block_validator *v,
+                        struct dm_block **blk);
+
 /*
  * Functions for altering the reference count of a block directly.
  */
Index: linux-2.6/drivers/md/persistent-data/dm-btree.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-btree.c
+++ linux-2.6/drivers/md/persistent-data/dm-btree.c
@@ -377,7 +377,7 @@ int dm_btree_lookup(struct dm_btree_info
        __le64 internal_value_le;
        struct ro_spine spine;

-       init_ro_spine(&spine, info);
+       init_ro_spine(&spine, info, false);
        for (level = 0; level < info->levels; level++) {
                size_t size;
                void *value_p;
@@ -472,7 +472,7 @@ int dm_btree_lookup_next(struct dm_btree
        __le64 internal_value_le;
        struct ro_spine spine;

-       init_ro_spine(&spine, info);
+       init_ro_spine(&spine, info, true);
        for (level = 0; level < info->levels - 1u; level++) {
                r = btree_lookup_raw(&spine, root, keys[level],
                                     lower_bound, rkey,
@@ -1369,7 +1369,7 @@ static int dm_btree_find_key(struct dm_b
        int r = 0, count = 0, level;
        struct ro_spine spine;

-       init_ro_spine(&spine, info);
+       init_ro_spine(&spine, info, false);
        for (level = 0; level < info->levels; level++) {
                r = find_key(&spine, root, find_highest, result_keys + level,
                             level == info->levels - 1 ? NULL : &root);

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/dm-devel

[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux