* Mark Brown <broonie@xxxxxxxxxx> [230325 17:53]:
> The current state of the art for sparse register maps is the rbtree
> cache. This works well for most applications but isn't always ideal
> for sparser register maps since the rbtree can get deep, requiring a
> lot of walking. Fortunately the kernel has a data structure intended
> to address this very problem, the maple tree. Provide an initial
> implementation of a register cache based on the maple tree to start
> taking advantage of it.
>
> This initial implementation is very simplistic and doesn't take full
> advantage of the capabilities of the maple tree; we simply store each
> register as a single value within the tree. Since maple tree values
> are pointers and iteration doesn't naturally give us the key, we
> allocate a small structure for each register, effectively adding
> another layer to the tree.
>
> We also store data in host native format rather than device native
> format as we do for rbtree. This will be a benefit for devices where
> we don't marshal data within regmap, and until we are able to store
> more than one register in a node there's no reason to have
> preformatted data even where we do marshal.
>
> This works well enough to get started and should already work well
> for some devices, but there is a great deal of room for improvement:
> as well as storing blocks rather than just individual registers, we
> don't need the locking that the maple tree does and are likely to
> benefit from caching the last accessed entry. Very small register
> maps may continue to do better with rbtree longer term.

Without use of the locking, the maple tree will generate lockdep
complaints.  With the mas_* family of functions, you are responsible
for locking.  On reads you can take the rcu_read_lock(); on writes
you need to take mas_lock(&mas).

If you have a lock you already use, you can init the tree with the
external lock flag and set the external lock like we do in the VMA
code.  The mtree_* functions handle the locking for you.
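Completely untested sketches of what I mean - "mylock" here is just a
stand-in for whatever lock the driver already holds around cache
accesses:

	/* Read-side walks only need the RCU read lock */
	rcu_read_lock();
	entry = mas_find(&mas, reg);
	rcu_read_unlock();

	/* Writes take the tree's internal spinlock */
	mas_lock(&mas);
	ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
	mas_unlock(&mas);

	/* Or tell the tree about an existing lock, as mm_init() does */
	mt_init_flags(mt, MT_FLAGS_LOCK_EXTERN);
	mt_set_external_lock(mt, &mylock);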
>
> Signed-off-by: Mark Brown <broonie@xxxxxxxxxx>
> ---
>  drivers/base/regmap/Makefile         |   2 +-
>  drivers/base/regmap/internal.h       |   1 +
>  drivers/base/regmap/regcache-maple.c | 154 +++++++++++++++++++++++++++++++++++
>  drivers/base/regmap/regcache.c       |   1 +
>  drivers/base/regmap/regmap-kunit.c   |   3 +
>  include/linux/regmap.h               |   1 +
>  6 files changed, 161 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
> index 4cb73468a197..f6c6cb017200 100644
> --- a/drivers/base/regmap/Makefile
> +++ b/drivers/base/regmap/Makefile
> @@ -3,7 +3,7 @@
>  CFLAGS_regmap.o := -I$(src)
>
>  obj-$(CONFIG_REGMAP) += regmap.o regcache.o
> -obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o
> +obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o regcache-maple.o
>  obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
>  obj-$(CONFIG_REGMAP_KUNIT) += regmap-kunit.o
>  obj-$(CONFIG_REGMAP_AC97) += regmap-ac97.o
> diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
> index 7b9ef43bcea6..6361df6f553a 100644
> --- a/drivers/base/regmap/internal.h
> +++ b/drivers/base/regmap/internal.h
> @@ -282,6 +282,7 @@ enum regmap_endian regmap_get_val_endian(struct device *dev,
>                                           const struct regmap_config *config);
>
>  extern struct regcache_ops regcache_rbtree_ops;
> +extern struct regcache_ops regcache_maple_ops;
>  extern struct regcache_ops regcache_flat_ops;
>
>  static inline const char *regmap_name(const struct regmap *map)
> diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
> new file mode 100644
> index 000000000000..a18966aed27e
> --- /dev/null
> +++ b/drivers/base/regmap/regcache-maple.c
> @@ -0,0 +1,154 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Register cache access API - maple tree based cache
> +//
> +// Copyright 2023 Arm, Ltd
> +//
> +// Author: Mark Brown <broonie@xxxxxxxxxx>
> +
> +#include <linux/debugfs.h>
> +#include <linux/device.h>
> +#include <linux/maple_tree.h>
> +#include <linux/slab.h>
> +
> +#include "internal.h"
> +
> +struct cache_entry {
> +        unsigned int reg;
> +        unsigned int val;
> +};
> +
> +static int regcache_maple_read(struct regmap *map,
> +                               unsigned int reg, unsigned int *value)
> +{
> +        struct maple_tree *mt = map->cache;
> +        struct cache_entry *entry;
> +
> +        entry = mtree_load(mt, reg);
> +        if (!entry)
> +                return -ENOENT;
> +
> +        *value = entry->val;
> +
> +        return 0;
> +}
> +
> +static int regcache_maple_write(struct regmap *map, unsigned int reg,
> +                                unsigned int val)
> +{
> +        struct maple_tree *mt = map->cache;
> +        MA_STATE(mas, mt, reg, reg);
> +        struct cache_entry *entry;
> +        int ret;
> +

The locking is missing here, so you will get lockdep complaints.
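Something like this (untested) should sort the write path - the
mas_find() walk wants the RCU read lock, while the mtree_store()
further down already handles its own locking:

	rcu_read_lock();
	entry = mas_find(&mas, reg);
	if (entry)
		entry->val = val;
	rcu_read_unlock();

	if (entry)
		return 0;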
> +        entry = mas_find(&mas, reg);
> +        if (entry) {
> +                entry->val = val;
> +                return 0;
> +        }
> +
> +        entry = kmalloc(sizeof(*entry), GFP_KERNEL);
> +        if (!entry)
> +                return -ENOMEM;
> +
> +        entry->reg = reg;
> +        entry->val = val;
> +
> +        ret = mtree_store(mt, reg, entry, GFP_KERNEL);
> +        if (ret != 0)
> +                kfree(entry);
> +
> +        return ret;
> +}
> +
> +static int regcache_maple_drop(struct regmap *map, unsigned int min,
> +                               unsigned int max)
> +{
> +        struct maple_tree *mt = map->cache;
> +        MA_STATE(mas, mt, min, max);
> +        struct cache_entry *entry;

	mas_lock(&mas);

> +        for (entry = mas_find(&mas, min); entry; entry = mas_next(&mas, max)) {

This can be written with the maple tree iterator:

	mas_for_each(&mas, entry, max) {

> +                kfree(entry);
> +                mas_erase(&mas);

This isn't the most efficient way of removing a list of entries, but it
certainly works for the first pass, as you say in the comments.

> +        }

	mas_unlock(&mas);

> +
> +        return 0;
> +}

Wait, if this is for destroying the entire tree:

	mas_lock(&mas);
	mas_for_each(&mas, entry, max)
		kfree(entry);
	__mt_destroy(mt);
	mas_unlock(&mas);

Calling erase for each item will cause the tree to slowly drain,
rebalancing and re-allocating new nodes over and over.  mtree_destroy()
and the non-locking __mt_destroy() will more efficiently walk the tree
to free every node.

If it's not for the entire tree, you can free the entries in the range
and then write a single NULL over the entire range.  But be conscious
of the maple state range after the iterator ends; you will need to
mas_set_range(&mas, min, max) prior to the mas_store_gfp().  There's a
sketch of this further down.

> +
> +static int regcache_maple_sync(struct regmap *map, unsigned int min,
> +                               unsigned int max)
> +{
> +        struct maple_tree *mt = map->cache;
> +        struct cache_entry *entry;
> +        MA_STATE(mas, mt, min, max);
> +        int ret;
> +
> +        map->cache_bypass = true;
> +

	rcu_read_lock();

> +        for (entry = mas_find(&mas, min); entry; entry = mas_next(&mas, max)) {

	mas_for_each(&mas, entry, max) {

> +                ret = regcache_sync_val(map, entry->reg, entry->val);
> +                if (ret != 0)
> +                        goto out;
> +        }
> +
> +out:

	rcu_read_unlock();

> +        map->cache_bypass = false;
> +
> +        return ret;
> +}
> +
> +static int regcache_maple_exit(struct regmap *map)
> +{
> +        struct maple_tree *mt = map->cache;
> +
> +        /* if we've already been called then just return */
> +        if (!mt)
> +                return 0;

There's not a race here with multiple tasks in this path at once,
right?

> +
> +        regcache_maple_drop(map, 0, UINT_MAX);
> +
> +        kfree(mt);
> +        map->cache = NULL;
> +
> +        return 0;
> +}
> +
> +static int regcache_maple_init(struct regmap *map)
> +{
> +        struct maple_tree *mt;
> +        int i;
> +        int ret;
> +
> +        mt = kmalloc(sizeof(*mt), GFP_KERNEL);
> +        if (!mt)
> +                return -ENOMEM;
> +        map->cache = mt;
> +
> +        mt_init(mt);
> +

The maple tree does support bulk loading (in the b-tree sense).  You
can put the maple tree in this state and pre-allocate nodes in bulk;
kernel/fork.c currently does this through the vma iterator interface.
Note that after a bulk load you have to ensure you call mas_destroy()
to free any unused nodes and to potentially cause a rebalance of the
last node (this is automatic).  But, again, like you said in your
comment, this is good for the first pass.
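As a rough, untested sketch of the bulk interface for the defaults
load - entries[] is assumed to have been allocated with GFP_KERNEL
before taking the lock, since you cannot sleep in an allocation under
the tree's spinlock, and i/ret are declared as in your init function:

	MA_STATE(mas, mt, 0, 0);

	mas_lock(&mas);
	/* Enter bulk-insert mode, preallocating nodes up front */
	ret = mas_expected_entries(&mas, map->num_reg_defaults);
	if (!ret) {
		for (i = 0; i < map->num_reg_defaults; i++) {
			entries[i].reg = map->reg_defaults[i].reg;
			entries[i].val = map->reg_defaults[i].def;
			mas_set_range(&mas, entries[i].reg, entries[i].reg);
			mas_store(&mas, &entries[i]);
			if (mas_is_err(&mas)) {
				ret = -ENOMEM;
				break;
			}
		}
	}
	/* Return any unused nodes, rebalance the last node if needed */
	mas_destroy(&mas);
	mas_unlock(&mas);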
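And going back to regcache_maple_drop(), an untested sketch of the
free-then-store-NULL version for a partial range:

static int regcache_maple_drop(struct regmap *map, unsigned int min,
			       unsigned int max)
{
	struct maple_tree *mt = map->cache;
	MA_STATE(mas, mt, min, max);
	struct cache_entry *entry;
	int ret;

	mas_lock(&mas);

	/* Free what the slots point to... */
	mas_for_each(&mas, entry, max)
		kfree(entry);

	/*
	 * ...then clear all the slots with a single NULL store.  The
	 * iterator has moved the maple state past the range, so reset
	 * it first.
	 */
	mas_set_range(&mas, min, max);
	ret = mas_store_gfp(&mas, NULL, GFP_KERNEL);

	mas_unlock(&mas);

	return ret;
}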
> +        for (i = 0; i < map->num_reg_defaults; i++) {
> +                ret = regcache_maple_write(map,
> +                                           map->reg_defaults[i].reg,
> +                                           map->reg_defaults[i].def);
> +                if (ret)
> +                        goto err;
> +        }
> +
> +        return 0;
> +
> +err:
> +        regcache_maple_exit(map);
> +        return ret;
> +}
> +
> +struct regcache_ops regcache_maple_ops = {
> +        .type = REGCACHE_MAPLE,
> +        .name = "maple",
> +        .init = regcache_maple_init,
> +        .exit = regcache_maple_exit,
> +        .read = regcache_maple_read,
> +        .write = regcache_maple_write,
> +        .drop = regcache_maple_drop,
> +        .sync = regcache_maple_sync,
> +};
> diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
> index e5d6b535c002..0b47721089e6 100644
> --- a/drivers/base/regmap/regcache.c
> +++ b/drivers/base/regmap/regcache.c
> @@ -17,6 +17,7 @@
>
>  static const struct regcache_ops *cache_types[] = {
>          &regcache_rbtree_ops,
> +        &regcache_maple_ops,
>          &regcache_flat_ops,
>  };
>
> diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c
> index 6f2bfa4650fe..3486bf9e28b8 100644
> --- a/drivers/base/regmap/regmap-kunit.c
> +++ b/drivers/base/regmap/regmap-kunit.c
> @@ -29,6 +29,7 @@ static const struct regcache_types regcache_types_list[] = {
>          { REGCACHE_NONE, "none" },
>          { REGCACHE_FLAT, "flat" },
>          { REGCACHE_RBTREE, "rbtree" },
> +        { REGCACHE_MAPLE, "maple" },
>  };
>
>  KUNIT_ARRAY_PARAM(regcache_types, regcache_types_list, case_to_desc);
> @@ -36,12 +37,14 @@ KUNIT_ARRAY_PARAM(regcache_types, regcache_types_list, case_to_desc);
>  static const struct regcache_types real_cache_types_list[] = {
>          { REGCACHE_FLAT, "flat" },
>          { REGCACHE_RBTREE, "rbtree" },
> +        { REGCACHE_MAPLE, "maple" },
>  };
>
>  KUNIT_ARRAY_PARAM(real_cache_types, real_cache_types_list, case_to_desc);
>
>  static const struct regcache_types sparse_cache_types_list[] = {
>          { REGCACHE_RBTREE, "rbtree" },
> +        { REGCACHE_MAPLE, "maple" },
>  };
>
>  KUNIT_ARRAY_PARAM(sparse_cache_types, sparse_cache_types_list, case_to_desc);
> diff --git a/include/linux/regmap.h b/include/linux/regmap.h
> index 24fc4a9ed1f9..11b360da199d 100644
> --- a/include/linux/regmap.h
> +++ b/include/linux/regmap.h
> @@ -51,6 +51,7 @@ enum regcache_type {
>          REGCACHE_NONE,
>          REGCACHE_RBTREE,
>          REGCACHE_FLAT,
> +        REGCACHE_MAPLE,
>  };
>
>  /**
>
> --
> 2.34.1
>