RE: [RFC PATCH 2/8] bcache: initialize the nvm pages allocator

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> -----Original Message-----
> From: Zhiqiang Liu <liuzhiqiang26@xxxxxxxxxx>
> Sent: Monday, December 7, 2020 10:17 PM
> To: Ren, Qiaowei <qiaowei.ren@xxxxxxxxx>; Coly Li <colyli@xxxxxxx>
> Cc: linux-bcache@xxxxxxxxxxxxxxx; Ma, Jianpeng <jianpeng.ma@xxxxxxxxx>
> Subject: Re: [RFC PATCH 2/8] bcache: initialize the nvm pages allocator
> 
> 
> On 2020/12/3 18:53, Qiaowei Ren wrote:
> > This patch define the prototype data structures in memory and
> > initializes the nvm pages allocator.
> >
> > The nvm address space which is managed by this allocator can consist
> > of many nvm namespaces, and some namespaces can compose into one
> > nvm set, like a cache set. For this initial implementation, only one
> > set can be supported.
> >
> > The users of this nvm pages allocator need to call
> > register_namespace() to register the nvdimm device (like /dev/pmemX)
> > into this allocator as the instance of struct nvm_namespace.
> >
> > Signed-off-by: Jianpeng Ma <jianpeng.ma@xxxxxxxxx>
> > Signed-off-by: Qiaowei Ren <qiaowei.ren@xxxxxxxxx>
> > ---
> >  drivers/md/bcache/Kconfig     |   6 +
> >  drivers/md/bcache/Makefile    |   2 +-
> >  drivers/md/bcache/nvm-pages.c | 303
> > ++++++++++++++++++++++++++++++++++
> >  drivers/md/bcache/nvm-pages.h |  91 ++++++++++
> >  drivers/md/bcache/super.c     |   3 +
> >  5 files changed, 404 insertions(+), 1 deletion(-)  create mode 100644
> > drivers/md/bcache/nvm-pages.c  create mode 100644
> > drivers/md/bcache/nvm-pages.h
> >
> > diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
> > index d1ca4d059c20..448a99ce13b2 100644
> > --- a/drivers/md/bcache/Kconfig
> > +++ b/drivers/md/bcache/Kconfig
> > @@ -35,3 +35,9 @@ config BCACHE_ASYNC_REGISTRATION
> >  	device path into this file will returns immediately and the real
> >  	registration work is handled in kernel work queue in asynchronous
> >  	way.
> > +
> > +config BCACHE_NVM_PAGES
> > +	bool "NVDIMM support for bcache"
> > +	depends on BCACHE
> > +	help
> > +	nvm pages allocator for bcache.
> > diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
> > index 5b87e59676b8..948e5ed2ca66 100644
> > --- a/drivers/md/bcache/Makefile
> > +++ b/drivers/md/bcache/Makefile
> > @@ -4,4 +4,4 @@ obj-$(CONFIG_BCACHE)	+= bcache.o
> >
> >  bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
> >  	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
> > -	util.o writeback.o features.o
> > +	util.o writeback.o features.o nvm-pages.o
> > diff --git a/drivers/md/bcache/nvm-pages.c
> > b/drivers/md/bcache/nvm-pages.c new file mode 100644 index
> > 000000000000..841616ea3267
> > --- /dev/null
> > +++ b/drivers/md/bcache/nvm-pages.c
> > @@ -0,0 +1,303 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +#include <linux/slab.h>
> > +#include <linux/list.h>
> > +#include <linux/mutex.h>
> > +#include <linux/dax.h>
> > +#include <linux/pfn_t.h>
> > +#include <linux/libnvdimm.h>
> > +#include <linux/mm_types.h>
> > +#include <linux/err.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/bitmap.h>
> > +#include <linux/blkdev.h>
> > +#include "nvm-pages.h"
> > +
> > +struct nvm_set *only_set;
> > +
> > +static struct owner_list *alloc_owner_list(const char *owner_uuid,
> > +		const char *label, int total_namespaces) {
> > +	struct owner_list *owner_list;
> > +
> > +	owner_list = kzalloc(sizeof(struct owner_list), GFP_KERNEL);
> 
> Should we check whether kzalloc(GFP_KERNEL) returns NULL,
> or use the __GFP_NOFAIL flag when calling kzalloc()?
>
> There are several similar cases in other patches.
> 
Thanks for your suggestions. I'll make the changes.

Jianpeng.
> > +	if (owner_uuid)
> > +		memcpy(owner_list->owner_uuid, owner_uuid, 16);
> > +	if (label)
> > +		memcpy(owner_list->label, label, NVM_PAGES_LABEL_SIZE);
> > +	owner_list->alloced_recs = kcalloc(total_namespaces,
> > +			sizeof(struct nvm_alloced_recs *), GFP_KERNEL);
> > +
> > +	return owner_list;
> > +}
> > +
> > +static void release_extents(struct nvm_alloced_recs *extents) {
> > +	struct list_head *list = extents->extent_head.next;
> > +	struct extent *extent;
> > +
> > +	while (list != &extents->extent_head) {
> > +		extent = container_of(list, struct extent, list);
> > +		list_del(list);
> > +		kfree(extent);
> > +		list = extents->extent_head.next;
> > +	}
> > +	kfree(extents);
> > +}
> > +
> > +static void release_owner_info(struct nvm_set *nvm_set) {
> > +	struct owner_list *owner_list;
> > +	int i, j;
> > +
> > +	for (i = 0; i < nvm_set->owner_list_size; i++) {
> > +		owner_list = nvm_set->owner_lists[i];
> > +		for (j = 0; j < nvm_set->total_namespaces_nr; j++) {
> > +			if (owner_list->alloced_recs[j])
> > +				release_extents(owner_list->alloced_recs[j]);
> > +		}
> > +		kfree(owner_list->alloced_recs);
> > +		kfree(owner_list);
> > +	}
> > +	kfree(nvm_set->owner_lists);
> > +}
> > +
> > +static void release_nvm_namespaces(struct nvm_set *nvm_set) {
> > +	int i;
> > +
> > +	for (i = 0; i < nvm_set->total_namespaces_nr; i++)
> > +		kfree(nvm_set->nss[i]);
> > +
> > +	kfree(nvm_set->nss);
> > +}
> > +
> > +static void release_nvm_set(struct nvm_set *nvm_set) {
> > +	release_nvm_namespaces(nvm_set);
> > +	release_owner_info(nvm_set);
> > +	kfree(nvm_set);
> > +}
> > +
> > +static void *nvm_pgoff_to_vaddr(struct nvm_namespace *ns, pgoff_t
> > +pgoff) {
> > +	return ns->kaddr + ns->pages_offset + (pgoff << PAGE_SHIFT); }
> > +
> > +static void init_owner_info(struct nvm_namespace *ns) {
> > +	struct owner_list_head *owner_list_head;
> > +	struct nvm_pages_owner_head *owner_head;
> > +	struct nvm_pgalloc_recs *nvm_pgalloc_recs;
> > +	struct owner_list *owner_list;
> > +	struct nvm_alloced_recs *extents;
> > +	struct extent *extent;
> > +	u32 i, j, k;
> > +
> > +	owner_list_head = (struct owner_list_head *)
> > +			(ns->kaddr + NVM_PAGES_OWNER_LIST_HEAD_OFFSET);
> > +
> > +	mutex_lock(&only_set->lock);
> > +	only_set->owner_list_size = owner_list_head->size;
> > +	for (i = 0; i < owner_list_head->size; i++) {
> > +		owner_head = &owner_list_head->heads[i];
> > +		owner_list = alloc_owner_list(owner_head->uuid, owner_head-
> >label,
> > +				only_set->total_namespaces_nr);
> > +
> > +		for (j = 0; j < only_set->total_namespaces_nr; j++) {
> > +			if (only_set->nss[j] == NULL || owner_head->recs[j] ==
> NULL)
> > +				continue;
> > +
> > +			nvm_pgalloc_recs = (struct nvm_pgalloc_recs *)
> > +					((long)owner_head->recs[j] + ns->kaddr);
> > +
> > +			extents = kzalloc(sizeof(struct nvm_alloced_recs),
> GFP_KERNEL);
> > +			extents->ns = only_set->nss[j];
> > +			INIT_LIST_HEAD(&extents->extent_head);
> > +			owner_list->alloced_recs[j] = extents;
> > +
> > +			do {
> > +				struct nvm_pgalloc_rec *rec;
> > +
> > +				for (k = 0; k < nvm_pgalloc_recs->size; k++) {
> > +					rec = &nvm_pgalloc_recs->recs[k];
> > +					extent = kzalloc(sizeof(struct extent),
> GFP_KERNEL);
> > +					extent->kaddr =
> nvm_pgoff_to_vaddr(extents->ns, rec->pgoff);
> > +					extent->nr = rec->nr;
> > +					list_add_tail(&extent->list, &extents-
> >extent_head);
> > +
> > +					extents->ns->free -= rec->nr;
> > +				}
> > +				extents->size += nvm_pgalloc_recs->size;
> > +
> > +				if (nvm_pgalloc_recs->next)
> > +					nvm_pgalloc_recs = (struct nvm_pgalloc_recs
> *)
> > +						((long)nvm_pgalloc_recs->next + ns-
> >kaddr);
> > +				else
> > +					nvm_pgalloc_recs = NULL;
> > +			} while (nvm_pgalloc_recs);
> > +		}
> > +		only_set->owner_lists[i] = owner_list;
> > +		owner_list->nvm_set = only_set;
> > +	}
> > +	mutex_unlock(&only_set->lock);
> > +}
> > +
> > +static bool dev_dax_supported(struct block_device *bdev) {
> > +	char buf[BDEVNAME_SIZE];
> > +	struct page *page;
> > +	struct nvm_pages_sb *sb;
> > +	bool supported = false;
> > +
> > +	page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
> > +			NVM_PAGES_SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
> > +
> > +	if (IS_ERR(page))
> > +		goto err;
> > +
> > +	sb = page_address(page);
> > +	if (!bdev_dax_supported(bdev, sb->page_size))
> > +		pr_info("DAX can't supported by %s\n", bdevname(bdev, buf));
> > +	else
> > +		supported = true;
> > +
> > +	put_page(page);
> > +err:
> > +	return supported;
> > +}
> > +
> > +static bool attach_nvm_set(struct nvm_namespace *ns) {
> > +	bool rc = true;
> > +
> > +	mutex_lock(&only_set->lock);
> > +	if (only_set->nss) {
> > +		if (memcmp(ns->sb->set_uuid, only_set->set_uuid, 16)) {
> > +			pr_info("namespace id does't match nvm set\n");
> > +			rc = false;
> > +			goto unlock;
> > +		}
> > +
> > +		if (only_set->nss[ns->sb->this_namespace_nr]) {
> > +			pr_info("already has the same position(%d) nvm\n",
> > +					ns->sb->this_namespace_nr);
> > +			rc = false;
> > +			goto unlock;
> > +		}
> > +	} else {
> > +		memcpy(only_set->set_uuid, ns->sb->set_uuid, 16);
> > +		only_set->total_namespaces_nr = ns->sb->total_namespaces_nr;
> > +		only_set->nss = kcalloc(only_set->total_namespaces_nr,
> > +				sizeof(struct nvm_namespace *), GFP_KERNEL);
> > +		only_set->owner_lists = kcalloc(MAX_OWNER_LIST,
> > +				sizeof(struct nvm_pages_owner_head *),
> GFP_KERNEL);
> > +	}
> > +
> > +	only_set->nss[ns->sb->this_namespace_nr] = ns;
> > +
> > +unlock:
> > +	mutex_unlock(&only_set->lock);
> > +	return rc;
> > +}
> > +
> > +struct nvm_namespace *register_namespace(const char *dev_path) {
> > +	struct nvm_namespace *ns;
> > +	int err;
> > +	pgoff_t pgoff;
> > +	char buf[BDEVNAME_SIZE];
> > +	struct block_device *bdev;
> > +
> > +	bdev = blkdev_get_by_path(dev_path,
> FMODE_READ|FMODE_WRITE|FMODE_EXEC, NULL);
> > +	if (IS_ERR(bdev)) {
> > +		pr_info("get %s error\n", dev_path);
> > +		return ERR_PTR(PTR_ERR(bdev));
> > +	}
> > +
> > +	err = -EOPNOTSUPP;
> > +	if (!dev_dax_supported(bdev)) {
> > +		pr_info("%s don't support DAX\n", bdevname(bdev, buf));
> > +		goto bdput;
> > +	}
> > +
> > +	err = -EINVAL;
> > +	if (bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff)) {
> > +		pr_info("invalid offset of %s\n", bdevname(bdev, buf));
> > +		goto bdput;
> > +	}
> > +
> > +	err = -ENOMEM;
> > +	ns = kmalloc(sizeof(struct nvm_namespace), GFP_KERNEL);
> > +	if (ns == NULL)
> > +		goto bdput;
> > +
> > +	err = -EINVAL;
> > +	ns->dax_dev = fs_dax_get_by_bdev(bdev);
> > +	if (ns->dax_dev == NULL) {
> > +		pr_info(" can't by dax device by %s\n", bdevname(bdev, buf));
> small cleanup:
> remove one useless space.
> -> pr_info("can't by dax device by %s\n", bdevname(bdev, buf));
> > +		goto free_ns;
> > +	}
> > +
> > +	if (dax_direct_access(ns->dax_dev, pgoff, 1, &ns->kaddr, &ns-
> >start_pfn) < 0) {
> > +		pr_info("dax_direct_access error\n");
> > +		goto free_ns;
> > +	}
> > +
> > +	ns->sb = (struct nvm_pages_sb *)(ns->kaddr +
> NVM_PAGES_SB_OFFSET);
> > +	if (ns->sb->total_namespaces_nr != 1) {
> > +		pr_info("only one nvm device\n");
> > +		goto free_ns;
> > +	}
> > +
> > +	err = -EEXIST;
> > +	if (!attach_nvm_set(ns))
> > +		goto free_ns;
> > +
> > +	ns->page_size = ns->sb->page_size;
> > +	ns->pages_offset = ns->sb->pages_offset;
> > +	ns->pages_total = ns->sb->pages_total;
> > +	ns->start_pfn.val += ns->pages_offset >> PAGE_SHIFT;
> > +	ns->free = ns->pages_total;
> > +	ns->bdev = bdev;
> > +	ns->nvm_set = only_set;
> > +
> > +	mutex_init(&ns->lock);
> > +
> > +	if (ns->sb->this_namespace_nr == 0) {
> > +		pr_info("only first namespace contain owner info\n");
> > +		init_owner_info(ns);
> > +	}
> > +
> > +	return ns;
> > +
> > +free_ns:
> > +	kfree(ns);
> > +bdput:
> > +	bdput(bdev);
> > +
> > +	return ERR_PTR(err);
> > +}
> > +EXPORT_SYMBOL_GPL(register_namespace);
> > +
> > +int __init bch_nvm_init(void)
> > +{
> > +	only_set = kzalloc(sizeof(struct nvm_set), GFP_KERNEL);
> > +	if (!only_set)
> > +		return -ENOMEM;
> > +
> > +	only_set->total_namespaces_nr = 0;
> > +	only_set->owner_lists = NULL;
> > +	only_set->nss = NULL;
> > +
> > +	mutex_init(&only_set->lock);
> > +
> > +	pr_info("bcache nvm init\n");
> > +	return 0;
> > +}
> > +
> > +void bch_nvm_exit(void)
> > +{
> > +	release_nvm_set(only_set);
> > +	pr_info("bcache nvm exit\n");
> > +}
> > diff --git a/drivers/md/bcache/nvm-pages.h
> > b/drivers/md/bcache/nvm-pages.h new file mode 100644 index
> > 000000000000..1a24af6cb5a9
> > --- /dev/null
> > +++ b/drivers/md/bcache/nvm-pages.h
> > @@ -0,0 +1,91 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef _BCACHE_NVM_PAGES_H
> > +#define _BCACHE_NVM_PAGES_H
> > +
> > +#include <linux/bcache-nvm.h>
> > +
> > +/*
> > + * Bcache NVDIMM in memory data structures  */
> > +
> > +/*
> > + * The following three structures in memory records which page(s)
> > +allocated
> > + * to which owner. After reboot from power failure, they will be
> > +initialized
> > + * based on nvm pages superblock in NVDIMM device.
> > + */
> > +struct extent {
> > +	void *kaddr;
> > +	u32 nr;
> > +	struct list_head list;
> > +};
> > +
> > +struct nvm_alloced_recs {
> > +	u32  size;
> > +	struct nvm_namespace *ns;
> > +	struct list_head extent_head;
> > +};
> > +
> > +struct owner_list {
> > +	u8  owner_uuid[16];
> > +	char label[NVM_PAGES_LABEL_SIZE];
> > +
> > +	struct nvm_set *nvm_set;
> > +	struct nvm_alloced_recs **alloced_recs; };
> > +
> > +struct nvm_namespace {
> > +	void *kaddr;
> > +
> > +	u8 uuid[18];
> > +	u64 free;
> > +	u32 page_size;
> > +	u64 pages_offset;
> > +	u64 pages_total;
> > +	pfn_t start_pfn;
> > +
> > +	struct dax_device *dax_dev;
> > +	struct block_device *bdev;
> > +	struct nvm_pages_sb *sb;
> > +	struct nvm_set *nvm_set;
> > +
> > +	struct mutex lock;
> > +};
> > +
> > +/*
> > + * A set of namespaces. Currently only one set can be supported.
> > + */
> > +struct nvm_set {
> > +	u8 set_uuid[16];
> > +	u32 total_namespaces_nr;
> > +
> > +	u32 owner_list_size;
> > +	struct owner_list **owner_lists;
> > +
> > +	struct nvm_namespace **nss;
> > +
> > +	struct mutex lock;
> > +};
> > +extern struct nvm_set *only_set;
> > +
> > +#ifdef CONFIG_BCACHE_NVM_PAGES
> > +
> > +extern struct nvm_namespace *register_namespace(const char
> > +*dev_path); extern int bch_nvm_init(void); extern void
> > +bch_nvm_exit(void);
> > +
> > +#else
> > +
> > +static inline struct nvm_namespace *register_namespace(const char
> > +*dev_path) {
> > +	return NULL;
> > +}
> > +static inline int bch_nvm_init(void)
> > +{
> > +	return 0;
> > +}
> > +static inline void bch_nvm_exit(void) { }
> > +
> > +#endif /* CONFIG_BCACHE_NVM_PAGES */
> > +
> > +#endif /* _BCACHE_NVM_PAGES_H */
> > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> > index 46a00134a36a..77b608efbe55 100644
> > --- a/drivers/md/bcache/super.c
> > +++ b/drivers/md/bcache/super.c
> > @@ -14,6 +14,7 @@
> >  #include "request.h"
> >  #include "writeback.h"
> >  #include "features.h"
> > +#include "nvm-pages.h"
> >
> >  #include <linux/blkdev.h>
> >  #include <linux/debugfs.h>
> > @@ -2782,6 +2783,7 @@ static void bcache_exit(void)  {
> >  	bch_debug_exit();
> >  	bch_request_exit();
> > +	bch_nvm_exit();
> >  	if (bcache_kobj)
> >  		kobject_put(bcache_kobj);
> >  	if (bcache_wq)
> > @@ -2861,6 +2863,7 @@ static int __init bcache_init(void)
> >
> >  	bch_debug_init();
> >  	closure_debug_init();
> > +	bch_nvm_init();
> >
> >  	bcache_is_reboot = false;
> >
> >





[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux