RE: [RFC PATCH 2/8] bcache: initialize the nvm pages allocator

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Coly:
> -----Original Message-----
> From: Coly Li <colyli@xxxxxxx>
> Sent: Wednesday, December 16, 2020 6:17 PM
> To: Ren, Qiaowei <qiaowei.ren@xxxxxxxxx>
> Cc: linux-bcache@xxxxxxxxxxxxxxx; Ma, Jianpeng <jianpeng.ma@xxxxxxxxx>
> Subject: Re: [RFC PATCH 2/8] bcache: initialize the nvm pages allocator
> 
> On 12/3/20 6:53 PM, Qiaowei Ren wrote:
> > This patch define the prototype data structures in memory and
> > initializes the nvm pages allocator.
> >
> > The nv address space which is managed by this allocatior can consist
> > of many nvm namespaces, and some namespaces can compose into one
> nvm
> > set, like cache set. For this initial implementation, only one set can
> > be supported.
> >
> > The users of this nvm pages allocator need to call
> > regiseter_namespace() to register the nvdimm device (like /dev/pmemX)
> > into this allocator as the instance of struct nvm_namespace.
> >
> > Signed-off-by: Jianpeng Ma <jianpeng.ma@xxxxxxxxx>
> > Signed-off-by: Qiaowei Ren <qiaowei.ren@xxxxxxxxx>
> 
> Hi Jianpeng,
> 
> I add my comments in line.
> 
> 
> > ---
> >  drivers/md/bcache/Kconfig     |   6 +
> >  drivers/md/bcache/Makefile    |   2 +-
> >  drivers/md/bcache/nvm-pages.c | 303
> > ++++++++++++++++++++++++++++++++++
> >  drivers/md/bcache/nvm-pages.h |  91 ++++++++++
> >  drivers/md/bcache/super.c     |   3 +
> >  5 files changed, 404 insertions(+), 1 deletion(-)  create mode 100644
> > drivers/md/bcache/nvm-pages.c  create mode 100644
> > drivers/md/bcache/nvm-pages.h
> >
> > diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
> > index d1ca4d059c20..448a99ce13b2 100644
> > --- a/drivers/md/bcache/Kconfig
> > +++ b/drivers/md/bcache/Kconfig
> > @@ -35,3 +35,9 @@ config BCACHE_ASYNC_REGISTRATION
> >  	device path into this file will returns immediately and the real
> >  	registration work is handled in kernel work queue in asynchronous
> >  	way.
> > +
> > +config BCACHE_NVM_PAGES
> > +	bool "NVDIMM support for bcache"
> 
> I'd like to have this change in kernel ASAP for more people to test and
> evaludate, but it should be bool "NVDIMM support for bcache
> (EXPERIMENTAL)".
> 
>
Ok, i'll change.

> > +	depends on BCACHE
> > +	help
> > +	nvm pages allocator for bcache.
> > diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
> > index 5b87e59676b8..948e5ed2ca66 100644
> > --- a/drivers/md/bcache/Makefile
> > +++ b/drivers/md/bcache/Makefile
> > @@ -4,4 +4,4 @@ obj-$(CONFIG_BCACHE)	+= bcache.o
> >
> >  bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
> >  	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
> > -	util.o writeback.o features.o
> > +	util.o writeback.o features.o nvm-pages.o
> > diff --git a/drivers/md/bcache/nvm-pages.c
> > b/drivers/md/bcache/nvm-pages.c new file mode 100644 index
> > 000000000000..841616ea3267
> > --- /dev/null
> > +++ b/drivers/md/bcache/nvm-pages.c
> > @@ -0,0 +1,303 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +#include <linux/slab.h>
> > +#include <linux/list.h>
> > +#include <linux/mutex.h>
> > +#include <linux/dax.h>
> > +#include <linux/pfn_t.h>
> > +#include <linux/libnvdimm.h>
> > +#include <linux/mm_types.h>
> > +#include <linux/err.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/bitmap.h>
> > +#include <linux/blkdev.h>
> > +#include "nvm-pages.h"
> > +
> > +struct nvm_set *only_set;
> > +
> 
> [snipped]
> 
> > +static void init_owner_info(struct nvm_namespace *ns) {
> > +	struct owner_list_head *owner_list_head;
> > +	struct nvm_pages_owner_head *owner_head;
> > +	struct nvm_pgalloc_recs *nvm_pgalloc_recs;
> > +	struct owner_list *owner_list;
> > +	struct nvm_alloced_recs *extents;
> > +	struct extent *extent;
> > +	u32 i, j, k;
> > +
> > +	owner_list_head = (struct owner_list_head *)
> > +			(ns->kaddr + NVM_PAGES_OWNER_LIST_HEAD_OFFSET);
> > +
> > +	mutex_lock(&only_set->lock);
> > +	only_set->owner_list_size = owner_list_head->size;
> > +	for (i = 0; i < owner_list_head->size; i++) {
> > +		owner_head = &owner_list_head->heads[i];
> > +		owner_list = alloc_owner_list(owner_head->uuid, owner_head-
> >label,
> > +				only_set->total_namespaces_nr);
> > +
> > +		for (j = 0; j < only_set->total_namespaces_nr; j++) {
> > +			if (only_set->nss[j] == NULL || owner_head->recs[j] ==
> NULL)
> > +				continue;
> > +
> > +			nvm_pgalloc_recs = (struct nvm_pgalloc_recs *)
> > +					((long)owner_head->recs[j] + ns->kaddr);
> > +
> > +			extents = kzalloc(sizeof(struct nvm_alloced_recs),
> GFP_KERNEL);
> > +			extents->ns = only_set->nss[j];
> > +			INIT_LIST_HEAD(&extents->extent_head);
> > +			owner_list->alloced_recs[j] = extents;
> > +
> > +			do {
> > +				struct nvm_pgalloc_rec *rec;
> > +
> > +				for (k = 0; k < nvm_pgalloc_recs->size; k++) {
> > +					rec = &nvm_pgalloc_recs->recs[k];
> > +					extent = kzalloc(sizeof(struct extent),
> GFP_KERNEL);
> > +					extent->kaddr =
> nvm_pgoff_to_vaddr(extents->ns, rec->pgoff);
> > +					extent->nr = rec->nr;
> > +					list_add_tail(&extent->list, &extents-
> >extent_head);
> > +
> > +					extents->ns->free -= rec->nr;
> > +				}
> > +				extents->size += nvm_pgalloc_recs->size;
> > +
> > +				if (nvm_pgalloc_recs->next)
> > +					nvm_pgalloc_recs = (struct nvm_pgalloc_recs
> *)
> > +						((long)nvm_pgalloc_recs->next + ns-
> >kaddr);
> > +				else
> > +					nvm_pgalloc_recs = NULL;
> > +			} while (nvm_pgalloc_recs);
> > +		}
> > +		only_set->owner_lists[i] = owner_list;
> > +		owner_list->nvm_set = only_set;
> > +	}
> > +	mutex_unlock(&only_set->lock);
> > +}
> 
> Currently the initialization of owner_list_head is done by user space tool. We
> just need to check whether the owner_list_head is correctly initialized by user
> space tool.
> 
When we attach nvm device, we need know the alloced spaces(alloced by use space when mkfs or previous in kernel mode). 
And based on those info, patch3 remove the alloced spaces and init buddy allocator using the left space.

Thanks!
Jianpeng.
> [snipped]
> > diff --git a/drivers/md/bcache/nvm-pages.h
> > b/drivers/md/bcache/nvm-pages.h new file mode 100644 index
> > 000000000000..1a24af6cb5a9
> > --- /dev/null
> > +++ b/drivers/md/bcache/nvm-pages.h
> > @@ -0,0 +1,91 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef _BCACHE_NVM_PAGES_H
> > +#define _BCACHE_NVM_PAGES_H
> > +
> > +#include <linux/bcache-nvm.h>
> > +
> > +/*
> > + * Bcache NVDIMM in memory data structures  */
> > +
> > +/*
> > + * The following three structures in memory records which page(s)
> > +allocated
> > + * to which owner. After reboot from power failure, they will be
> > +initialized
> > + * based on nvm pages superblock in NVDIMM device.
> > + */
> > +struct extent {
> > +	void *kaddr;
> > +	u32 nr;
> > +	struct list_head list;
> > +};
> > +
> > +struct nvm_alloced_recs {
> > +	u32  size;
> > +	struct nvm_namespace *ns;
> > +	struct list_head extent_head;
> > +};
> > +
> > +struct owner_list {
> > +	u8  owner_uuid[16];
> > +	char label[NVM_PAGES_LABEL_SIZE];
> > +
> > +	struct nvm_set *nvm_set;
> > +	struct nvm_alloced_recs **alloced_recs; };
> > +
> > +struct nvm_namespace {
> > +	void *kaddr;
> > +
> > +	u8 uuid[18];
> > +	u64 free;
> > +	u32 page_size;
> > +	u64 pages_offset;
> > +	u64 pages_total;
> > +	pfn_t start_pfn;
> > +
> > +	struct dax_device *dax_dev;
> > +	struct block_device *bdev;
> > +	struct nvm_pages_sb *sb;
> > +	struct nvm_set *nvm_set;
> > +
> > +	struct mutex lock;
> > +};> +/*
> > + * A set of namespaces. Currently only one set can be supported.
> > + */
> > +struct nvm_set {
> > +	u8 set_uuid[16];
> > +	u32 total_namespaces_nr;
> > +
> > +	u32 owner_list_size;
> > +	struct owner_list **owner_lists;
> > +
> > +	struct nvm_namespace **nss;
> > +
> > +	struct mutex lock;
> > +};
> 
> 
> All the above data structures should be named with prefix bch_ .
> 
> [snipped]
> 
> Thanks.
> 
> Coly Li




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux