This adds the mpool control plane infrastructure to manage mpools. There is a unit object instance for each device object created by the mpool driver. A minor number is reserved for each unit object. The mpool control device (/dev/mpoolctl) gets a minor number of 0. An mpool device (/dev/mpool/<mpool_name>) gets a minor number > 0. Utility routines exist to lookup an mpool unit given its minor number or name. All units are born with a reference count of two - one for the caller and a birth reference that can be released only by either destroying the unit or unloading the module. Co-developed-by: Greg Becker <gbecker@xxxxxxxxxx> Signed-off-by: Greg Becker <gbecker@xxxxxxxxxx> Co-developed-by: Pierre Labat <plabat@xxxxxxxxxx> Signed-off-by: Pierre Labat <plabat@xxxxxxxxxx> Co-developed-by: John Groves <jgroves@xxxxxxxxxx> Signed-off-by: John Groves <jgroves@xxxxxxxxxx> Signed-off-by: Nabeel M Mohamed <nmeeramohide@xxxxxxxxxx> --- drivers/mpool/init.c | 20 ++ drivers/mpool/init.h | 3 + drivers/mpool/mpctl.c | 465 ++++++++++++++++++++++++++++++++++++++++++ drivers/mpool/mpctl.h | 49 +++++ drivers/mpool/sysfs.c | 48 +++++ drivers/mpool/sysfs.h | 48 +++++ 6 files changed, 633 insertions(+) create mode 100644 drivers/mpool/mpctl.c create mode 100644 drivers/mpool/mpctl.h create mode 100644 drivers/mpool/sysfs.c create mode 100644 drivers/mpool/sysfs.h diff --git a/drivers/mpool/init.c b/drivers/mpool/init.c index eb1217f63746..126c6c7142b5 100644 --- a/drivers/mpool/init.c +++ b/drivers/mpool/init.c @@ -12,10 +12,23 @@ #include "smap.h" #include "pmd_obj.h" #include "sb.h" +#include "mpctl.h" /* * Module params... */ +unsigned int maxunits __read_mostly = 1024; +module_param(maxunits, uint, 0444); +MODULE_PARM_DESC(maxunits, " max mpools"); + +unsigned int rwsz_max_mb __read_mostly = 32; +module_param(rwsz_max_mb, uint, 0444); +MODULE_PARM_DESC(rwsz_max_mb, " max mblock/mlog r/w size (mB)"); + +unsigned int rwconc_max __read_mostly = 8; +module_param(rwconc_max, uint, 0444); +MODULE_PARM_DESC(rwconc_max, " max mblock/mlog large r/w concurrency"); + unsigned int rsvd_bios_max __read_mostly = 16; module_param(rsvd_bios_max, uint, 0444); MODULE_PARM_DESC(rsvd_bios_max, "max reserved bios in mpool bioset"); @@ -26,6 +39,7 @@ MODULE_PARM_DESC(chunk_size_kb, "Chunk size (in KiB) for device I/O"); static void mpool_exit_impl(void) { + mpctl_exit(); pmd_exit(); smap_exit(); sb_exit(); @@ -68,6 +82,12 @@ static __init int mpool_init(void) goto errout; } + rc = mpctl_init(); + if (rc) { + errmsg = "mpctl init failed"; + goto errout; + } + errout: if (rc) { mp_pr_err("%s", rc, errmsg); diff --git a/drivers/mpool/init.h b/drivers/mpool/init.h index e02a9672e727..3d8f809a5e45 100644 --- a/drivers/mpool/init.h +++ b/drivers/mpool/init.h @@ -6,6 +6,9 @@ #ifndef MPOOL_INIT_H #define MPOOL_INIT_H +extern unsigned int maxunits; +extern unsigned int rwsz_max_mb; +extern unsigned int rwconc_max; extern unsigned int rsvd_bios_max; extern int chunk_size_kb; diff --git a/drivers/mpool/mpctl.c b/drivers/mpool/mpctl.c new file mode 100644 index 000000000000..21eb7ac4610b --- /dev/null +++ b/drivers/mpool/mpctl.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/log2.h> +#include <linux/idr.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/blkdev.h> +#include <linux/vmalloc.h> +#include <linux/memcontrol.h> +#include <linux/pagemap.h> +#include <linux/kobject.h> +#include <linux/mm_inline.h> +#include <linux/version.h> +#include <linux/kref.h> + +#include <linux/backing-dev.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/migrate.h> +#include <linux/delay.h> +#include <linux/ctype.h> +#include <linux/uio.h> + +#include "mpool_printk.h" +#include "assert.h" + +#include "mpool_ioctl.h" +#include "mlog.h" +#include "mp.h" +#include "mpctl.h" +#include "sysfs.h" +#include "init.h" + + +#define NODEV MKDEV(0, 0) /* Non-existent device */ + +/* mpc pseudo-driver instance data (i.e., all globals live here). */ +struct mpc_softstate { + struct mutex ss_lock; /* Protects ss_unitmap */ + struct idr ss_unitmap; /* minor-to-unit map */ + + ____cacheline_aligned + struct semaphore ss_op_sema; /* Serialize mgmt. ops */ + dev_t ss_devno; /* Control device devno */ + struct cdev ss_cdev; + struct class *ss_class; + bool ss_inited; +}; + +/* Unit-type specific information. */ +struct mpc_uinfo { + const char *ui_typename; + const char *ui_subdirfmt; +}; + +/* One mpc_mpool object per mpool. */ +struct mpc_mpool { + struct kref mp_ref; + struct rw_semaphore mp_lock; + struct mpool_descriptor *mp_desc; + struct mp_mdc *mp_mdc; + uint mp_dpathc; + char **mp_dpathv; + char mp_name[]; +}; + +/* The following structures are initialized at the end of this file. */ +static const struct file_operations mpc_fops_default; + +static struct mpc_softstate mpc_softstate; + +static unsigned int mpc_ctl_uid __read_mostly; +static unsigned int mpc_ctl_gid __read_mostly = 6; +static unsigned int mpc_ctl_mode __read_mostly = 0664; + +static const struct mpc_uinfo mpc_uinfo_ctl = { + .ui_typename = "mpoolctl", + .ui_subdirfmt = "%s", +}; + +static const struct mpc_uinfo mpc_uinfo_mpool = { + .ui_typename = "mpool", + .ui_subdirfmt = "mpool/%s", +}; + +static inline bool mpc_unit_isctldev(const struct mpc_unit *unit) +{ + return (unit->un_uinfo == &mpc_uinfo_ctl); +} + +static inline bool mpc_unit_ismpooldev(const struct mpc_unit *unit) +{ + return (unit->un_uinfo == &mpc_uinfo_mpool); +} + +static inline uid_t mpc_current_uid(void) +{ + return from_kuid(current_user_ns(), current_uid()); +} + +static inline gid_t mpc_current_gid(void) +{ + return from_kgid(current_user_ns(), current_gid()); +} + +/** + * mpc_mpool_release() - release kref handler for mpc_mpool object + * @refp: kref pointer + */ +static void mpc_mpool_release(struct kref *refp) +{ + struct mpc_mpool *mpool = container_of(refp, struct mpc_mpool, mp_ref); + int rc; + + if (mpool->mp_desc) { + rc = mpool_deactivate(mpool->mp_desc); + if (rc) + mp_pr_err("mpool %s deactivate failed", rc, mpool->mp_name); + } + + kfree(mpool->mp_dpathv); + kfree(mpool); + + module_put(THIS_MODULE); +} + +static void mpc_mpool_put(struct mpc_mpool *mpool) +{ + kref_put(&mpool->mp_ref, mpc_mpool_release); +} + +/** + * mpc_unit_create() - Create and install a unit object + * @path: device path under "/dev/" to create + * @mpool: mpool ptr + * @unitp: unit ptr + * + * Create a unit object and install a NULL ptr for it in the units map, + * thereby reserving a minor number. The unit cannot be found by any + * of the lookup routines until the NULL ptr is replaced by the actual + * ptr to the unit. + * + * A unit maps an mpool device (.e.g., /dev/mpool/foo) to an mpool object + * created by mpool_create(). + * + * All units are born with two references, one for the caller and one that + * can only be released by destroying the unit or unloading the module. + * + * Return: Returns 0 if successful and sets *unitp. + * Returns -errno on error. + */ +static int mpc_unit_create(const char *name, struct mpc_mpool *mpool, struct mpc_unit **unitp) +{ + struct mpc_softstate *ss = &mpc_softstate; + struct mpc_unit *unit; + size_t unitsz; + int minor; + + if (!ss || !name || !unitp) + return -EINVAL; + + unitsz = sizeof(*unit) + strlen(name) + 1; + + unit = kzalloc(unitsz, GFP_KERNEL); + if (!unit) + return -ENOMEM; + + strcpy(unit->un_name, name); + + sema_init(&unit->un_open_lock, 1); + unit->un_open_excl = false; + unit->un_open_cnt = 0; + unit->un_devno = NODEV; + kref_init(&unit->un_ref); + unit->un_mpool = mpool; + + mutex_lock(&ss->ss_lock); + minor = idr_alloc(&ss->ss_unitmap, NULL, 0, -1, GFP_KERNEL); + mutex_unlock(&ss->ss_lock); + + if (minor < 0) { + kfree(unit); + return minor; + } + + kref_get(&unit->un_ref); /* acquire additional ref for the caller */ + + unit->un_devno = MKDEV(MAJOR(ss->ss_cdev.dev), minor); + *unitp = unit; + + return 0; +} + +/** + * mpc_unit_release() - Destroy a unit object created by mpc_unit_create(). + * @unit: + */ +static void mpc_unit_release(struct kref *refp) +{ + struct mpc_unit *unit = container_of(refp, struct mpc_unit, un_ref); + struct mpc_softstate *ss = &mpc_softstate; + + mutex_lock(&ss->ss_lock); + idr_remove(&ss->ss_unitmap, MINOR(unit->un_devno)); + mutex_unlock(&ss->ss_lock); + + if (unit->un_mpool) + mpc_mpool_put(unit->un_mpool); + + if (unit->un_device) + device_destroy(ss->ss_class, unit->un_devno); + + kfree(unit); +} + +static void mpc_unit_put(struct mpc_unit *unit) +{ + if (unit) + kref_put(&unit->un_ref, mpc_unit_release); +} + +/** + * mpc_unit_setup() - Create a device unit object and special file + * @uinfo: + * @name: + * @cfg: + * @mpool: + * @unitp: unitp can be NULL. *unitp is updated only if unitp is not NULL + * and no error is returned. + * + * If successful, this function adopts mpool. On failure, mpool + * remains the responsibility of the caller. + * + * All units are born with two references, one for the caller and one + * that can only be released by destroying the unit or unloading the + * module. If the caller passes in nil for unitp then this function + * will drop the caller's "caller reference" on his behalf. + * + * Return: Returns 0 on success, -errno otherwise... + */ +static int mpc_unit_setup(const struct mpc_uinfo *uinfo, const char *name, + const struct mpool_config *cfg, struct mpc_mpool *mpool, + struct mpc_unit **unitp) +{ + struct mpc_softstate *ss = &mpc_softstate; + struct mpc_unit *unit; + struct device *device; + int rc; + + if (!ss || !uinfo || !name || !name[0] || !cfg || !unitp) + return -EINVAL; + + if (cfg->mc_uid == -1 || cfg->mc_gid == -1 || cfg->mc_mode == -1) + return -EINVAL; + + if (!capable(CAP_MKNOD)) + return -EPERM; + + if (cfg->mc_uid != mpc_current_uid() && !capable(CAP_CHOWN)) + return -EPERM; + + if (cfg->mc_gid != mpc_current_gid() && !capable(CAP_CHOWN)) + return -EPERM; + + if (mpool && strcmp(mpool->mp_name, name)) + return -EINVAL; + + *unitp = NULL; + unit = NULL; + + /* + * Try to create a new unit object. If successful, then all error + * handling beyond this point must route through the errout label + * to ensure the unit is fully destroyed. + */ + rc = mpc_unit_create(name, mpool, &unit); + if (rc) + return rc; + + unit->un_uid = cfg->mc_uid; + unit->un_gid = cfg->mc_gid; + unit->un_mode = cfg->mc_mode; + + unit->un_mdc_captgt = cfg->mc_captgt; + memcpy(&unit->un_utype, &cfg->mc_utype, sizeof(unit->un_utype)); + strlcpy(unit->un_label, cfg->mc_label, sizeof(unit->un_label)); + unit->un_ds_oidv[0] = cfg->mc_oid1; + unit->un_ds_oidv[1] = cfg->mc_oid2; + unit->un_ra_pages_max = cfg->mc_ra_pages_max; + + device = device_create(ss->ss_class, NULL, unit->un_devno, unit, uinfo->ui_subdirfmt, name); + if (IS_ERR(device)) { + rc = PTR_ERR(device); + mp_pr_err("device_create %s failed", rc, name); + goto errout; + } + + unit->un_device = device; + unit->un_uinfo = uinfo; + + dev_info(unit->un_device, "minor %u, uid %u, gid %u, mode 0%02o", + MINOR(unit->un_devno), cfg->mc_uid, cfg->mc_gid, cfg->mc_mode); + + *unitp = unit; + +errout: + if (rc) { + /* + * Acquire an additional reference on mpool so that it is not + * errantly destroyed along with the unit, then release both + * the unit's birth and caller's references which should + * destroy the unit. + */ + kref_get(&mpool->mp_ref); + mpc_unit_put(unit); + mpc_unit_put(unit); + } + + return rc; +} + +/** + * mpc_uevent() - Hook to intercept and modify uevents before they're posted to udev + * @dev: mpc driver device + * @env: + * + * See man 7 udev for more info. + */ +static int mpc_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct mpc_unit *unit = dev_get_drvdata(dev); + + if (unit) { + add_uevent_var(env, "DEVMODE=%#o", unit->un_mode); + add_uevent_var(env, "DEVUID=%u", unit->un_uid); + add_uevent_var(env, "DEVGID=%u", unit->un_gid); + } + + return 0; +} + +static int mpc_exit_unit(int minor, void *item, void *arg) +{ + mpc_unit_put(item); + + return ITERCB_NEXT; +} + +/** + * mpctl_exit() - Tear down and unload the mpool control module. + */ +void mpctl_exit(void) +{ + struct mpc_softstate *ss = &mpc_softstate; + + if (ss->ss_inited) { + idr_for_each(&ss->ss_unitmap, mpc_exit_unit, NULL); + idr_destroy(&ss->ss_unitmap); + + if (ss->ss_devno != NODEV) { + if (ss->ss_class) { + if (ss->ss_cdev.ops) + cdev_del(&ss->ss_cdev); + class_destroy(ss->ss_class); + } + unregister_chrdev_region(ss->ss_devno, maxunits); + } + + ss->ss_inited = false; + } +} + +/** + * mpctl_init() - Load and initialize the mpool control module. + */ +int mpctl_init(void) +{ + struct mpc_softstate *ss = &mpc_softstate; + struct mpool_config *cfg = NULL; + struct mpc_unit *ctlunit; + const char *errmsg = NULL; + int rc; + + if (ss->ss_inited) + return -EBUSY; + + ctlunit = NULL; + + maxunits = clamp_t(uint, maxunits, 8, 8192); + + cdev_init(&ss->ss_cdev, &mpc_fops_default); + ss->ss_cdev.owner = THIS_MODULE; + + mutex_init(&ss->ss_lock); + idr_init(&ss->ss_unitmap); + ss->ss_class = NULL; + ss->ss_devno = NODEV; + sema_init(&ss->ss_op_sema, 1); + ss->ss_inited = true; + + rc = alloc_chrdev_region(&ss->ss_devno, 0, maxunits, "mpool"); + if (rc) { + errmsg = "cannot allocate control device major"; + ss->ss_devno = NODEV; + goto errout; + } + + ss->ss_class = class_create(THIS_MODULE, module_name(THIS_MODULE)); + if (IS_ERR(ss->ss_class)) { + errmsg = "class_create() failed"; + rc = PTR_ERR(ss->ss_class); + ss->ss_class = NULL; + goto errout; + } + + ss->ss_class->dev_uevent = mpc_uevent; + + rc = cdev_add(&ss->ss_cdev, ss->ss_devno, maxunits); + if (rc) { + errmsg = "cdev_add() failed"; + ss->ss_cdev.ops = NULL; + goto errout; + } + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) { + errmsg = "cfg alloc failed"; + rc = -ENOMEM; + goto errout; + } + + cfg->mc_uid = mpc_ctl_uid; + cfg->mc_gid = mpc_ctl_gid; + cfg->mc_mode = mpc_ctl_mode; + + rc = mpc_unit_setup(&mpc_uinfo_ctl, MPC_DEV_CTLNAME, cfg, NULL, &ctlunit); + if (rc) { + errmsg = "cannot create control device"; + goto errout; + } + + mutex_lock(&ss->ss_lock); + idr_replace(&ss->ss_unitmap, ctlunit, MINOR(ctlunit->un_devno)); + mutex_unlock(&ss->ss_lock); + + mpc_unit_put(ctlunit); + +errout: + if (rc) { + mp_pr_err("%s", rc, errmsg); + mpctl_exit(); + } + + kfree(cfg); + + return rc; +} diff --git a/drivers/mpool/mpctl.h b/drivers/mpool/mpctl.h new file mode 100644 index 000000000000..412a6a491c15 --- /dev/null +++ b/drivers/mpool/mpctl.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved. + */ + +#ifndef MPOOL_MPCTL_H +#define MPOOL_MPCTL_H + +#include <linux/rbtree.h> +#include <linux/kref.h> +#include <linux/device.h> +#include <linux/semaphore.h> + +#define ITERCB_NEXT (0) +#define ITERCB_DONE (1) + +/* There is one unit object for each device object created by the driver. */ +struct mpc_unit { + struct kref un_ref; + int un_open_cnt; /* Unit open count */ + struct semaphore un_open_lock; /* Protects un_open_* */ + bool un_open_excl; /* Unit exclusively open */ + uid_t un_uid; + gid_t un_gid; + mode_t un_mode; + dev_t un_devno; + const struct mpc_uinfo *un_uinfo; + struct mpc_mpool *un_mpool; + struct address_space *un_mapping; + struct device *un_device; + struct mpc_attr *un_attr; + uint un_rawio; /* log2(max_mblock_size) */ + u64 un_ds_oidv[2]; + u32 un_ra_pages_max; + u64 un_mdc_captgt; + uuid_le un_utype; + u8 un_label[MPOOL_LABELSZ_MAX]; + char un_name[]; +}; + +static inline struct mpc_unit *dev_to_unit(struct device *dev) +{ + return dev_get_drvdata(dev); +} + +int mpctl_init(void) __cold; +void mpctl_exit(void) __cold; + +#endif /* MPOOL_MPCTL_H */ diff --git a/drivers/mpool/sysfs.c b/drivers/mpool/sysfs.c new file mode 100644 index 000000000000..638106a77669 --- /dev/null +++ b/drivers/mpool/sysfs.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved. + */ + +#include <linux/slab.h> + +#include "sysfs.h" + +struct mpc_attr *mpc_attr_create(struct device *dev, const char *name, int acnt) +{ + struct mpc_attr *attr; + int i; + + attr = kzalloc(sizeof(*attr) + acnt * sizeof(*attr->a_dattr) + + (acnt + 1) * sizeof(*attr->a_attrs), GFP_KERNEL); + if (!attr) + return NULL; + + attr->a_kobj = &dev->kobj; + + attr->a_dattr = (void *)(attr + 1); + + attr->a_attrs = (void *)(attr->a_dattr + acnt); + for (i = 0; i < acnt; i++) + attr->a_attrs[i] = &attr->a_dattr[i].attr; + attr->a_attrs[i] = NULL; + + attr->a_group.attrs = attr->a_attrs; + attr->a_group.name = name; + + return attr; +} + +void mpc_attr_destroy(struct mpc_attr *attr) +{ + kfree(attr); +} + +int mpc_attr_group_create(struct mpc_attr *attr) +{ + return sysfs_create_group(attr->a_kobj, &attr->a_group); +} + +void mpc_attr_group_destroy(struct mpc_attr *attr) +{ + sysfs_remove_group(attr->a_kobj, &attr->a_group); +} diff --git a/drivers/mpool/sysfs.h b/drivers/mpool/sysfs.h new file mode 100644 index 000000000000..b161eceec75f --- /dev/null +++ b/drivers/mpool/sysfs.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved. + */ + +#ifndef MPOOL_SYSFS_H +#define MPOOL_SYSFS_H + +#include <linux/device.h> +#include <linux/sysfs.h> + + +#define MPC_ATTR(_da, _name, _mode) \ + (_da)->attr.name = __stringify(_name); \ + (_da)->attr.mode = (_mode); \ + (_da)->show = mpc_##_name##_show \ + +#define MPC_ATTR_RO(_dattr, _name) \ + do { \ + __typeof(_dattr) da = (_dattr); \ + MPC_ATTR(da, _name, 0444); \ + da->store = NULL; \ + } while (0) + +#define MPC_ATTR_RW(_dattr, _name) \ + do { \ + __typeof(_dattr) da = (_dattr); \ + MPC_ATTR(da, _name, 0644); \ + da->store = mpc_##_name##_store; \ + } while (0) + + +struct mpc_attr { + struct attribute_group a_group; + struct kobject *a_kobj; + struct device_attribute *a_dattr; + struct attribute **a_attrs; +}; + +struct mpc_attr *mpc_attr_create(struct device *d, const char *name, int acnt); + +void mpc_attr_destroy(struct mpc_attr *attr); + +int mpc_attr_group_create(struct mpc_attr *attr); + +void mpc_attr_group_destroy(struct mpc_attr *attr); + +#endif /* MPOOL_SYSFS_H */ -- 2.17.2