On Wed, Feb 26, 2025 at 7:50 PM Leon Romanovsky <leon@xxxxxxxxxx> wrote: > > From: Chiara Meiohas <cmeiohas@xxxxxxxxxx> > > Implement a new User CAPabilities (UCAP) API to provide fine-grained > control over specific firmware features. > > This approach offers more granular capabilities than the existing Linux > capabilities, which may be too generic for certain FW features. > > This mechanism represents each capability as a character device with > root read-write access. Root processes can grant users special > privileges by allowing access to these character devices (e.g., using > chown). > > UCAP character devices are located in /dev/infiniband and the class path > is /sys/class/infiniband_ucaps. > > Signed-off-by: Chiara Meiohas <cmeiohas@xxxxxxxxxx> > Reviewed-by: Yishai Hadas <yishaih@xxxxxxxxxx> > Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx> > --- > drivers/infiniband/core/Makefile | 3 +- > drivers/infiniband/core/ucaps.c | 255 ++++++++++++++++++++++++++ > drivers/infiniband/core/uverbs_main.c | 2 + > include/rdma/ib_ucaps.h | 25 +++ > 4 files changed, 284 insertions(+), 1 deletion(-) > create mode 100644 drivers/infiniband/core/ucaps.c > create mode 100644 include/rdma/ib_ucaps.h > > diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > index 8ab4eea5a0a5..d49ded7e95f0 100644 > --- a/drivers/infiniband/core/Makefile > +++ b/drivers/infiniband/core/Makefile > @@ -39,6 +39,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ > uverbs_std_types_async_fd.o \ > uverbs_std_types_srq.o \ > uverbs_std_types_wq.o \ > - uverbs_std_types_qp.o > + uverbs_std_types_qp.o \ > + ucaps.o > ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o > ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o > diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c > new file mode 100644 > index 000000000000..82b1184891ed > --- /dev/null > +++ b/drivers/infiniband/core/ucaps.c > @@ -0,0 +1,255 @@ > +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB > +/* > + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved > + */ > + > +#include <linux/cdev.h> > +#include <linux/mutex.h> > +#include <linux/file.h> > +#include <linux/fs.h> > +#include <rdma/ib_ucaps.h> > + > +#define RDMA_UCAP_FIRST RDMA_UCAP_MLX5_CTRL_LOCAL > + > +static DEFINE_MUTEX(ucaps_mutex); > +static struct ib_ucap *ucaps_list[RDMA_UCAP_MAX]; > +static bool ucaps_class_is_registered; > +static dev_t ucaps_base_dev; > + > +struct ib_ucap { > + struct cdev cdev; > + struct device dev; > + int refcount; > +}; > + > +static const char *ucap_names[RDMA_UCAP_MAX] = { > + [RDMA_UCAP_MLX5_CTRL_LOCAL] = "mlx5_perm_ctrl_local", > + [RDMA_UCAP_MLX5_CTRL_OTHER_VHCA] = "mlx5_perm_ctrl_other_vhca" > +}; > + > +static char *ucaps_devnode(const struct device *dev, umode_t *mode) > +{ > + if (mode) > + *mode = 0600; > + > + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); > +} > + > +static const struct class ucaps_class = { > + .name = "infiniband_ucaps", > + .devnode = ucaps_devnode, > +}; > + > +static const struct file_operations ucaps_cdev_fops = { > + .owner = THIS_MODULE, > + .open = simple_open, > +}; > + > +/** > + * ib_cleanup_ucaps - cleanup all API resources and class. > + * > + * This is called once, when removing the ib_uverbs module. > + */ > +void ib_cleanup_ucaps(void) > +{ > + mutex_lock(&ucaps_mutex); > + if (!ucaps_class_is_registered) { > + mutex_unlock(&ucaps_mutex); > + return; > + } > + > + for (int i = RDMA_UCAP_FIRST; i < RDMA_UCAP_MAX; i++) > + WARN_ON(ucaps_list[i]); > + > + class_unregister(&ucaps_class); > + ucaps_class_is_registered = false; > + unregister_chrdev_region(ucaps_base_dev, RDMA_UCAP_MAX); > + mutex_unlock(&ucaps_mutex); > +} > + > +static int get_ucap_from_devt(dev_t devt, u64 *idx_mask) > +{ > + for (int type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) { > + if (ucaps_list[type] && ucaps_list[type]->dev.devt == devt) { > + *idx_mask |= 1 << type; > + return 0; > + } > + } > + > + return -EINVAL; > +} > + > +static int get_devt_from_fd(unsigned int fd, dev_t *ret_dev) > +{ > + struct file *file; > + > + file = fget(fd); > + if (!file) > + return -EBADF; > + > + *ret_dev = file_inode(file)->i_rdev; > + fput(file); > + return 0; > +} > + > +/** > + * ib_ucaps_init - Initialization required before ucap creation. > + * > + * Return: 0 on success, or a negative errno value on failure > + */ > +static int ib_ucaps_init(void) > +{ > + int ret = 0; > + > + if (ucaps_class_is_registered) > + return ret; > + > + ret = class_register(&ucaps_class); > + if (ret) > + return ret; > + > + ret = alloc_chrdev_region(&ucaps_base_dev, 0, RDMA_UCAP_MAX, > + ucaps_class.name); > + if (ret < 0) { > + class_unregister(&ucaps_class); > + return ret; > + } > + > + ucaps_class_is_registered = true; > + > + return 0; > +} > + > +static void ucap_dev_release(struct device *device) > +{ > + struct ib_ucap *ucap = container_of(device, struct ib_ucap, dev); > + > + kfree(ucap); > +} > + > +/** > + * ib_create_ucap - Add a ucap character device > + * @type: UCAP type > + * > + * Creates a ucap character device in the /dev/infiniband directory. By default, > + * the device has root-only read-write access. > + * > + * A driver may call this multiple times with the same UCAP type. A reference > + * count tracks creations and deletions. > + * > + * Return: 0 on success, or a negative errno value on failure > + */ > +int ib_create_ucap(enum rdma_user_cap type) > +{ > + struct ib_ucap *ucap; > + int ret; > + > + if (type >= RDMA_UCAP_MAX) > + return -EINVAL; > + > + mutex_lock(&ucaps_mutex); > + ret = ib_ucaps_init(); > + if (ret) > + goto unlock; > + > + ucap = ucaps_list[type]; > + if (ucap) { > + ucap->refcount++; > + mutex_unlock(&ucaps_mutex); > + return 0; > + } > + > + ucap = kzalloc(sizeof(*ucap), GFP_KERNEL); > + if (!ucap) { > + ret = -ENOMEM; > + goto unlock; > + } > + > + device_initialize(&ucap->dev); > + ucap->dev.class = &ucaps_class; > + ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type); > + ucap->dev.release = ucap_dev_release; > + dev_set_name(&ucap->dev, ucap_names[type]); > + > + cdev_init(&ucap->cdev, &ucaps_cdev_fops); > + ucap->cdev.owner = THIS_MODULE; > + > + ret = cdev_device_add(&ucap->cdev, &ucap->dev); > + if (ret) > + goto err_device; Memory leak in the error path, need to free ucap here? > + > + ucap->refcount = 1; > + ucaps_list[type] = ucap; > + mutex_unlock(&ucaps_mutex); > + > + return 0; > + > +err_device: > + put_device(&ucap->dev); > +unlock: > + mutex_unlock(&ucaps_mutex); > + return ret; > +} > +EXPORT_SYMBOL(ib_create_ucap); > + > +/** > + * ib_remove_ucap - Remove a ucap character device > + * @type: User cap type > + * > + * Removes the ucap character device according to type. The device is completely > + * removed from the filesystem when its reference count reaches 0. > + */ > +void ib_remove_ucap(enum rdma_user_cap type) > +{ > + struct ib_ucap *ucap; > + > + mutex_lock(&ucaps_mutex); > + ucap = ucaps_list[type]; > + if (WARN_ON(!ucap)) > + goto end; > + > + ucap->refcount--; > + if (ucap->refcount) > + goto end; > + > + ucaps_list[type] = NULL; > + cdev_device_del(&ucap->cdev, &ucap->dev); > + put_device(&ucap->dev); need to free ucap here > + > +end: > + mutex_unlock(&ucaps_mutex); > +} > +EXPORT_SYMBOL(ib_remove_ucap); > + > +/** > + * ib_get_ucaps - Get bitmask of ucap types from file descriptors > + * @fds: Array of file descriptors > + * @fd_count: Number of file descriptors in the array > + * @idx_mask: Bitmask to be updated based on the ucaps in the fd list > + * > + * Given an array of file descriptors, this function returns a bitmask of > + * the ucaps where a bit is set if an FD for that ucap type was in the array. > + * > + * Return: 0 on success, or a negative errno value on failure > + */ > +int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask) > +{ > + int ret = 0; > + dev_t dev; > + > + *idx_mask = 0; > + mutex_lock(&ucaps_mutex); > + for (int i = 0; i < fd_count; i++) { > + ret = get_devt_from_fd(fds[i], &dev); > + if (ret) > + goto end; > + > + ret = get_ucap_from_devt(dev, idx_mask); > + if (ret) > + goto end; > + } > + > +end: > + mutex_unlock(&ucaps_mutex); > + return ret; > +} > diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c > index 85cfc790a7bb..973fe2c7ef53 100644 > --- a/drivers/infiniband/core/uverbs_main.c > +++ b/drivers/infiniband/core/uverbs_main.c > @@ -52,6 +52,7 @@ > #include <rdma/ib.h> > #include <rdma/uverbs_std_types.h> > #include <rdma/rdma_netlink.h> > +#include <rdma/ib_ucaps.h> > > #include "uverbs.h" > #include "core_priv.h" > @@ -1345,6 +1346,7 @@ static void __exit ib_uverbs_cleanup(void) > IB_UVERBS_NUM_FIXED_MINOR); > unregister_chrdev_region(dynamic_uverbs_dev, > IB_UVERBS_NUM_DYNAMIC_MINOR); > + ib_cleanup_ucaps(); > mmu_notifier_synchronize(); > } > > diff --git a/include/rdma/ib_ucaps.h b/include/rdma/ib_ucaps.h > new file mode 100644 > index 000000000000..d044d02f087f > --- /dev/null > +++ b/include/rdma/ib_ucaps.h > @@ -0,0 +1,25 @@ > +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ > +/* > + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved > + */ > + > +#ifndef _IB_UCAPS_H_ > +#define _IB_UCAPS_H_ > + > +#define UCAP_ENABLED(ucaps, type) (!!((ucaps) & (1U << type))) > + > +enum rdma_user_cap { > + RDMA_UCAP_MLX5_CTRL_LOCAL, > + RDMA_UCAP_MLX5_CTRL_OTHER_VHCA, > + RDMA_UCAP_MAX > +}; > + > +void ib_cleanup_ucaps(void); > + > +int ib_create_ucap(enum rdma_user_cap type); > + > +void ib_remove_ucap(enum rdma_user_cap type); > + > +int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask); > + > +#endif /* _IB_UCAPS_H_ */ > -- > 2.48.1 > > -- Regards, Kalesh AP
Attachment:
smime.p7s
Description: S/MIME Cryptographic Signature