On Sat, Dec 22, 2018 at 10:18:06PM +0100, Christian Brauner wrote: > Since binderfs can be mounted by userns root in non-initial user namespaces > some precautions are in order. First, a way to set a maximum on the number > of binder devices that can be allocated per binderfs instance and second, a > way to reserve a reasonable chunk of binderfs devices for the initial ipc > namespace. > A first approach as seen in [1] used sysctls similiar to devpts but was > shown to be flawed (cf. [2] and [3]) since some aspects were unneeded. This > is an alternative approach which avoids sysctls completely and instead > switches to a single mount option. > > Starting with this commit binderfs instances can be mounted with a limit on > the number of binder devices that can be allocated. The max=<count> mount > option serves as a per-instance limit. If max=<count> is set then only > <count> number of binder devices can be allocated in this binderfs > instance. > Additionally, the binderfs instance in the initial ipc namespace will > always have a reserve of at least 1024 binder devices unless explicitly > capped via max=<count>. > > This allows to safely bind-mount binderfs instances into unprivileged user > namespaces since userns root in a non-initial user namespace cannot change > the mount option as long as it does not own the mount namespace the > binderfs mount was created in and hence cannot drain the host of minor > device numbers > > [1]: https://lore.kernel.org/lkml/20181221133909.18794-1-christian@xxxxxxxxxx/ > [2]; https://lore.kernel.org/lkml/20181221163316.GA8517@xxxxxxxxx/ > [3]: https://lore.kernel.org/lkml/CAHRSSEx+gDVW4fKKK8oZNAir9G5icJLyodO8hykv3O0O1jt2FQ@xxxxxxxxxxxxxx/ > [4]: https://lore.kernel.org/lkml/20181221192044.5yvfnuri7gdop4rs@xxxxxxxxxx/ > > Cc: Todd Kjos <tkjos@xxxxxxxxxx> > Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> > Signed-off-by: Christian Brauner <christian.brauner@xxxxxxxxxx> > --- > drivers/android/binderfs.c | 109 +++++++++++++++++++++++++++++++++++-- > 1 file changed, 104 insertions(+), 5 deletions(-) > > diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c > index 7496b10532aa..93aee00994d4 100644 > --- a/drivers/android/binderfs.c > +++ b/drivers/android/binderfs.c > @@ -20,6 +20,7 @@ > #include <linux/parser.h> > #include <linux/radix-tree.h> > #include <linux/sched.h> > +#include <linux/seq_file.h> > #include <linux/slab.h> > #include <linux/spinlock_types.h> > #include <linux/stddef.h> > @@ -39,6 +40,11 @@ > #define INODE_OFFSET 3 > #define INTSTRLEN 21 > #define BINDERFS_MAX_MINOR (1U << MINORBITS) > +/* > + * Ensure that the initial ipc namespace always has a good chunk of devices > + * available. > + */ > +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 1024) > > static struct vfsmount *binderfs_mnt; > > @@ -46,6 +52,24 @@ static dev_t binderfs_dev; > static DEFINE_MUTEX(binderfs_minors_mutex); > static DEFINE_IDA(binderfs_minors); > > +/** > + * binderfs_mount_opts - mount options for binderfs > + * @max: maximum number of allocatable binderfs binder devices > + */ > +struct binderfs_mount_opts { > + int max; > +}; > + > +enum { > + Opt_max, > + Opt_err > +}; > + > +static const match_table_t tokens = { > + { Opt_max, "max=%d" }, > + { Opt_err, NULL } > +}; > + > /** > * binderfs_info - information about a binderfs mount > * @ipc_ns: The ipc namespace the binderfs mount belongs to. > @@ -55,13 +79,16 @@ static DEFINE_IDA(binderfs_minors); > * created. > * @root_gid: gid that needs to be used when a new binder device is > * created. > + * @mount_opts: The mount options in use. > + * @device_count: The current number of allocated binder devices. > */ > struct binderfs_info { > struct ipc_namespace *ipc_ns; > struct dentry *control_dentry; > kuid_t root_uid; > kgid_t root_gid; > - > + struct binderfs_mount_opts mount_opts; > + atomic_t device_count; > }; > > static inline struct binderfs_info *BINDERFS_I(const struct inode *inode) > @@ -107,13 +134,22 @@ static int binderfs_binder_device_create(struct inode *ref_inode, > struct inode *inode = NULL; > struct super_block *sb = ref_inode->i_sb; > struct binderfs_info *info = sb->s_fs_info; > + bool use_reserve = (info->ipc_ns == &init_ipc_ns); > > /* Reserve new minor number for the new device. */ > mutex_lock(&binderfs_minors_mutex); > - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL); > + if (atomic_inc_return(&info->device_count) < info->mount_opts.max) > + minor = ida_alloc_max(&binderfs_minors, > + use_reserve ? BINDERFS_MAX_MINOR : > + BINDERFS_MAX_MINOR_CAPPED, > + GFP_KERNEL); > + else > + minor = -ENOSPC; > mutex_unlock(&binderfs_minors_mutex); > - if (minor < 0) > + if (minor < 0) { > + atomic_dec(&info->device_count); > return minor; > + } > > ret = -ENOMEM; > device = kzalloc(sizeof(*device), GFP_KERNEL); > @@ -187,6 +223,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, > kfree(name); > kfree(device); > mutex_lock(&binderfs_minors_mutex); > + atomic_dec(&info->device_count); > ida_free(&binderfs_minors, minor); > mutex_unlock(&binderfs_minors_mutex); > iput(inode); > @@ -232,6 +269,7 @@ static long binder_ctl_ioctl(struct file *file, unsigned int cmd, > static void binderfs_evict_inode(struct inode *inode) > { > struct binder_device *device = inode->i_private; > + struct binderfs_info *info = BINDERFS_I(inode); > > clear_inode(inode); > > @@ -239,6 +277,7 @@ static void binderfs_evict_inode(struct inode *inode) > return; > > mutex_lock(&binderfs_minors_mutex); > + atomic_dec(&info->device_count); > ida_free(&binderfs_minors, device->miscdev.minor); > mutex_unlock(&binderfs_minors_mutex); > > @@ -246,9 +285,64 @@ static void binderfs_evict_inode(struct inode *inode) > kfree(device); > } > > +/** > + * binderfs_parse_mount_opts - parse binderfs mount options > + * @data: options to set (can be NULL in which case defaults are used) > + */ > +static int binderfs_parse_mount_opts(char *data, > + struct binderfs_mount_opts *opts) > +{ > + char *p; > + opts->max = BINDERFS_MAX_MINOR; > + > + while ((p = strsep(&data, ",")) != NULL) { > + substring_t args[MAX_OPT_ARGS]; > + int token; > + int max_devices; > + > + if (!*p) > + continue; > + > + token = match_token(p, tokens, args); > + switch (token) { > + case Opt_max: > + max_devices = match_int(&args[0], &max_devices); > + if (max_devices < 0 || (max_devices > BINDERFS_MAX_MINOR)) > + return -EINVAL; One should always account for their own stupid mistakes and here's one I made: the ways match_int() is used here is nonsense since it overwrites the parsed max_devices value. This should be: if (match_int(&args[0], &max_devices) || (max_devices < 0 || (max_devices > BINDERFS_MAX_MINOR))) return -EINVAL; Good that we have to have another version anyway. Sorry about that. Christian > + > + opts->max = max_devices; > + break; > + default: > + pr_err("Invalid mount options\n"); > + return -EINVAL; > + } > + } > + > + return 0; > +} > + > +static int binderfs_remount(struct super_block *sb, int *flags, char *data) > +{ > + struct binderfs_info *info = sb->s_fs_info; > + return binderfs_parse_mount_opts(data, &info->mount_opts); > +} > + > +static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root) > +{ > + struct binderfs_info *info; > + > + info = root->d_sb->s_fs_info; > + if (info->mount_opts.max < BINDERFS_MAX_MINOR) > + seq_printf(seq, ",max=%d", info->mount_opts.max); > + > + return 0; > +} > + > static const struct super_operations binderfs_super_ops = { > - .statfs = simple_statfs, > - .evict_inode = binderfs_evict_inode, > + .evict_inode = binderfs_evict_inode, > + .remount_fs = binderfs_remount, > + .show_options = binderfs_show_mount_opts, > + .statfs = simple_statfs, > }; > > static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, > @@ -409,6 +503,10 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) > if (!info) > goto err_without_dentry; > > + ret = binderfs_parse_mount_opts(data, &info->mount_opts); > + if (ret) > + goto err_without_dentry; > + > info->ipc_ns = ipc_ns; > info->root_gid = make_kgid(sb->s_user_ns, 0); > if (!gid_valid(info->root_gid)) > @@ -416,6 +514,7 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) > info->root_uid = make_kuid(sb->s_user_ns, 0); > if (!uid_valid(info->root_uid)) > info->root_uid = GLOBAL_ROOT_UID; > + atomic_set(&info->device_count, 0); > > sb->s_fs_info = info; > > -- > 2.19.1 > _______________________________________________ devel mailing list devel@xxxxxxxxxxxxxxxxxxxxxx http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel