Add container disk quota control source file. Signed-off-by: Jie Liu <jeff.liu@xxxxxxxxxx> --- fs/ns_quota.c | 261 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 261 insertions(+), 0 deletions(-) create mode 100644 fs/ns_quota.c diff --git a/fs/ns_quota.c b/fs/ns_quota.c new file mode 100644 index 0000000..9d24041 --- /dev/null +++ b/fs/ns_quota.c @@ -0,0 +1,261 @@ +#include <linux/fs.h> +#include <linux/slab.h> +#include <asm/current.h> +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/security.h> +#include <linux/syscalls.h> +#include <linux/capability.h> +#include <linux/types.h> +#include <linux/writeback.h> +#include <linux/nsproxy.h> +#include <linux/mnt_namespace.h> +#include "mount.h" + +/* + * The corresponding device of "/" and file system type is "rootfs" + * if quotactl(2) is invoked from a container guest. + */ +static int is_container_rootfs(const char __user *special) +{ + int ret; + char *tmp = getname(special); + + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + ret = strcmp(tmp, "rootfs"); + putname(tmp); + + return !ret; +} + +/* + * Currently, to ensure quotactl(2) is invoked from a container VM or a + * cloned mount namespace created through unshare(1), I do check that the + * input dev is "rootfs" or the current pid namespace is not the initial + * one. Is that sounds stupid enough? :( + * + * FIXME: + * Need to find out a reasonable approach to examine whether perform + * container disk quota or not. + * Some of my thoughs were shown as following: + * 1. Define a couple of pariticular NS_QUOTAON/NS_QUOTAOFF/NS_QGETINFO, etc. + * do container disk quota if they are presented. + * 2. Maybe people prefer to make use of container disk quota through + * unshare(1) combine with cgroups, and they even don't want run + * quotacheck(8) in this case, they just want to limit those quota stuff + * in a strightford way without disk usage pre-checkup, something like: + * turn quota on a particular mount namespace, set the quota limits per + * their requirements, stop further storage operations once over quota + * limits. And also, the quota limits can across different storage if + * the underlying file systems are running with container quota enabled. + */ +int do_quotactl_for_container(const char __user *special) +{ + return (is_container_rootfs(special) || + current->nsproxy->pid_ns != &init_pid_ns) ? 1 : 0; +} + +/* + * FIXME: find out a way to solve mount namespace security/cap verfication. + * Something like: ns_capable(current->nsproxy->mnt_ns, CAP_XXXX)? + */ +static int check_ns_quotactl_permission(struct mnt_namespace *ns, + int type, int cmd, qid_t id) +{ + switch (cmd) { + /* these commands do not require any special privilegues */ + case Q_GETFMT: + case Q_GETINFO: + break; + /* allow to query information for dquots we "own" */ + case Q_GETQUOTA: + if ((type == USRQUOTA && current_euid() == id) || + (type == GRPQUOTA && in_egroup_p(id))) + break; + /*fallthrough*/ + default: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } + + return 0; +} + +/* + * FIXME: + * The following helpers are copied from general quota, they can be + * shared actally. + */ +static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) +{ + dst->dqb_bhardlimit = src->d_blk_hardlimit; + dst->dqb_bsoftlimit = src->d_blk_softlimit; + dst->dqb_curspace = src->d_bcount; + dst->dqb_ihardlimit = src->d_ino_hardlimit; + dst->dqb_isoftlimit = src->d_ino_softlimit; + dst->dqb_curinodes = src->d_icount; + dst->dqb_btime = src->d_btimer; + dst->dqb_itime = src->d_itimer; + dst->dqb_valid = QIF_ALL; +} + +static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src) +{ + dst->d_blk_hardlimit = src->dqb_bhardlimit; + dst->d_blk_softlimit = src->dqb_bsoftlimit; + dst->d_bcount = src->dqb_curspace; + dst->d_ino_hardlimit = src->dqb_ihardlimit; + dst->d_ino_softlimit = src->dqb_isoftlimit; + dst->d_icount = src->dqb_curinodes; + dst->d_btimer = src->dqb_btime; + dst->d_itimer = src->dqb_itime; + + dst->d_fieldmask = 0; + if (src->dqb_valid & QIF_BLIMITS) + dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD; + if (src->dqb_valid & QIF_SPACE) + dst->d_fieldmask |= FS_DQ_BCOUNT; + if (src->dqb_valid & QIF_ILIMITS) + dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD; + if (src->dqb_valid & QIF_INODES) + dst->d_fieldmask |= FS_DQ_ICOUNT; + if (src->dqb_valid & QIF_BTIME) + dst->d_fieldmask |= FS_DQ_BTIMER; + if (src->dqb_valid & QIF_ITIME) + dst->d_fieldmask |= FS_DQ_ITIMER; +} + +static int ns_quota_on(struct mnt_namespace *ns, int type) +{ + return ns->ns_qcop->quota_on(ns, type); +} + +static int ns_quota_off(struct mnt_namespace *ns, int type) +{ + return ns->ns_qcop->quota_off(ns, type); +} + +static int ns_quota_getinfo(struct mnt_namespace *ns, int type, + void __user *addr) +{ + struct if_dqinfo info; + int ret; + + ret = ns->ns_qcop->get_info(ns, type, &info); + if (!ret && copy_to_user(addr, &info, sizeof(info))) + return -EFAULT; + + return ret; +} + +static int ns_quota_setinfo(struct mnt_namespace *ns, int type, + void __user *addr) +{ + struct if_dqinfo info; + + if (copy_from_user(&info, addr, sizeof(info))) + return -EFAULT; + + return ns->ns_qcop->set_info(ns, type, &info); +} + +static int ns_quota_getquota(struct mnt_namespace *ns, int type, + qid_t id, void __user *addr) +{ + struct fs_disk_quota fdq; + struct if_dqblk idq; + int ret; + + ret = ns->ns_qcop->get_dqblk(ns, type, id, &fdq); + if (ret) + return ret; + + copy_to_if_dqblk(&idq, &fdq); + if (copy_to_user(addr, &idq, sizeof(idq))) + return -EFAULT; + + return 0; +} + +static int ns_quota_setquota(struct mnt_namespace *ns, int type, qid_t id, + void __user *addr) +{ + struct fs_disk_quota fdq; + struct if_dqblk idq; + + if (copy_from_user(&idq, addr, sizeof(idq))) + return -EFAULT; + + copy_from_if_dqblk(&fdq, &idq); + return ns->ns_qcop->set_dqblk(ns, type, id, &fdq); +} + +static int ns_quota_getfmt(struct mnt_namespace *ns, int type, + void __user *addr) +{ + __u32 fmt; + + fmt = ns_dquot_getfmt(ns, type); + if (!fmt) + return fmt; + + if (copy_to_user(addr, &fmt, sizeof(fmt))) + return -EFAULT; + return 0; +} + +/* Copy parameters and call proper function */ +int do_container_quotactl(int type, int cmd, qid_t id, void __user *addr) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + int ret = 0; + + if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) + return -EINVAL; + + lock_mnt_ns(ns); + ret = check_ns_quotactl_permission(ns, type, cmd, id); + if (ret < 0) + goto out_unlock; + + if (!ns->ns_qcop) { + ret = -ENOSYS; + goto out_unlock; + } + + switch (cmd) { + case Q_QUOTAON: + ret = ns_quota_on(ns, type); + break; + case Q_QUOTAOFF: + ret = ns_quota_off(ns, type); + break; + case Q_GETQUOTA: + ret = ns_quota_getquota(ns, type, id, addr); + break; + case Q_SETQUOTA: + ret = ns_quota_setquota(ns, type, id, addr); + break; + case Q_GETINFO: + ret = ns_quota_getinfo(ns, type, addr); + break; + case Q_SETINFO: + ret = ns_quota_setinfo(ns, type, addr); + break; + case Q_GETFMT: + ret = ns_quota_getfmt(ns, type, addr); + break; + case Q_SYNC: + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + +out_unlock: + unlock_mnt_ns(ns); + return ret; +} -- 1.7.9 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html