CC: netdev@xxxxxxxxxxxxxxx I'll review the cgroup part if this patch is regarded as useful. Grzegorz Nosek wrote: > This is a very simple cgroup subsystem to restrict IP addresses used > by member processes. Currently it is limited to IPv4 only but IPv6 (or > other protocols) should be easy to implement. > > IP addresses are write-once (via /cgroup/.../ipaddr.ipv4 in dotted-quad Why should they be write-once? > format) and are inherited by descendant cgroups, so a process once > restricted should never be able to get rid of the limits. Any address > may be specified in multiple cgroups. No verification is done to ensure > the addresses are actually configured on the machine, which has its > advantages (may add the addresses later) and disadvantages (if you enter > the wrong address, the cgroup will be effectively cut off from the > network). > > Whenever a process inside a restricted cgroup calls bind(2), the address > is checked like this: > - INADDR_LOOPBACK is explicitly allowed (a special case) > - INADDR_ANY is remapped to _the_ IP address > - _the_ IP address is passed through unharmed > - everything else causes -EPERM > > When a process calls connect(2), this subsystem calls bind(_the_IP_) > quietly behind its back, while preserving the original bound port (if > any). > > Rationale (or when/why would you want it): > The use case for ipaddr_cgroup doesn't overlap with network namespaces, > which also allow IP address restrictions, because it aims to be much > lighter due to its limited scope (hopefully able to easily support > hundreds or possibly thousands of distinct cgroups). It does not attempt > to hide the existence of other IP addresses from the user. > > Signed-off-by: Grzegorz Nosek <root@xxxxxxxxxxxxxx> > --- > > This is more of an RFC than a finished patch so any and all comments are > appreciated. 
> > The patch is based to a significant extent on the device_cgroup code, > including bypassing the security infrastructure and hooking directly > into the networking code. > > I'd also love to hear your opinion about locking--I have a version of this > patch that uses a seqlock to protect the IP address but I'm not sure this > is the Right Way to do it (and raw non-atomic lockless access looks scary, > regardless of how rarely would the address be changed, i.e. at most > once). > > And of course, if the whole idea is stupid, let me know. > > include/linux/cgroup_subsys.h | 6 ++ > include/linux/ipaddr_cgroup.h | 23 +++++ > init/Kconfig | 7 ++ > net/socket.c | 16 +++- > security/Makefile | 1 + > security/ipaddr_cgroup.c | 200 +++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 250 insertions(+), 3 deletions(-) > create mode 100644 include/linux/ipaddr_cgroup.h > create mode 100644 security/ipaddr_cgroup.c > > diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h > index 9c22396..70dd375 100644 > --- a/include/linux/cgroup_subsys.h > +++ b/include/linux/cgroup_subsys.h > @@ -54,3 +54,9 @@ SUBSYS(freezer) > #endif > > /* */ > + > +#ifdef CONFIG_CGROUP_IPADDR > +SUBSYS(ipaddr) > +#endif > + > +/* */ > diff --git a/include/linux/ipaddr_cgroup.h b/include/linux/ipaddr_cgroup.h > new file mode 100644 > index 0000000..19dc382 > --- /dev/null > +++ b/include/linux/ipaddr_cgroup.h > @@ -0,0 +1,23 @@ > +#ifndef HAVE_IPADDR_CGROUP_H > +#define HAVE_IPADDR_CGROUP_H > + > +struct socket; > +struct sockaddr; > + > +#ifdef CONFIG_CGROUP_IPADDR > +int ipaddr_cgroup_connect(struct socket *sock, struct sockaddr *address, int addrlen); > +int ipaddr_cgroup_bind(struct socket *sock, struct sockaddr *address, int addrlen); > + > +#else > +static inline int ipaddr_cgroup_connect(struct socket *sock, struct sockaddr *address, int addrlen) > +{ > + return 0; > +} > + > +static inline int ipaddr_cgroup_bind(struct socket *sock, struct sockaddr *address, int 
addrlen) > +{ > + return 0; > +} > + > +#endif /* CONFIG_CGROUP_IPADDR */ > +#endif /* HAVE_IPADDR_CGROUP_H */ > diff --git a/init/Kconfig b/init/Kconfig > index 35d87b9..db43344 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -338,6 +338,13 @@ config CGROUP_DEVICE > Provides a cgroup implementing whitelists for devices which > a process in the cgroup can mknod or open. > > +config CGROUP_IPADDR > + bool "IP address controller for cgroups" > + depends on CGROUPS && EXPERIMENTAL > + help > + Provides a cgroup restricting IP addresses its member processes > + can use. > + > config CPUSETS > bool "Cpuset support" > depends on SMP && CGROUPS > diff --git a/net/socket.c b/net/socket.c > index 3e8d4e3..3bd8c08 100644 > --- a/net/socket.c > +++ b/net/socket.c > @@ -87,6 +87,7 @@ > #include <linux/audit.h> > #include <linux/wireless.h> > #include <linux/nsproxy.h> > +#include <linux/ipaddr_cgroup.h> > > #include <asm/uaccess.h> > #include <asm/unistd.h> > @@ -1375,9 +1376,13 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) > if (sock) { > err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); > if (err >= 0) { > - err = security_socket_bind(sock, > - (struct sockaddr *)&address, > - addrlen); > + err = ipaddr_cgroup_bind(sock, > + (struct sockaddr *)&address, > + addrlen); > + if (!err) > + err = security_socket_bind(sock, > + (struct sockaddr *)&address, > + addrlen); > if (!err) > err = sock->ops->bind(sock, > (struct sockaddr *) > @@ -1600,6 +1605,11 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, > goto out_put; > > err = > + ipaddr_cgroup_connect(sock, (struct sockaddr *)&address, addrlen); > + if (err) > + goto out_put; > + > + err = > security_socket_connect(sock, (struct sockaddr *)&address, addrlen); > if (err) > goto out_put; > diff --git a/security/Makefile b/security/Makefile > index f654260..aaf225e 100644 > --- a/security/Makefile > +++ b/security/Makefile > @@ -16,3 +16,4 @@ 
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o > obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o > obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o > obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o > +obj-$(CONFIG_CGROUP_IPADDR) += ipaddr_cgroup.o > diff --git a/security/ipaddr_cgroup.c b/security/ipaddr_cgroup.c > new file mode 100644 > index 0000000..96ccf27 > --- /dev/null > +++ b/security/ipaddr_cgroup.c > @@ -0,0 +1,200 @@ > +/* > + * IP address cgroup subsystem > + */ > + > +#include <linux/ipaddr_cgroup.h> > + > +#include <linux/cgroup.h> > +#include <linux/err.h> > +#include <linux/in.h> > +#include <linux/inet.h> > +#include <linux/seq_file.h> > +#include <linux/socket.h> > + > +#include <net/inet_sock.h> > + > +struct ipaddr_cgroup { > + struct cgroup_subsys_state css; > + u32 ipv4_addr; > +}; > + > +static inline struct ipaddr_cgroup *css_to_ipcgroup(struct cgroup_subsys_state *s) > +{ > + return container_of(s, struct ipaddr_cgroup, css); > +} > + > +static inline struct ipaddr_cgroup *cgroup_to_ipcgroup(struct cgroup *cgroup) > +{ > + return css_to_ipcgroup(cgroup_subsys_state(cgroup, ipaddr_subsys_id)); > +} > + > +static inline struct ipaddr_cgroup *task_ipcgroup(struct task_struct *task) > +{ > + return css_to_ipcgroup(task_subsys_state(task, ipaddr_subsys_id)); > +} > + > +struct cgroup_subsys ipaddr_subsys; > + > +static int ipcgroup_can_attach(struct cgroup_subsys *ss, > + struct cgroup *new_cgroup, struct task_struct *task) > +{ > + struct ipaddr_cgroup *old_ipcgroup, *new_ipcgroup; > + u32 old_ipv4; > + > + if (current != task && !capable(CAP_SYS_ADMIN)) > + return -EPERM; > + > + old_ipcgroup = task_ipcgroup(task); > + new_ipcgroup = cgroup_to_ipcgroup(new_cgroup); > + old_ipv4 = old_ipcgroup->ipv4_addr; > + > + if (old_ipv4 != INADDR_ANY && old_ipv4 != new_ipcgroup->ipv4_addr) > + return -EPERM; > + > + return 0; > +} > + > +static struct cgroup_subsys_state *ipcgroup_create(struct cgroup_subsys *ss, > + struct cgroup *cgroup) > +{ > + 
struct ipaddr_cgroup *ipcgroup, *parent_ipcgroup; > + struct cgroup *parent_cgroup; > + > + ipcgroup = kzalloc(sizeof(*ipcgroup), GFP_KERNEL); > + if (!ipcgroup) > + return ERR_PTR(-ENOMEM); > + parent_cgroup = cgroup->parent; > + > + if (parent_cgroup == NULL) { > + ipcgroup->ipv4_addr = htonl(INADDR_ANY); > + } else { > + parent_ipcgroup = cgroup_to_ipcgroup(parent_cgroup); > + ipcgroup->ipv4_addr = parent_ipcgroup->ipv4_addr; > + } > + > + return &ipcgroup->css; > +} > + > +static void ipcgroup_destroy(struct cgroup_subsys *ss, > + struct cgroup *cgroup) > +{ > + struct ipaddr_cgroup *ipcgroup; > + > + ipcgroup = cgroup_to_ipcgroup(cgroup); > + kfree(ipcgroup); > +} > + > +static int ipcgroup_write_ipv4(struct cgroup *cgrp, struct cftype *cft, > + const char *buffer) > +{ > + u32 new_addr; > + struct ipaddr_cgroup *ipcgroup; > + int ret; > + > + if (!capable(CAP_SYS_ADMIN)) > + return -EPERM; > + > + ipcgroup = cgroup_to_ipcgroup(cgrp); > + if (ipcgroup->ipv4_addr != htonl(INADDR_ANY)) > + return -EPERM; > + > + ret = in4_pton(buffer, -1, (u8 *)&new_addr, '\0', NULL); > + if (!ret) > + return -EINVAL; > + > + /* already network-endian */ > + ipcgroup->ipv4_addr = new_addr; > + return 0; > +} > + > +static int ipcgroup_read_ipv4(struct cgroup *cgrp, struct cftype *cft, > + struct seq_file *m) > +{ > + struct ipaddr_cgroup *ipcgroup; > + > + ipcgroup = cgroup_to_ipcgroup(cgrp); > + seq_printf(m, NIPQUAD_FMT "\n", NIPQUAD(ipcgroup->ipv4_addr)); > + return 0; > +} > + > +static struct cftype ipaddr_cgroup_files[] = { > + { > + .name = "ipv4", > + .write_string = ipcgroup_write_ipv4, > + .read_seq_string = ipcgroup_read_ipv4, > + }, > +}; > + > +static int ipcgroup_populate(struct cgroup_subsys *ss, > + struct cgroup *cgroup) > +{ > + return cgroup_add_files(cgroup, ss, ipaddr_cgroup_files, > + ARRAY_SIZE(ipaddr_cgroup_files)); > +} > + > +struct cgroup_subsys ipaddr_subsys = { > + .name = "ipaddr", > + .can_attach = ipcgroup_can_attach, > + .create = 
ipcgroup_create, > + .destroy = ipcgroup_destroy, > + .populate = ipcgroup_populate, > + .subsys_id = ipaddr_subsys_id > +}; > + > +int ipaddr_cgroup_connect(struct socket *sock, struct sockaddr *address, int addrlen) > +{ > + struct sockaddr_in sa_in; > + struct ipaddr_cgroup *ipcgroup; > + struct inet_sock *inet; > + int err; > + > + if (address->sa_family != AF_INET) > + return 0; > + > + ipcgroup = task_ipcgroup(current); > + if (ipcgroup->ipv4_addr == htonl(INADDR_ANY)) > + return 0; > + > + inet = inet_sk(sock->sk); > + > + sa_in.sin_family = AF_INET; > + sa_in.sin_addr.s_addr = ipcgroup->ipv4_addr; > + sa_in.sin_port = inet->sport; > + > + err = security_socket_bind(sock, (struct sockaddr *)&sa_in, sizeof(sa_in)); > + if (err) > + return err; > + > + err = sock->ops->bind(sock, (struct sockaddr *)&sa_in, sizeof(sa_in)); > + > + return err; > +} > + > +int ipaddr_cgroup_bind(struct socket *sock, struct sockaddr *address, int addrlen) > +{ > + struct sockaddr_in *sa_in; > + struct ipaddr_cgroup *ipcgroup; > + > + if (address->sa_family != AF_INET) > + return 0; > + > + ipcgroup = task_ipcgroup(current); > + if (ipcgroup->ipv4_addr == htonl(INADDR_ANY)) > + return 0; > + > + sa_in = (struct sockaddr_in *) address; > + > + /* remap INADDR_ANY to cgroup IP address */ > + if (sa_in->sin_addr.s_addr == htonl(INADDR_ANY)) > + sa_in->sin_addr.s_addr = ipcgroup->ipv4_addr; > + > + /* a very special case */ > + if (sa_in->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) > + return 0; > + > + if (sa_in->sin_addr.s_addr == ipcgroup->ipv4_addr) > + return 0; > + > + return -EPERM; > +} > + _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers