dscp ranges ---------- This property controls which dscp values the processes in a cgroup are allowed to use. A process in a cgroup will receive an EACCES error if it tries to do any of these things: * set a socket's IP_TOS option (or its IPv6 traffic class) to a value whose dscp field (bits 7:2) is outside the range * use a socket to send a message in which the IP_TOS (or IPv6 traffic class) ancillary data is set to a value whose dscp field is outside the range This property is exposed to userspace through the 'net.dscp_ranges' file, similar to the bind and listen port ranges. Tested: wrote a Python script that attempts to set the IP_TOS option via setsockopt to a value with an out-of-range dscp field, and expects a failure. Signed-off-by: Anoop Naravaram <anaravaram@xxxxxxxxxx> --- Documentation/cgroup-v1/net.txt | 14 ++++++++++++++ include/net/net_cgroup.h | 6 ++++++ net/core/net_cgroup.c | 34 ++++++++++++++++++++++++++++++++-- net/ipv4/ip_sockglue.c | 13 +++++++++++++ net/ipv6/datagram.c | 9 +++++++++ net/ipv6/ipv6_sockglue.c | 8 ++++++++ 6 files changed, 82 insertions(+), 2 deletions(-) diff --git a/Documentation/cgroup-v1/net.txt b/Documentation/cgroup-v1/net.txt index a14fd1c..ea2f1db 100644 --- a/Documentation/cgroup-v1/net.txt +++ b/Documentation/cgroup-v1/net.txt @@ -30,6 +30,20 @@ This property is exposed to userspace through the 'net.listen_port_ranges' file, as ranges of ports that the processes can listen on (as described in the HOW TO INTERACT WITH RANGES FILES section). +dscp ranges +----------- +This property controls which dscp values the processes in a cgroup are +allowed to use. 
A process in a cgroup will receive an EACCES error if it +tries to do any of these things: +* set a socket's IP_TOS option to a value whose dscp field (bits 7:2) is + outside the range +* use a socket to send a message in which the IP_TOS ancillary data is + set to a value whose dscp field is outside the range + +This property is exposed to userspace through the 'net.dscp_ranges' file, as +ranges of dscp values that the process can use (as described in the HOW TO +INTERACT WITH RANGES FILES section). + udp port usage and limit ------------------------ This property controls the limit of udp ports that can be used by the diff --git a/include/net/net_cgroup.h b/include/net/net_cgroup.h index 25a9def..d89e98d 100644 --- a/include/net/net_cgroup.h +++ b/include/net/net_cgroup.h @@ -23,6 +23,7 @@ enum { NETCG_LISTEN_RANGES, NETCG_BIND_RANGES, + NETCG_DSCP_RANGES, NETCG_NUM_RANGE_TYPES }; @@ -73,6 +74,7 @@ struct net_cgroup { bool net_cgroup_bind_allowed(u16 port); bool net_cgroup_listen_allowed(u16 port); +bool net_cgroup_dscp_allowed(u8 dscp); bool net_cgroup_acquire_udp_port(void); void net_cgroup_release_udp_port(void); @@ -85,6 +87,10 @@ static inline bool net_cgroup_listen_allowed(u16 port) { return true; } +static inline bool net_cgroup_dscp_allowed(u8 dscp) +{ + return true; +} static inline bool net_cgroup_acquire_udp_port(void) { return true; diff --git a/net/core/net_cgroup.c b/net/core/net_cgroup.c index 2f58e13..73dc5e7 100644 --- a/net/core/net_cgroup.c +++ b/net/core/net_cgroup.c @@ -21,6 +21,9 @@ #define MIN_PORT_VALUE 0 #define MAX_PORT_VALUE 65535 +#define MIN_DSCP_VALUE 0 +#define MAX_DSCP_VALUE 63 + /* Deriving MAX_ENTRIES from MAX_WRITE_SIZE as a rough estimate */ #define MAX_ENTRIES ((MAX_WRITE_SIZE - offsetof(struct net_ranges, range)) / \ BYTES_PER_ENTRY) @@ -161,7 +164,10 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) MIN_PORT_VALUE, MAX_PORT_VALUE) || alloc_init_net_ranges( &netcg->whitelists[NETCG_LISTEN_RANGES], - MIN_PORT_VALUE, 
MAX_PORT_VALUE)) { + MIN_PORT_VALUE, MAX_PORT_VALUE) || + alloc_init_net_ranges( + &netcg->whitelists[NETCG_DSCP_RANGES], + MIN_DSCP_VALUE, MAX_DSCP_VALUE)) { free_net_cgroup(netcg); /* if any of these cause an error, return ENOMEM */ return ERR_PTR(-ENOMEM); @@ -178,7 +184,11 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) alloc_copy_net_ranges( &netcg->whitelists[NETCG_LISTEN_RANGES], MIN_PORT_VALUE, MAX_PORT_VALUE, - &parent_netcg->whitelists[NETCG_LISTEN_RANGES])) { + &parent_netcg->whitelists[NETCG_LISTEN_RANGES]) || + alloc_copy_net_ranges( + &netcg->whitelists[NETCG_DSCP_RANGES], + MIN_DSCP_VALUE, MAX_DSCP_VALUE, + &parent_netcg->whitelists[NETCG_DSCP_RANGES])) { free_net_cgroup(netcg); /* if any of these cause an error, return ENOMEM */ return ERR_PTR(-ENOMEM); @@ -237,6 +247,12 @@ bool net_cgroup_listen_allowed(u16 port) } EXPORT_SYMBOL_GPL(net_cgroup_listen_allowed); +bool net_cgroup_dscp_allowed(u8 dscp) +{ + return net_cgroup_value_allowed(dscp, NETCG_DSCP_RANGES); +} +EXPORT_SYMBOL_GPL(net_cgroup_dscp_allowed); + static s64 net_udp_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) { struct net_cgroup *netcg = css_to_net_cgroup(css); @@ -634,6 +650,20 @@ static struct cftype ss_files[] = { .max_write_len = MAX_WRITE_SIZE, }, { + .name = "dscp_ranges", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = net_read_ranges, + .private = NETCG_DSCP_RANGES, + }, + { + .name = "dscp_ranges", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = net_read_ranges, + .write = net_write_ranges, + .private = NETCG_DSCP_RANGES, + .max_write_len = MAX_WRITE_SIZE, + }, + { .name = "udp_limit", .flags = CFTYPE_ONLY_ON_ROOT, .read_s64 = net_udp_read_s64, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 71a52f4d..71a4297 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -42,6 +42,7 @@ #include <net/transp_v6.h> #endif #include <net/ip_fib.h> +#include <net/net_cgroup.h> #include <linux/errqueue.h> #include <asm/uaccess.h> @@ 
-289,6 +290,11 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, val = *(int *)CMSG_DATA(cmsg); if (val < 0 || val > 255) return -EINVAL; + /* val is 8-bit tos, we need to rightshift 2 to get the + * 6-bit dscp field + */ + if (!net_cgroup_dscp_allowed(val >> 2)) + return -EACCES; ipc->tos = val; ipc->priority = rt_tos2priority(ipc->tos); break; @@ -727,6 +733,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, val &= ~INET_ECN_MASK; val |= inet->tos & INET_ECN_MASK; } + /* val is 8-bit tos, we need to rightshift 2 to get the + * 6-bit dscp field + */ + if (!net_cgroup_dscp_allowed(val >> 2)) { + err = -EACCES; + break; + } if (inet->tos != val) { inet->tos = val; sk->sk_priority = rt_tos2priority(val); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2..9053b83 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -34,6 +34,7 @@ #include <linux/errqueue.h> #include <asm/uaccess.h> +#include <net/net_cgroup.h> static bool ipv6_mapped_addr_any(const struct in6_addr *a) { @@ -973,6 +974,14 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, if (tc < -1 || tc > 0xff) goto exit_f; + /* tc is 8-bit tclass, we need to rightshift 2 to get + * the 6-bit dscp field + */ + if (!net_cgroup_dscp_allowed(tc >> 2)) { + err = -EACCES; + goto exit_f; + } + err = 0; ipc6->tclass = tc; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a9895e1..eac3f88 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -52,6 +52,7 @@ #include <net/udplite.h> #include <net/xfrm.h> #include <net/compat.h> +#include <net/net_cgroup.h> #include <asm/uaccess.h> @@ -339,6 +340,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, /* RFC 3542, 6.5: default traffic class of 0x0 */ if (val == -1) val = 0; + /* val is 8-bit tclass, we need to rightshift 2 to get the 6-bit + * dscp field + */ + if (!net_cgroup_dscp_allowed(val >> 2)) { + retv = -EACCES; + break; + } 
np->tclass = val; retv = 0; break; -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-doc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html