Hi Greg, This needs a follow-up incremental fix. Please hold off on it. I will ping you back once it is there. Thanks. On Mon, Sep 16, 2024 at 01:44:26PM +0200, Greg Kroah-Hartman wrote: > 6.10-stable review patch. If anyone has any objections, please let me know. > > ------------------ > > From: Florian Westphal <fw@xxxxxxxxx> > > [ Upstream commit 7f3287db654395f9c5ddd246325ff7889f550286 ] > > When running in a container environment, /sys/fs/cgroup/ might not be > the real root node of the sk-attached cgroup. > > Example: > > In container: > % stat /sys/fs/cgroup/ > Device: 0,21 Inode: 2214 .. > % stat /sys/fs/cgroup/foo > Device: 0,21 Inode: 2264 .. > > The expectation would be for: > > nft add rule .. socket cgroupv2 level 1 "foo" counter > > to match traffic from a process that got added to "foo" via > "echo $pid > /sys/fs/cgroup/foo/cgroup.procs". > > However, 'level 3' is needed to make this work. > > Seen from the initial namespace, the complete hierarchy is: > > % stat /sys/fs/cgroup/system.slice/docker-.../foo > Device: 0,21 Inode: 2264 .. > > i.e. hierarchy is > 0 1 2 3 > / -> system.slice -> docker-1... -> foo > > ... but the container doesn't know that its "/" is the "docker-1.." > cgroup. Current code will retrieve the 'system.slice' cgroup node > and store its kn->id in the destination register, so a comparison with > 2264 ("foo" cgroup id) will not match. > > Fetch "/" cgroup from ->init() and add its level to the level we try to > extract. cgroup root-level is 0 for the init-namespace or the level > of the ancestor that is exposed as the cgroup root inside the container. > > In the above case, cgrp->level of "/" resolved in the container is 2 > (docker-1...scope/) and a request for 'level 1' will get adjusted > to fetch the actual level (3). > > v2: use CONFIG_SOCK_CGROUP_DATA, eval function depends on it. 
> (kernel test robot) > > Cc: cgroups@xxxxxxxxxxxxxxx > Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2") > Reported-by: Nadia Pinaeva <n.m.pinaeva@xxxxxxxxx> > Signed-off-by: Florian Westphal <fw@xxxxxxxxx> > Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> > Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> > --- > net/netfilter/nft_socket.c | 41 +++++++++++++++++++++++++++++++++++--- > 1 file changed, 38 insertions(+), 3 deletions(-) > > diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c > index 765ffd6e06bc..12cdff640492 100644 > --- a/net/netfilter/nft_socket.c > +++ b/net/netfilter/nft_socket.c > @@ -9,7 +9,8 @@ > > struct nft_socket { > enum nft_socket_keys key:8; > - u8 level; > + u8 level; /* cgroupv2 level to extract */ > + u8 level_user; /* cgroupv2 level provided by userspace */ > u8 len; > union { > u8 dreg; > @@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo > memcpy(dest, &cgid, sizeof(u64)); > return true; > } > + > +/* process context only, uses current->nsproxy. 
*/ > +static noinline int nft_socket_cgroup_subtree_level(void) > +{ > + struct cgroup *cgrp = cgroup_get_from_path("/"); > + int level; > + > + if (!cgrp) > + return -ENOENT; > + > + level = cgrp->level; > + > + cgroup_put(cgrp); > + > + if (WARN_ON_ONCE(level > 255)) > + return -ERANGE; > + > + if (WARN_ON_ONCE(level < 0)) > + return -EINVAL; > + > + return level; > +} > #endif > > static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) > @@ -174,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx, > case NFT_SOCKET_MARK: > len = sizeof(u32); > break; > -#ifdef CONFIG_CGROUPS > +#ifdef CONFIG_SOCK_CGROUP_DATA > case NFT_SOCKET_CGROUPV2: { > unsigned int level; > + int err; > > if (!tb[NFTA_SOCKET_LEVEL]) > return -EINVAL; > @@ -185,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx, > if (level > 255) > return -EOPNOTSUPP; > > + err = nft_socket_cgroup_subtree_level(); > + if (err < 0) > + return err; > + > + priv->level_user = level; > + > + level += err; > + /* Implies a giant cgroup tree */ > + if (WARN_ON_ONCE(level > 255)) > + return -EOPNOTSUPP; > + > priv->level = level; > len = sizeof(u64); > break; > @@ -209,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb, > if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) > return -1; > if (priv->key == NFT_SOCKET_CGROUPV2 && > - nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level))) > + nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) > return -1; > return 0; > } > -- > 2.43.0 > > >