On Mon, Sep 23, 2024 at 01:08 PM GMT, David Laight wrote: > From: Tiago Lam <tiagolam@xxxxxxxxxxxxxx> [...] >> To limit its usage, a reverse socket lookup is performed to check if the >> configured egress source address and/or port have any ingress sk_lookup >> match. If it does, traffic is allowed to proceed, otherwise it falls >> back to the regular egress path. > > Is that really useful/necessary? We've been asking ourselves the same question during Plumbers with Martin. Unprivileges processes can already source UDP traffic from (almost) any IP & port by binding a socket to the desired source port and passing IP_PKTINFO. So perhaps having a reverse socket lookup is an overkill. We should probably respect net.ipv4.ip_local_reserved_ports and net.ipv4.ip_unprivileged_port_start system settings, though, or check for relevant caps. Open question if it is acceptable to disregard exclusive UDP port ownership by sockets binding to a wildcard address without SO_REUSEADDR? [...] > The check (but not the commit message) implies that some 'bpf thingy' > also needs to be enabled. > Any check would need to include the test that the program sending the packet > has the ability to send a packet through the ingress socket. > Additionally a check for the sending process having (IIRC) CAP_NET_ADMIN > (which would let the process send the message by other means) would save the > slow path. > > The code we have sends a lot of UDP RTP (typically 160 bytes of audio every 20ms). > There is actually no reason for there to be a valid matching ingress path. > (That code would benefit from being able to bind a lot of ports to the same > UDP socket.) > > David > >> >> Suggested-by: Jakub Sitnicki <jakub@xxxxxxxxxxxxxx> >> Signed-off-by: Tiago Lam <tiagolam@xxxxxxxxxxxxxx> >> --- >> net/ipv6/datagram.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >> net/ipv6/udp.c | 8 ++++-- >> 2 files changed, 85 insertions(+), 2 deletions(-) >> >> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c >> index fff78496803d..369c64a478ec 100644 >> --- a/net/ipv6/datagram.c >> +++ b/net/ipv6/datagram.c >> @@ -756,6 +756,29 @@ void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, >> } >> EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); >> >> +static inline bool reverse_sk_lookup(struct flowi6 *fl6, struct sock *sk, >> + struct in6_addr *saddr, __be16 sport) >> +{ >> + if (static_branch_unlikely(&bpf_sk_lookup_enabled) && >> + (saddr && sport) && >> + (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, saddr) || >> + inet_sk(sk)->inet_sport != sport)) { >> + struct sock *sk_egress; >> + >> + bpf_sk_lookup_run_v6(sock_net(sk), IPPROTO_UDP, &fl6->daddr, >> + fl6->fl6_dport, saddr, ntohs(sport), 0, >> + &sk_egress); >> + if (!IS_ERR_OR_NULL(sk_egress) && sk_egress == sk) >> + return true; >> + >> + net_info_ratelimited("No reverse socket lookup match for local addr %pI6:%d remote addr >> %pI6:%d\n", >> + &saddr, ntohs(sport), &fl6->daddr, >> + ntohs(fl6->fl6_dport)); >> + } >> + >> + return false; >> +} >> + >> int ip6_datagram_send_ctl(struct net *net, struct sock *sk, >> struct msghdr *msg, struct flowi6 *fl6, >> struct ipcm6_cookie *ipc6) >> @@ -844,7 +867,63 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, >> >> break; >> } >> + case IPV6_ORIGDSTADDR: >> + { >> + struct sockaddr_in6 *sockaddr_in; >> + struct net_device *dev = NULL; >> + >> + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct sockaddr_in6))) { >> + err = -EINVAL; >> + goto exit_f; >> + } >> + >> + sockaddr_in = (struct sockaddr_in6 *)CMSG_DATA(cmsg); >> + >> + addr_type = __ipv6_addr_type(&sockaddr_in->sin6_addr); >> + >> + if (addr_type & IPV6_ADDR_LINKLOCAL) >> + return -EINVAL; >> + >> + /* If we're egressing with a different source address >> + * and/or port, we perform a reverse socket lookup. The >> + * rationale behind this is that we can allow return >> + * UDP traffic that has ingressed through sk_lookup to >> + * also egress correctly. In case the reverse lookup >> + * fails, we continue with the normal path. >> + * >> + * The lookup is performed if either source address >> + * and/or port changed, and neither is "0". >> + */ >> + if (reverse_sk_lookup(fl6, sk, &sockaddr_in->sin6_addr, >> + sockaddr_in->sin6_port)) { >> + /* Override the source port and address to use >> + * with the one we got in cmsg and bail early. >> + */ >> + fl6->saddr = sockaddr_in->sin6_addr; >> + fl6->fl6_sport = sockaddr_in->sin6_port; >> + break; >> + } >> >> + if (addr_type != IPV6_ADDR_ANY) { >> + int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; >> + >> + if (!ipv6_can_nonlocal_bind(net, inet_sk(sk)) && >> + !ipv6_chk_addr_and_flags(net, >> + &sockaddr_in->sin6_addr, >> + dev, !strict, 0, >> + IFA_F_TENTATIVE) && >> + !ipv6_chk_acast_addr_src(net, dev, >> + &sockaddr_in->sin6_addr)) >> + err = -EINVAL; >> + else >> + fl6->saddr = sockaddr_in->sin6_addr; >> + } >> + >> + if (err) >> + goto exit_f; >> + >> + break; >> + } >> case IPV6_FLOWINFO: >> if (cmsg->cmsg_len < CMSG_LEN(4)) { >> err = -EINVAL; >> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c >> index 6602a2e9cdb5..6121cbb71ad3 100644 >> --- a/net/ipv6/udp.c >> +++ b/net/ipv6/udp.c >> @@ -1476,6 +1476,12 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) >> >> fl6->flowi6_uid = sk->sk_uid; >> >> + /* We use fl6's daddr and fl6_sport in the reverse sk_lookup done >> + * within ip6_datagram_send_ctl() now. >> + */ >> + fl6->daddr = *daddr; >> + fl6->fl6_sport = inet->inet_sport; >> + >> if (msg->msg_controllen) { >> opt = &opt_space; >> memset(opt, 0, sizeof(struct ipv6_txoptions)); >> @@ -1511,10 +1517,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) >> >> fl6->flowi6_proto = sk->sk_protocol; >> fl6->flowi6_mark = ipc6.sockc.mark; >> - fl6->daddr = *daddr; >> if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) >> fl6->saddr = np->saddr; >> - fl6->fl6_sport = inet->inet_sport; >> >> if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { >> err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, >> >> -- >> 2.34.1 >> > > - > Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK > Registration No: 1397386 (Wales)