3.2.75-rc1 review patch. If anyone has any objections, please let me know. ------------------ From: Eric Dumazet <edumazet@xxxxxxxxxx> [ Upstream commit 45f6fad84cc305103b28d73482b344d7f5b76f39 ] This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program desmonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt Reported-by: Dmitry Vyukov <dvyukov@xxxxxxxxxx> Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx> Acked-by: Hannes Frederic Sowa <hannes@xxxxxxxxxxxxxxxxxxx> Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx> [bwh: Backported to 3.2: - Drop changes to l2tp - Fix an additional use of np->opt in tcp_v6_send_synack() - Fold in commit 43264e0bd963 ("ipv6: remove unnecessary codes in tcp_ipv6.c") - Adjust context] Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx> --- --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -369,7 +369,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct { --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -191,6 +191,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -203,7 +204,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; @@ -229,6 +230,24 @@ struct ipv6_fl_socklist { struct ip6_flowlabel *fl; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); extern struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, struct ip6_flowlabel * fl, --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -252,7 +252,9 @@ static int dccp_v6_send_response(struct security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { @@ -269,7 +271,10 @@ static int dccp_v6_send_response(struct &ireq6->loc_addr, &ireq6->rmt_addr); ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } @@ -463,6 +468,7 @@ static struct sock *dccp_v6_request_recv { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -586,13 +592,15 @@ static struct sock *dccp_v6_request_recv * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ - if (np->opt != NULL) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -844,6 +852,7 @@ static int dccp_v6_connect(struct sock * struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -946,7 +955,8 @@ static int dccp_v6_connect(struct sock * fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { @@ -966,9 +976,8 @@ static int dccp_v6_connect(struct sock * __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -430,8 +430,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - if ((opt = xchg(&np->opt, NULL)) != NULL) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -669,7 +672,10 @@ int inet6_sk_rebuild_header(struct sock fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -166,8 +166,10 @@ ipv4_connected: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); err = 0; --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -748,6 +748,7 @@ ipv6_dup_options(struct sock *sk, struct *((char**)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char**)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -812,7 +813,7 @@ ipv6_renew_options(struct sock *sk, stru return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -66,7 +66,9 @@ struct dst_entry *inet6_csk_route_req(st memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -225,7 +227,9 @@ int inet6_csk_xmit(struct sk_buff *skb, fl6.fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); @@ -248,7 +252,8 @@ int inet6_csk_xmit(struct sk_buff *skb, /* Restore final destination back after routing done */ ipv6_addr_copy(&fl6.daddr, &np->daddr); - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -110,10 +110,12 @@ struct ipv6_txoptions *ipv6_update_optio icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); } else { spin_lock(&sk->sk_dst_lock); - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); spin_unlock(&sk->sk_dst_lock); } sk_dst_reset(sk); @@ -213,9 +215,12 @@ static int do_ipv6_setsockopt(struct soc sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -384,7 +389,8 @@ static int do_ipv6_setsockopt(struct soc if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { @@ -413,8 +419,10 @@ static int do_ipv6_setsockopt(struct soc retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -467,6 +475,7 @@ sticky_done: break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) @@ -483,8 +492,10 @@ update: retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1053,10 +1064,11 @@ static int do_ipv6_getsockopt(struct soc case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -724,6 +724,7 @@ static int rawv6_probe_proto_opt(struct static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p, final; @@ -830,8 +831,10 @@ static int rawv6_sendmsg(struct kiocb *i if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -896,6 +899,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err<0?err:len; do_confirm: dst_confirm(dst); --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -238,7 +238,7 @@ struct sock *cookie_v6_check(struct sock memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -131,6 +131,7 @@ static int tcp_v6_connect(struct sock *s struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; @@ -252,7 +253,8 @@ static int tcp_v6_connect(struct sock *s fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -295,9 +297,9 @@ static int tcp_v6_connect(struct sock *s } icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -481,7 +483,6 @@ static int tcp_v6_send_synack(struct soc struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; - struct ipv6_txoptions *opt = NULL; struct in6_addr * final_p, final; struct flowi6 fl6; struct dst_entry *dst; @@ -498,8 +499,7 @@ static int tcp_v6_send_synack(struct soc fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - opt = np->opt; - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { @@ -513,13 +513,12 @@ static int tcp_v6_send_synack(struct soc __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); err = net_xmit_eval(err); } done: - if (opt && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); return err; } @@ -1408,7 +1407,6 @@ static struct sock * tcp_v6_syn_recv_soc } treq = inet6_rsk(req); - opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; @@ -1476,16 +1474,15 @@ static struct sock * tcp_v6_syn_recv_soc but we make one more one thing there: reattach optmem to newsk. */ + opt = rcu_dereference(np->opt); if (opt) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); } - - inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + inet_csk(newsk)->icsk_ext_hdr_len = 0; + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); @@ -1530,8 +1527,6 @@ static struct sock * tcp_v6_syn_recv_soc out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: - if (opt && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -954,6 +954,7 @@ int udpv6_sendmsg(struct kiocb *iocb, st struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1107,8 +1108,10 @@ do_udp_sendmsg: opt = NULL; connected = 0; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1208,6 +1211,7 @@ do_append_data: out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /* -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html