4.2.8-ckt2 -stable review patch. If anyone has any objections, please let me know. ---8<------------------------------------------------------------ From: Eric Dumazet <edumazet@xxxxxxxxxx> commit d188ba86dd07a72ebebfa22fe9cb0b0572e57740 upstream. XFRM can deal with SYNACK messages, sent while listener socket is not locked. We add proper rcu protection to __xfrm_sk_clone_policy() and xfrm_sk_policy_lookup() This might serve as the first step to remove xfrm.xfrm_policy_lock use in fast path. Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx> Acked-by: Steffen Klassert <steffen.klassert@xxxxxxxxxxx> Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx> Signed-off-by: Kamal Mostafa <kamal@xxxxxxxxxxxxx> --- include/net/sock.h | 2 +- include/net/xfrm.h | 24 +++++++++++++++--------- net/core/sock.c | 2 +- net/xfrm/xfrm_policy.c | 37 +++++++++++++++++++++++++------------ 4 files changed, 42 insertions(+), 23 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 208c874..8de0690 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -371,7 +371,7 @@ struct sock { struct socket_wq __rcu *sk_wq; #ifdef CONFIG_XFRM - struct xfrm_policy *sk_policy[2]; + struct xfrm_policy __rcu *sk_policy[2]; #endif unsigned long sk_flags; struct dst_entry *sk_rx_dst; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f0ee97e..02a1d20 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1140,12 +1140,14 @@ static inline int xfrm6_route_forward(struct sk_buff *skb) return xfrm_route_forward(skb, AF_INET6); } -int __xfrm_sk_clone_policy(struct sock *sk); +int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk); -static inline int xfrm_sk_clone_policy(struct sock *sk) +static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { - if (unlikely(sk->sk_policy[0] || sk->sk_policy[1])) - return __xfrm_sk_clone_policy(sk); + sk->sk_policy[0] = NULL; + sk->sk_policy[1] = NULL; + if (unlikely(osk->sk_policy[0] || osk->sk_policy[1])) + return __xfrm_sk_clone_policy(sk, osk); return 0; } @@ -1153,12 +1155,16 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir); static inline void xfrm_sk_free_policy(struct sock *sk) { - if (unlikely(sk->sk_policy[0] != NULL)) { - xfrm_policy_delete(sk->sk_policy[0], XFRM_POLICY_MAX); + struct xfrm_policy *pol; + + pol = rcu_dereference_protected(sk->sk_policy[0], 1); + if (unlikely(pol != NULL)) { + xfrm_policy_delete(pol, XFRM_POLICY_MAX); sk->sk_policy[0] = NULL; } - if (unlikely(sk->sk_policy[1] != NULL)) { - xfrm_policy_delete(sk->sk_policy[1], XFRM_POLICY_MAX+1); + pol = rcu_dereference_protected(sk->sk_policy[1], 1); + if (unlikely(pol != NULL)) { + xfrm_policy_delete(pol, XFRM_POLICY_MAX+1); sk->sk_policy[1] = NULL; } } @@ -1168,7 +1174,7 @@ void xfrm_garbage_collect(struct net *net); #else static inline void xfrm_sk_free_policy(struct sock *sk) {} -static inline int xfrm_sk_clone_policy(struct sock *sk) { return 0; } +static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; } static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb) diff --git a/net/core/sock.c b/net/core/sock.c index 623224a..3148f24 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1536,7 +1536,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ is_charged = sk_filter_charge(newsk, filter); - if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) { + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 18cead7..2718184 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1212,8 +1212,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct xfrm_policy *pol; struct net *net = sock_net(sk); + rcu_read_lock(); read_lock_bh(&net->xfrm.xfrm_policy_lock); - if ((pol = sk->sk_policy[dir]) != NULL) { + pol = rcu_dereference(sk->sk_policy[dir]); + if (pol != NULL) { bool match = xfrm_selector_match(&pol->selector, fl, sk->sk_family); int err = 0; @@ -1237,6 +1239,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, } out: read_unlock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_unlock(); return pol; } @@ -1305,13 +1308,14 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) #endif write_lock_bh(&net->xfrm.xfrm_policy_lock); - old_pol = sk->sk_policy[dir]; - sk->sk_policy[dir] = pol; + old_pol = rcu_dereference_protected(sk->sk_policy[dir], + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { pol->curlft.add_time = get_seconds(); pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); xfrm_sk_policy_link(pol, dir); } + rcu_assign_pointer(sk->sk_policy[dir], pol); if (old_pol) { if (pol) xfrm_policy_requeue(old_pol, pol); @@ -1359,17 +1363,26 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) return newp; } -int __xfrm_sk_clone_policy(struct sock *sk) +int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { - struct xfrm_policy *p0 = sk->sk_policy[0], - *p1 = sk->sk_policy[1]; + const struct xfrm_policy *p; + struct xfrm_policy *np; + int i, ret = 0; - sk->sk_policy[0] = sk->sk_policy[1] = NULL; - if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL) - return -ENOMEM; - if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL) - return -ENOMEM; - return 0; + rcu_read_lock(); + for (i = 0; i < 2; i++) { + p = rcu_dereference(osk->sk_policy[i]); + if (p) { + np = clone_policy(p, i); + if (unlikely(!np)) { + ret = -ENOMEM; + break; + } + rcu_assign_pointer(sk->sk_policy[i], np); + } + } + rcu_read_unlock(); + return ret; } static int -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html