Re: [PATCH nf-next] netfilter: ctnetlink: don't dump ct extensions of unconfirmed conntracks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2019-10-14, at 21:41:41 +0200, Florian Westphal wrote:
> When dumping the unconfirmed lists, the cpu that is processing the ct
> entry can realloc ct->ext at any time.
>
> Accessing extensions from another CPU is ok provided rcu read lock is
> held.
>
> Once extension space will be reallocated with plain krealloc this
> isn't used anymore.
>
> Dumping the extension area for confirmed or dying conntracks is fine:
> no reallocations are allowed and list iteration holds appropriate
> locks that prevent ct (and thus ct->ext) from getting free'd.
>
> Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
> ---
>  net/netfilter/nf_conntrack_netlink.c | 77 ++++++++++++++++++----------
>  1 file changed, 51 insertions(+), 26 deletions(-)
>
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index e2d13cd18875..db04e1bfb04d 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -506,9 +506,44 @@ static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
>  	return -1;
>  }
>
> +/* all these functions access ct->ext. Caller must either hold a reference
> + * on ct or prevent its deletion by holding either the bucket spinlock or
> + * pcpu dying list lock.
> + */
> +static int ctnetlink_dump_extinfo(struct sk_buff *skb,
> +				  const struct nf_conn *ct, u32 type)
> +{
> +	if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
> +	    ctnetlink_dump_timestamp(skb, ct) < 0 ||
> +	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
> +	    ctnetlink_dump_labels(skb, ct) < 0 ||
> +	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
> +	    ctnetlink_dump_ct_synproxy(skb, ct) < 0)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
> +{
> +	if (ctnetlink_dump_status(skb, ct) < 0 ||
> +	    ctnetlink_dump_mark(skb, ct) < 0 ||
> +	    ctnetlink_dump_secctx(skb, ct) < 0 ||
> +	    ctnetlink_dump_id(skb, ct) < 0 ||
> +	    ctnetlink_dump_use(skb, ct) < 0 ||
> +	    ctnetlink_dump_master(skb, ct) < 0)
> +		return -1;
> +
> +	if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
> +	    (ctnetlink_dump_timeout(skb, ct) < 0 ||
> +	     ctnetlink_dump_protoinfo(skb, ct) < 0))
> +
> +	return 0;
> +}
> +
>  static int
>  ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
> -		    struct nf_conn *ct)
> +		    struct nf_conn *ct, bool extinfo)
>  {
>  	const struct nf_conntrack_zone *zone;
>  	struct nlmsghdr *nlh;
> @@ -552,23 +587,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
>  				   NF_CT_DEFAULT_ZONE_DIR) < 0)
>  		goto nla_put_failure;
>
> -	if (ctnetlink_dump_status(skb, ct) < 0 ||
> -	    ctnetlink_dump_acct(skb, ct, type) < 0 ||
> -	    ctnetlink_dump_timestamp(skb, ct) < 0 ||
> -	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
> -	    ctnetlink_dump_mark(skb, ct) < 0 ||
> -	    ctnetlink_dump_secctx(skb, ct) < 0 ||
> -	    ctnetlink_dump_labels(skb, ct) < 0 ||
> -	    ctnetlink_dump_id(skb, ct) < 0 ||
> -	    ctnetlink_dump_use(skb, ct) < 0 ||
> -	    ctnetlink_dump_master(skb, ct) < 0 ||
> -	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
> -	    ctnetlink_dump_ct_synproxy(skb, ct) < 0)
> +	if (ctnetlink_dump_info(skb, ct) < 0)
>  		goto nla_put_failure;
> -
> -	if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
> -	    (ctnetlink_dump_timeout(skb, ct) < 0 ||
> -	     ctnetlink_dump_protoinfo(skb, ct) < 0))
> +	if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0)
>  		goto nla_put_failure;
>
>  	nlmsg_end(skb, nlh);
> @@ -953,13 +974,11 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
>  			if (!ctnetlink_filter_match(ct, cb->data))
>  				continue;
>
> -			rcu_read_lock();
>  			res =
>  			ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
>  					    cb->nlh->nlmsg_seq,
>  					    NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
> -					    ct);
> -			rcu_read_unlock();
> +					    ct, true);
>  			if (res < 0) {
>  				nf_conntrack_get(&ct->ct_general);
>  				cb->args[1] = (unsigned long)ct;
> @@ -1364,10 +1383,8 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
>  		return -ENOMEM;
>  	}
>
> -	rcu_read_lock();
>  	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
> -				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
> -	rcu_read_unlock();
> +				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
>  	nf_ct_put(ct);
>  	if (err <= 0)
>  		goto free;
> @@ -1429,12 +1446,20 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
>  					continue;
>  				cb->args[1] = 0;
>  			}
> -			rcu_read_lock();
> +
> +			/* We can't dump extension info for the unconfirmed
> +			 * list because unconfirmed conntracks can have ct->ext
> +			 * reallocated (and thus freed).
> +			 *
> +			 * In the dying list case ct->ext can't be altered during
> +			 * list walk anymore, and free can only occur after ct
> +			 * has been unlinked from the dying list (which can't
> +			 * happen until after we drop pcpu->lock).
> +			 */
>  			res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
>  						  cb->nlh->nlmsg_seq,
>  						  NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
> -						  ct);
> -			rcu_read_unlock();
> +						  ct, dying ? true : false);

s/dying ? true : false/dying/

>  			if (res < 0) {
>  				if (!atomic_inc_not_zero(&ct->ct_general.use))
>  					continue;
> --
> 2.21.0
>
>

J.

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Netfitler Users]     [Berkeley Packet Filter]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux