On 2019-10-14, at 21:41:41 +0200, Florian Westphal wrote: > When dumping the unconfirmed lists, the cpu that is processing the ct > entry can realloc ct->ext at any time. > > Accessing extensions from another CPU is ok provided rcu read lock is > held. > > Once extension space will be reallocated with plain krealloc this > isn't used anymore. > > Dumping the extension area for confirmed or dying conntracks is fine: > no reallocations are allowed and list iteration holds appropriate > locks that prevent ct (and thus ct->ext) from getting free'd. > > Signed-off-by: Florian Westphal <fw@xxxxxxxxx> > --- > net/netfilter/nf_conntrack_netlink.c | 77 ++++++++++++++++++---------- > 1 file changed, 51 insertions(+), 26 deletions(-) > > diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c > index e2d13cd18875..db04e1bfb04d 100644 > --- a/net/netfilter/nf_conntrack_netlink.c > +++ b/net/netfilter/nf_conntrack_netlink.c > @@ -506,9 +506,44 @@ static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) > return -1; > } > > +/* all these functions access ct->ext. Caller must either hold a reference > + * on ct or prevent its deletion by holding either the bucket spinlock or > + * pcpu dying list lock. > + */ > +static int ctnetlink_dump_extinfo(struct sk_buff *skb, > + const struct nf_conn *ct, u32 type) > +{ > + if (ctnetlink_dump_acct(skb, ct, type) < 0 || > + ctnetlink_dump_timestamp(skb, ct) < 0 || > + ctnetlink_dump_helpinfo(skb, ct) < 0 || > + ctnetlink_dump_labels(skb, ct) < 0 || > + ctnetlink_dump_ct_seq_adj(skb, ct) < 0 || > + ctnetlink_dump_ct_synproxy(skb, ct) < 0) > + return -1; > + > + return 0; > +} > + > +static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) > +{ > + if (ctnetlink_dump_status(skb, ct) < 0 || > + ctnetlink_dump_mark(skb, ct) < 0 || > + ctnetlink_dump_secctx(skb, ct) < 0 || > + ctnetlink_dump_id(skb, ct) < 0 || > + ctnetlink_dump_use(skb, ct) < 0 || > + ctnetlink_dump_master(skb, ct) < 0) > + return -1; > + > + if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) && > + (ctnetlink_dump_timeout(skb, ct) < 0 || > + ctnetlink_dump_protoinfo(skb, ct) < 0)) > + > + return 0; > +} > + > static int > ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, > - struct nf_conn *ct) > + struct nf_conn *ct, bool extinfo) > { > const struct nf_conntrack_zone *zone; > struct nlmsghdr *nlh; > @@ -552,23 +587,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, > NF_CT_DEFAULT_ZONE_DIR) < 0) > goto nla_put_failure; > > - if (ctnetlink_dump_status(skb, ct) < 0 || > - ctnetlink_dump_acct(skb, ct, type) < 0 || > - ctnetlink_dump_timestamp(skb, ct) < 0 || > - ctnetlink_dump_helpinfo(skb, ct) < 0 || > - ctnetlink_dump_mark(skb, ct) < 0 || > - ctnetlink_dump_secctx(skb, ct) < 0 || > - ctnetlink_dump_labels(skb, ct) < 0 || > - ctnetlink_dump_id(skb, ct) < 0 || > - ctnetlink_dump_use(skb, ct) < 0 || > - ctnetlink_dump_master(skb, ct) < 0 || > - ctnetlink_dump_ct_seq_adj(skb, ct) < 0 || > - ctnetlink_dump_ct_synproxy(skb, ct) < 0) > + if (ctnetlink_dump_info(skb, ct) < 0) > goto nla_put_failure; > - > - if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) && > - (ctnetlink_dump_timeout(skb, ct) < 0 || > - ctnetlink_dump_protoinfo(skb, ct) < 0)) > + if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0) > goto nla_put_failure; > > nlmsg_end(skb, nlh); > @@ -953,13 +974,11 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) > if (!ctnetlink_filter_match(ct, cb->data)) > continue; > > - rcu_read_lock(); > res = > ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, > cb->nlh->nlmsg_seq, > NFNL_MSG_TYPE(cb->nlh->nlmsg_type), > - ct); > - rcu_read_unlock(); > + ct, true); > if (res < 0) { > nf_conntrack_get(&ct->ct_general); > cb->args[1] = (unsigned long)ct; > @@ -1364,10 +1383,8 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, > return -ENOMEM; > } > > - rcu_read_lock(); > err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, > - NFNL_MSG_TYPE(nlh->nlmsg_type), ct); > - rcu_read_unlock(); > + NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true); > nf_ct_put(ct); > if (err <= 0) > goto free; > @@ -1429,12 +1446,20 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying > continue; > cb->args[1] = 0; > } > - rcu_read_lock(); > + > + /* We can't dump extension info for the unconfirmed > + * list because unconfirmed conntracks can have ct->ext > + * reallocated (and thus freed). > + * > + * In the dying list case ct->ext can't be altered during > + * list walk anymore, and free can only occur after ct > + * has been unlinked from the dying list (which can't > + * happen until after we drop pcpu->lock). > + */ > res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, > cb->nlh->nlmsg_seq, > NFNL_MSG_TYPE(cb->nlh->nlmsg_type), > - ct); > - rcu_read_unlock(); > + ct, dying ? true : false); s/dying ? true : false/dying/ > if (res < 0) { > if (!atomic_inc_not_zero(&ct->ct_general.use)) > continue; > -- > 2.21.0 > > J.
Attachment:
signature.asc
Description: PGP signature