Set expressions' dump callbacks are not concurrency-safe per-se with reset bit set. If two CPUs reset the same element at the same time, values may underrun at least with element-attached counters and quotas. Prevent this by introducing dedicated callbacks for nfnetlink and the asynchronous dump handling to serialize access. Signed-off-by: Phil Sutter <phil@xxxxxx> --- Changes since v2: - Move the audit_log_nft_set_reset() call into the critical section to protect the table pointer dereference. - Drop unused nelems variable from (non-reset) nf_tables_getsetelem(). --- net/netfilter/nf_tables_api.c | 109 +++++++++++++++++++++++++++++----- 1 file changed, 94 insertions(+), 15 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 245a2c5be082..fbf18a3b0915 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5816,10 +5816,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); nlmsg_end(skb, nlh); - if (dump_ctx->reset && args.iter.count > args.iter.skip) - audit_log_nft_set_reset(table, cb->seq, - args.iter.count - args.iter.skip); - rcu_read_unlock(); if (args.iter.err && args.iter.err != -EMSGSIZE) @@ -5835,6 +5831,26 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return -ENOSPC; } +static int nf_tables_dumpreset_set(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + struct nft_set_dump_ctx *dump_ctx = cb->data; + int ret, skip = cb->args[0]; + + mutex_lock(&nft_net->commit_mutex); + + ret = nf_tables_dump_set(skb, cb); + + if (cb->args[0] > skip) + audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq, + cb->args[0] - skip); + + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_set_start(struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; @@ -6046,13 +6062,12 @@ static int nf_tables_getsetelem(struct sk_buff *skb, struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; - int rem, err = 0, nelems = 0; struct net *net = info->net; struct nft_table *table; struct nft_set *set; struct nlattr *attr; struct nft_ctx ctx; - bool reset = false; + int rem, err = 0; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, genmask, 0); @@ -6069,9 +6084,6 @@ static int nf_tables_getsetelem(struct sk_buff *skb, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) - reset = true; - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_set_start, @@ -6082,7 +6094,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb, struct nft_set_dump_ctx dump_ctx = { .set = set, .ctx = ctx, - .reset = reset, + .reset = false, }; c.data = &dump_ctx; @@ -6093,17 +6105,84 @@ static int nf_tables_getsetelem(struct sk_buff *skb, return -EINVAL; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_get_set_elem(&ctx, set, attr, reset); + err = nft_get_set_elem(&ctx, set, attr, false); + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); + break; + } + } + + return err; +} + +static int nf_tables_getsetelem_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + struct netlink_ext_ack *extack = info->extack; + u8 genmask = nft_genmask_cur(info->net); + u8 family = info->nfmsg->nfgen_family; + int rem, err = 0, nelems = 0; + struct net *net = info->net; + struct nft_table *table; + struct nft_set *set; + struct nlattr *attr; + struct nft_ctx ctx; + + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask, 0); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); + return PTR_ERR(table); + } + + set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); + + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dump_set_start, + .dump = nf_tables_dumpreset_set, + .done = nf_tables_dump_set_done, + .module = THIS_MODULE, + }; + struct nft_set_dump_ctx dump_ctx = { + .set = set, + .ctx = ctx, + .reset = true, + }; + + c.data = &dump_ctx; + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) + return -EINVAL; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + rcu_read_lock(); + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_get_set_elem(&ctx, set, attr, true); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); break; } nelems++; } + audit_log_nft_set_reset(table, nft_net->base_seq, nelems); - if (reset) - audit_log_nft_set_reset(table, nft_pernet(net)->base_seq, - nelems); + rcu_read_unlock(); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); return err; } @@ -9128,7 +9207,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETSETELEM_RESET] = { - .call = nf_tables_getsetelem, + .call = nf_tables_getsetelem_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, -- 2.41.0