Dump and reset doesn't work unless cmpxchg64() is used from both the
packet and control plane paths, and that approach is going to be slow.
Instead, use a percpu seqcount to fetch counters consistently, then
subtract bytes and packets in case a reset was requested.

This patch is based on an original sketch from Eric Dumazet.

Suggested-by: Eric Dumazet <eric.dumazet@xxxxxxxxx>
Fixes: 43da04a593d8 ("netfilter: nf_tables: atomic dump and reset for stateful objects")
Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>
---
 net/netfilter/nft_counter.c | 128 ++++++++++++++++++--------------------
 1 file changed, 51 insertions(+), 77 deletions(-)

diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f6a02c5071c2..c37983d0a141 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -22,27 +22,29 @@ struct nft_counter {
 	u64		packets;
 };
 
-struct nft_counter_percpu {
-	struct nft_counter	counter;
-	struct u64_stats_sync	syncp;
-};
-
 struct nft_counter_percpu_priv {
-	struct nft_counter_percpu __percpu *counter;
+	struct nft_counter __percpu *counter;
 };
 
+static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
+
 static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
 				       struct nft_regs *regs,
 				       const struct nft_pktinfo *pkt)
 {
-	struct nft_counter_percpu *this_cpu;
+	struct nft_counter *this_cpu;
+	seqcount_t *myseq;
 
 	local_bh_disable();
 	this_cpu = this_cpu_ptr(priv->counter);
-	u64_stats_update_begin(&this_cpu->syncp);
-	this_cpu->counter.bytes += pkt->skb->len;
-	this_cpu->counter.packets++;
-	u64_stats_update_end(&this_cpu->syncp);
+	myseq = this_cpu_ptr(&nft_counter_seq);
+
+	write_seqcount_begin(myseq);
+
+	this_cpu->bytes += pkt->skb->len;
+	this_cpu->packets++;
+
+	write_seqcount_end(myseq);
 	local_bh_enable();
 }
 
@@ -58,21 +60,21 @@ static inline void nft_counter_obj_eval(struct nft_object *obj,
 static int nft_counter_do_init(const struct nlattr * const tb[],
 			       struct nft_counter_percpu_priv *priv)
 {
-	struct nft_counter_percpu __percpu *cpu_stats;
-	struct nft_counter_percpu *this_cpu;
+	struct nft_counter __percpu *cpu_stats;
+	struct nft_counter *this_cpu;
 
-	cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
+	cpu_stats = alloc_percpu(struct nft_counter);
 	if (cpu_stats == NULL)
 		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);
 	if (tb[NFTA_COUNTER_PACKETS]) {
-		this_cpu->counter.packets =
+		this_cpu->packets =
 			be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
 	}
 	if (tb[NFTA_COUNTER_BYTES]) {
-		this_cpu->counter.bytes =
+		this_cpu->bytes =
 			be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
 	}
 	preempt_enable();
@@ -100,74 +102,44 @@ static void nft_counter_obj_destroy(struct nft_object *obj)
 	nft_counter_do_destroy(priv);
 }
 
-static void nft_counter_fetch(struct nft_counter_percpu __percpu *counter,
-			      struct nft_counter *total)
+static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
+			      struct nft_counter *total, bool reset)
 {
-	struct nft_counter_percpu *cpu_stats;
+	struct nft_counter *this_cpu;
+	const seqcount_t *myseq;
 	u64 bytes, packets;
 	unsigned int seq;
 	int cpu;
 
 	memset(total, 0, sizeof(*total));
 	for_each_possible_cpu(cpu) {
-		cpu_stats = per_cpu_ptr(counter, cpu);
+		myseq = per_cpu_ptr(&nft_counter_seq, cpu);
+		this_cpu = per_cpu_ptr(priv->counter, cpu);
 		do {
-			seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
-			bytes = cpu_stats->counter.bytes;
-			packets = cpu_stats->counter.packets;
-		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
-
-		total->packets += packets;
-		total->bytes += bytes;
-	}
-}
-
-static u64 __nft_counter_reset(u64 *counter)
-{
-	u64 ret, old;
-
-	do {
-		old = *counter;
-		ret = cmpxchg64(counter, old, 0);
-	} while (ret != old);
-
-	return ret;
-}
-
-static void nft_counter_reset(struct nft_counter_percpu __percpu *counter,
-			      struct nft_counter *total)
-{
-	struct nft_counter_percpu *cpu_stats;
-	u64 bytes, packets;
-	unsigned int seq;
-	int cpu;
-
-	memset(total, 0, sizeof(*total));
-	for_each_possible_cpu(cpu) {
-		bytes = packets = 0;
-
-		cpu_stats = per_cpu_ptr(counter, cpu);
-		do {
-			seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
-			packets += __nft_counter_reset(&cpu_stats->counter.packets);
-			bytes += __nft_counter_reset(&cpu_stats->counter.bytes);
-		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
-
-		total->packets += packets;
-		total->bytes += bytes;
+			seq = read_seqcount_begin(myseq);
+			bytes = this_cpu->bytes;
+			packets = this_cpu->packets;
+		} while (read_seqcount_retry(myseq, seq));
+
+		total->bytes += bytes;
+		total->packets += packets;
+
+		if (reset) {
+			local_bh_disable();
+			this_cpu->packets -= packets;
+			this_cpu->bytes -= bytes;
+			local_bh_enable();
+		}
 	}
 }
 
 static int nft_counter_do_dump(struct sk_buff *skb,
-			       const struct nft_counter_percpu_priv *priv,
+			       struct nft_counter_percpu_priv *priv,
 			       bool reset)
 {
 	struct nft_counter total;
 
-	if (reset)
-		nft_counter_reset(priv->counter, &total);
-	else
-		nft_counter_fetch(priv->counter, &total);
+	nft_counter_fetch(priv, &total, reset);
 
 	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
 			 NFTA_COUNTER_PAD) ||
@@ -216,7 +188,7 @@ static void nft_counter_eval(const struct nft_expr *expr,
 
 static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
-	const struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
 
 	return nft_counter_do_dump(skb, priv, false);
 }
@@ -242,21 +214,20 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
 {
 	struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
 	struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
-	struct nft_counter_percpu __percpu *cpu_stats;
-	struct nft_counter_percpu *this_cpu;
+	struct nft_counter __percpu *cpu_stats;
+	struct nft_counter *this_cpu;
 	struct nft_counter total;
 
-	nft_counter_fetch(priv->counter, &total);
+	nft_counter_fetch(priv, &total, false);
 
-	cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
-					      GFP_ATOMIC);
+	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
 	if (cpu_stats == NULL)
 		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);
-	this_cpu->counter.packets = total.packets;
-	this_cpu->counter.bytes = total.bytes;
+	this_cpu->packets = total.packets;
+	this_cpu->bytes = total.bytes;
 	preempt_enable();
 
 	priv_clone->counter = cpu_stats;
@@ -285,7 +256,10 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
 
 static int __init nft_counter_module_init(void)
 {
-	int err;
+	int cpu, err;
+
+	for_each_possible_cpu(cpu)
+		seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
 
 	err = nft_register_obj(&nft_counter_obj);
 	if (err < 0)
-- 
2.1.4
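
For readers skimming the diff, the approach condenses to the sketch below. It is
only an illustration of the per-CPU seqcount pattern the patch uses, not part of
the patch itself: the sample_* names are made up, while the primitives
(DEFINE_PER_CPU, write_seqcount_begin/end, read_seqcount_begin/retry,
local_bh_disable) are the same ones the patch relies on.

/* Sketch only: sample_* names are hypothetical, modeled on the patch above. */
#include <linux/percpu.h>
#include <linux/seqlock.h>
#include <linux/bottom_half.h>
#include <linux/string.h>
#include <linux/types.h>

struct sample_counter {			/* stands in for struct nft_counter */
	u64 bytes;
	u64 packets;
};

static DEFINE_PER_CPU(struct sample_counter, sample_stats);
static DEFINE_PER_CPU(seqcount_t, sample_seq);

static void sample_init(void)		/* mirrors nft_counter_module_init() */
{
	int cpu;

	for_each_possible_cpu(cpu)
		seqcount_init(per_cpu_ptr(&sample_seq, cpu));
}

/* Packet path: only the local CPU writes its counters, with BH disabled,
 * so the write side never races with itself; the seqcount just flags an
 * update in progress to remote readers.
 */
static void sample_update(unsigned int len)
{
	struct sample_counter *c;
	seqcount_t *myseq;

	local_bh_disable();
	c = this_cpu_ptr(&sample_stats);
	myseq = this_cpu_ptr(&sample_seq);

	write_seqcount_begin(myseq);
	c->bytes += len;
	c->packets++;
	write_seqcount_end(myseq);
	local_bh_enable();
}

/* Control plane: take a consistent snapshot of each CPU via read/retry,
 * then, if a reset was requested, subtract exactly the snapshotted values
 * so updates racing with the dump are not lost (no cmpxchg64() needed).
 */
static void sample_fetch(struct sample_counter *total, bool reset)
{
	u64 bytes, packets;
	unsigned int seq;
	int cpu;

	memset(total, 0, sizeof(*total));
	for_each_possible_cpu(cpu) {
		const seqcount_t *myseq = per_cpu_ptr(&sample_seq, cpu);
		struct sample_counter *c = per_cpu_ptr(&sample_stats, cpu);

		do {
			seq = read_seqcount_begin(myseq);
			bytes = c->bytes;
			packets = c->packets;
		} while (read_seqcount_retry(myseq, seq));

		total->bytes += bytes;
		total->packets += packets;

		if (reset) {
			local_bh_disable();
			c->bytes -= bytes;
			c->packets -= packets;
			local_bh_enable();
		}
	}
}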