Using u64_stats for statistics has the advantage that the seqcount_t
synchronisation can be reduced to 32bit architectures only. On 64bit
architectures the update can happen lockless given that there is only
one writer. The critical section (xt_write_recseq_begin() -
xt_write_recseq_end()) can be reduced to just the update of the value,
since the reader and its access to xt_table_info::private are RCU
protected.

Use u64_stats_t in xt_counters_k for statistics. Add xt_counter_add()
as a helper to update counters within the critical section.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
 include/linux/netfilter/x_tables.h | 71 ++++++------------------------
 net/ipv4/netfilter/arp_tables.c    | 23 ++++------
 net/ipv4/netfilter/ip_tables.c     | 23 ++++------
 net/ipv6/netfilter/ip6_tables.c    | 23 ++++------
 net/netfilter/x_tables.c           |  6 +--
 5 files changed, 43 insertions(+), 103 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index fc52a2ba90f6b..df429d0198c92 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -269,10 +269,21 @@ struct xt_table_info {
 
 struct xt_counters_k {
 	/* Packet and byte counter */
-	__u64 pcnt;
-	__u64 bcnt;
+	u64_stats_t pcnt;
+	u64_stats_t bcnt;
 };
 
+DECLARE_PER_CPU(struct u64_stats_sync, xt_syncp);
+
+static inline void xt_counter_add(struct xt_counters_k *xt_counter,
+				  unsigned int bytes, unsigned int packets)
+{
+	u64_stats_update_begin(this_cpu_ptr(&xt_syncp));
+	u64_stats_add(&xt_counter->pcnt, packets);
+	u64_stats_add(&xt_counter->bcnt, bytes);
+	u64_stats_update_end(this_cpu_ptr(&xt_syncp));
+}
+
 union xt_counter_k {
 	struct xt_counters_k __percpu *pcpu;
 	struct xt_counters_k local;
@@ -346,16 +357,6 @@ void xt_proto_fini(struct net *net, u_int8_t af);
 struct xt_table_info *xt_alloc_table_info(unsigned int size);
 void xt_free_table_info(struct xt_table_info *info);
 
-/**
- * xt_recseq - recursive seqcount for netfilter use
- *
- * Packet processing changes the seqcount only if no recursion happened
- * get_counters() can use read_seqcount_begin()/read_seqcount_retry(),
- * because we use the normal seqcount convention :
- * Low order bit set to 1 if a writer is active.
- */
-DECLARE_PER_CPU(seqcount_t, xt_recseq);
-
 bool xt_af_lock_held(u_int8_t af);
 static inline struct xt_table_info *nf_table_private(const struct xt_table *table)
 {
@@ -368,52 +369,6 @@ static inline struct xt_table_info *nf_table_private(const struct xt_table *tabl
  */
 extern struct static_key xt_tee_enabled;
 
-/**
- * xt_write_recseq_begin - start of a write section
- *
- * Begin packet processing : all readers must wait the end
- * 1) Must be called with preemption disabled
- * 2) softirqs must be disabled too (or we should use this_cpu_add())
- * Returns:
- *  1 if no recursion on this cpu
- *  0 if recursion detected
- */
-static inline unsigned int xt_write_recseq_begin(void)
-{
-	unsigned int addend;
-
-	/*
-	 * Low order bit of sequence is set if we already
-	 * called xt_write_recseq_begin().
-	 */
-	addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1;
-
-	/*
-	 * This is kind of a write_seqcount_begin(), but addend is 0 or 1
-	 * We dont check addend value to avoid a test and conditional jump,
-	 * since addend is most likely 1
-	 */
-	__this_cpu_add(xt_recseq.sequence, addend);
-	smp_mb();
-
-	return addend;
-}
-
-/**
- * xt_write_recseq_end - end of a write section
- * @addend: return value from previous xt_write_recseq_begin()
- *
- * End packet processing : all readers can proceed
- * 1) Must be called with preemption disabled
- * 2) softirqs must be disabled too (or we should use this_cpu_add())
- */
-static inline void xt_write_recseq_end(unsigned int addend)
-{
-	/* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
-	smp_wmb();
-	__this_cpu_add(xt_recseq.sequence, addend);
-}
-
 /*
  * This helper is performance critical and must be inlined
  */
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index ce3d73155ca9b..6b957de58d2cf 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -194,7 +194,6 @@ unsigned int arpt_do_table(void *priv,
 	unsigned int cpu, stackidx = 0;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
-	unsigned int addend;
 
 	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
 		return NF_DROP;
@@ -204,7 +203,6 @@ unsigned int arpt_do_table(void *priv,
 
 	local_bh_disable();
 	rcu_read_lock();
-	addend = xt_write_recseq_begin();
 	private = rcu_dereference(table->priv_info);
 	cpu = smp_processor_id();
 	table_base = private->entries;
@@ -229,7 +227,7 @@ unsigned int arpt_do_table(void *priv,
 		}
 
 		counter = xt_get_this_cpu_counter(&e->counter_pad);
-		ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1);
+		xt_counter_add(counter, arp_hdr_len(skb->dev), 1);
 
 		t = arpt_get_target_c(e);
 
@@ -279,7 +277,6 @@ unsigned int arpt_do_table(void *priv,
 			break;
 		}
 	} while (!acpar.hotdrop);
-	xt_write_recseq_end(addend);
 	rcu_read_unlock();
 	local_bh_enable();
 
@@ -607,7 +604,7 @@ static void get_counters(const struct xt_table_info *t,
 	unsigned int i;
 
 	for_each_possible_cpu(cpu) {
-		seqcount_t *s = &per_cpu(xt_recseq, cpu);
+		struct u64_stats_sync *syncp = &per_cpu(xt_syncp, cpu);
 
 		i = 0;
 		xt_entry_foreach(iter, t->entries, t->size) {
@@ -617,10 +614,10 @@ static void get_counters(const struct xt_table_info *t,
 
 			tmp = xt_get_per_cpu_counter(&iter->counter_pad, cpu);
 			do {
-				start = read_seqcount_begin(s);
-				bcnt = tmp->bcnt;
-				pcnt = tmp->pcnt;
-			} while (read_seqcount_retry(s, start));
+				start = u64_stats_fetch_begin(syncp);
+				bcnt = u64_stats_read(&tmp->bcnt);
+				pcnt = u64_stats_read(&tmp->pcnt);
+			} while (u64_stats_fetch_retry(syncp, start));
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
@@ -641,7 +638,8 @@ static void get_old_counters(const struct xt_table_info *t,
 			struct xt_counters_k *tmp;
 
 			tmp = xt_get_per_cpu_counter(&iter->counter_pad, cpu);
-			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+			ADD_COUNTER(counters[i], u64_stats_read(&tmp->bcnt),
+				    u64_stats_read(&tmp->pcnt));
 			++i;
 		}
 		cond_resched();
@@ -1011,7 +1009,6 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	const struct xt_table_info *private;
 	int ret = 0;
 	struct arpt_entry *iter;
-	unsigned int addend;
 
 	paddc = xt_copy_counters(arg, len, &tmp);
 	if (IS_ERR(paddc))
@@ -1033,15 +1030,13 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	}
 
 	i = 0;
-	addend = xt_write_recseq_begin();
 	xt_entry_foreach(iter, private->entries, private->size) {
 		struct xt_counters_k *tmp;
 
 		tmp = xt_get_this_cpu_counter(&iter->counter_pad);
-		ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
+		xt_counter_add(tmp, paddc[i].bcnt, paddc[i].pcnt);
 		++i;
 	}
-	xt_write_recseq_end(addend);
 unlock_up_free:
 	rcu_read_unlock();
 	local_bh_enable();
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 95f917f5bceef..c5c90e2f724ba 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -236,7 +236,6 @@ ipt_do_table(void *priv,
 	unsigned int stackidx, cpu;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
-	unsigned int addend;
 
 	/* Initialization */
 	stackidx = 0;
@@ -258,7 +257,6 @@ ipt_do_table(void *priv,
 	local_bh_disable();
 	rcu_read_lock();
 	private = rcu_dereference(table->priv_info);
-	addend = xt_write_recseq_begin();
 	cpu = smp_processor_id();
 	table_base = private->entries;
 	jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
@@ -296,7 +294,7 @@ ipt_do_table(void *priv,
 		}
 
 		counter = xt_get_this_cpu_counter(&e->counter_pad);
-		ADD_COUNTER(*counter, skb->len, 1);
+		xt_counter_add(counter, skb->len, 1);
 
 		t = ipt_get_target_c(e);
 		WARN_ON(!t->u.kernel.target);
@@ -354,7 +352,6 @@ ipt_do_table(void *priv,
 		}
 	} while (!acpar.hotdrop);
 
-	xt_write_recseq_end(addend);
 	rcu_read_unlock();
 	local_bh_enable();
 
@@ -746,7 +743,7 @@ get_counters(const struct xt_table_info *t,
 	unsigned int i;
 
 	for_each_possible_cpu(cpu) {
-		seqcount_t *s = &per_cpu(xt_recseq, cpu);
+		struct u64_stats_sync *syncp = &per_cpu(xt_syncp, cpu);
 
 		i = 0;
 		xt_entry_foreach(iter, t->entries, t->size) {
@@ -756,10 +753,10 @@ get_counters(const struct xt_table_info *t,
 
 			tmp = xt_get_per_cpu_counter(&iter->counter_pad, cpu);
 			do {
-				start = read_seqcount_begin(s);
-				bcnt = tmp->bcnt;
-				pcnt = tmp->pcnt;
-			} while (read_seqcount_retry(s, start));
+				start = u64_stats_fetch_begin(syncp);
+				bcnt = u64_stats_read(&tmp->bcnt);
+				pcnt = u64_stats_read(&tmp->pcnt);
+			} while (u64_stats_fetch_retry(syncp, start));
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
@@ -780,7 +777,8 @@ static void get_old_counters(const struct xt_table_info *t,
 			const struct xt_counters_k *tmp;
 
 			tmp = xt_get_per_cpu_counter(&iter->counter_pad, cpu);
-			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+			ADD_COUNTER(counters[i], u64_stats_read(&tmp->bcnt),
+				    u64_stats_read(&tmp->pcnt));
 			++i; /* macro does multi eval of i */
 		}
 
@@ -1164,7 +1162,6 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	const struct xt_table_info *private;
 	int ret = 0;
 	struct ipt_entry *iter;
-	unsigned int addend;
 
 	paddc = xt_copy_counters(arg, len, &tmp);
 	if (IS_ERR(paddc))
@@ -1185,15 +1182,13 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	}
 
 	i = 0;
-	addend = xt_write_recseq_begin();
 	xt_entry_foreach(iter, private->entries, private->size) {
 		struct xt_counters_k *tmp;
 
 		tmp = xt_get_this_cpu_counter(&iter->counter_pad);
-		ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
+		xt_counter_add(tmp, paddc[i].bcnt, paddc[i].pcnt);
 		++i;
 	}
-	xt_write_recseq_end(addend);
 unlock_up_free:
 	rcu_read_unlock();
 	local_bh_enable();
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f4877b1b2463e..5a6a592cd53dc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -259,7 +259,6 @@ ip6t_do_table(void *priv, struct sk_buff *skb,
 	unsigned int stackidx, cpu;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
-	unsigned int addend;
 
 	/* Initialization */
 	stackidx = 0;
@@ -280,7 +279,6 @@ ip6t_do_table(void *priv, struct sk_buff *skb,
 	local_bh_disable();
 	rcu_read_lock();
 	private = rcu_dereference(table->priv_info);
-	addend = xt_write_recseq_begin();
 	cpu = smp_processor_id();
 	table_base = private->entries;
 	jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -319,7 +317,7 @@ ip6t_do_table(void *priv, struct sk_buff *skb,
 		}
 
 		counter = xt_get_this_cpu_counter(&e->counter_pad);
-		ADD_COUNTER(*counter, skb->len, 1);
+		xt_counter_add(counter, skb->len, 1);
 
 		t = ip6t_get_target_c(e);
 		WARN_ON(!t->u.kernel.target);
@@ -372,7 +370,6 @@ ip6t_do_table(void *priv, struct sk_buff *skb,
 			break;
 	} while (!acpar.hotdrop);
 
-	xt_write_recseq_end(addend);
 	rcu_read_unlock();
 	local_bh_enable();
 
@@ -763,7 +760,7 @@ get_counters(const struct xt_table_info *t,
 	unsigned int i;
 
 	for_each_possible_cpu(cpu) {
-		seqcount_t *s = &per_cpu(xt_recseq, cpu);
+		struct u64_stats_sync *syncp = &per_cpu(xt_syncp, cpu);
 
 		i = 0;
 		xt_entry_foreach(iter, t->entries, t->size) {
@@ -773,10 +770,10 @@ get_counters(const struct xt_table_info *t,
 
 			tmp = xt_get_per_cpu_counter(&iter->counter_pad, cpu);
 			do {
-				start = read_seqcount_begin(s);
-				bcnt = tmp->bcnt;
-				pcnt = tmp->pcnt;
-			} while (read_seqcount_retry(s, start));
+				start = u64_stats_fetch_begin(syncp);
+				bcnt = u64_stats_read(&tmp->bcnt);
+				pcnt = u64_stats_read(&tmp->pcnt);
+			} while (u64_stats_fetch_retry(syncp, start));
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
@@ -797,7 +794,8 @@ static void get_old_counters(const struct xt_table_info *t,
 			const struct xt_counters_k *tmp;
 
 			tmp = xt_get_per_cpu_counter(&iter->counter_pad, cpu);
-			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+			ADD_COUNTER(counters[i], u64_stats_read(&tmp->bcnt),
+				    u64_stats_read(&tmp->pcnt));
 			++i;
 		}
 		cond_resched();
@@ -1181,7 +1179,6 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	const struct xt_table_info *private;
 	int ret = 0;
 	struct ip6t_entry *iter;
-	unsigned int addend;
 
 	paddc = xt_copy_counters(arg, len, &tmp);
 	if (IS_ERR(paddc))
@@ -1201,15 +1198,13 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	}
 
 	i = 0;
-	addend = xt_write_recseq_begin();
 	xt_entry_foreach(iter, private->entries, private->size) {
 		struct xt_counters_k *tmp;
 
 		tmp = xt_get_this_cpu_counter(&iter->counter_pad);
-		ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
+		xt_counter_add(tmp, paddc[i].bcnt, paddc[i].pcnt);
 		++i;
 	}
-	xt_write_recseq_end(addend);
 unlock_up_free:
 	rcu_read_unlock();
 	local_bh_enable();
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e3f48539f81d6..d9a31ee920fc0 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1330,8 +1330,8 @@ void xt_compat_unlock(u_int8_t af)
 EXPORT_SYMBOL_GPL(xt_compat_unlock);
 #endif
 
-DEFINE_PER_CPU(seqcount_t, xt_recseq);
-EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+DEFINE_PER_CPU(struct u64_stats_sync, xt_syncp);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_syncp);
 
 struct static_key xt_tee_enabled __read_mostly;
 EXPORT_SYMBOL_GPL(xt_tee_enabled);
@@ -1977,7 +1977,7 @@ static int __init xt_init(void)
 	int rv;
 
 	for_each_possible_cpu(i) {
-		seqcount_init(&per_cpu(xt_recseq, i));
+		u64_stats_init(&per_cpu(xt_syncp, i));
 	}
 
 	xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
-- 
2.47.2
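
For readers unfamiliar with the u64_stats API, below is a minimal stand-alone
sketch of the writer/reader pattern the patch switches to. The names
(demo_counters, demo_init, demo_add, demo_read) are made up for illustration
and are not part of the patch; the patch itself keeps one shared per-CPU
xt_syncp instead of embedding a u64_stats_sync in every counter.

/* Illustrative only, not part of the patch: one writer at a time per
 * counter instance is assumed (e.g. per-CPU data updated with BH disabled).
 */
#include <linux/u64_stats_sync.h>

struct demo_counters {			/* hypothetical example struct */
	u64_stats_t packets;
	u64_stats_t bytes;
	struct u64_stats_sync syncp;	/* real seqcount only on 32bit */
};

static void demo_init(struct demo_counters *c)
{
	u64_stats_init(&c->syncp);	/* initialises the 32bit seqcount */
}

/* Writer side: plain 64bit stores on 64bit, seqcount-protected on 32bit. */
static void demo_add(struct demo_counters *c, unsigned int len)
{
	u64_stats_update_begin(&c->syncp);
	u64_stats_add(&c->packets, 1);
	u64_stats_add(&c->bytes, len);
	u64_stats_update_end(&c->syncp);
}

/* Reader side: retry loop, analogous to get_counters() in the patch. */
static void demo_read(struct demo_counters *c, u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&c->syncp);
		*packets = u64_stats_read(&c->packets);
		*bytes = u64_stats_read(&c->bytes);
	} while (u64_stats_fetch_retry(&c->syncp, start));
}

On 64bit, u64_stats_update_begin()/_end() compile to essentially nothing and
u64_stats_t is a plain u64, so the write side needs no extra synchronisation
as long as updates to a given counter are serialised; on 32bit the seqcount
inside the u64_stats_sync provides the tearing protection that the removed
xt_recseq used to provide.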