On Tue, Jan 17, 2023 at 07:33:31PM +0200, Vlad Buslov wrote: > > On Tue 17 Jan 2023 at 12:28, Marcelo Ricardo Leitner <marcelo.leitner@xxxxxxxxx> wrote: > > On Fri, Jan 13, 2023 at 05:55:45PM +0100, Vlad Buslov wrote: > >> Following patches in series need to update flowtable rule several times > >> during its lifetime in order to synchronize hardware offload with actual ct > >> status. However, reusing existing 'refresh' logic in act_ct would cause > >> data path to potentially schedule significant amount of spurious tasks in > >> 'add' workqueue since it is executed per-packet. Instead, introduce a new > >> flow 'update' flag and use it to schedule async flow refresh in flowtable > >> gc which will only be executed once per gc iteration. > >> > >> Signed-off-by: Vlad Buslov <vladbu@xxxxxxxxxx> > >> --- > >> include/net/netfilter/nf_flow_table.h | 3 ++- > >> net/netfilter/nf_flow_table_core.c | 20 +++++++++++++++----- > >> net/netfilter/nf_flow_table_offload.c | 5 +++-- > >> 3 files changed, 20 insertions(+), 8 deletions(-) > >> > >> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h > >> index 88ab98ab41d9..e396424e2e68 100644 > >> --- a/include/net/netfilter/nf_flow_table.h > >> +++ b/include/net/netfilter/nf_flow_table.h > >> @@ -165,6 +165,7 @@ enum nf_flow_flags { > >> NF_FLOW_HW_DEAD, > >> NF_FLOW_HW_PENDING, > >> NF_FLOW_HW_BIDIRECTIONAL, > >> + NF_FLOW_HW_UPDATE, > >> }; > >> > >> enum flow_offload_type { > >> @@ -300,7 +301,7 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, > >> #define MODULE_ALIAS_NF_FLOWTABLE(family) \ > >> MODULE_ALIAS("nf-flowtable-" __stringify(family)) > >> > >> -void nf_flow_offload_add(struct nf_flowtable *flowtable, > >> +bool nf_flow_offload_add(struct nf_flowtable *flowtable, > >> struct flow_offload *flow); > >> void nf_flow_offload_del(struct nf_flowtable *flowtable, > >> struct flow_offload *flow); > >> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c > >> index 04bd0ed4d2ae..5b495e768655 100644 > >> --- a/net/netfilter/nf_flow_table_core.c > >> +++ b/net/netfilter/nf_flow_table_core.c > >> @@ -316,21 +316,28 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) > >> } > >> EXPORT_SYMBOL_GPL(flow_offload_add); > >> > >> +static bool __flow_offload_refresh(struct nf_flowtable *flow_table, > >> + struct flow_offload *flow) > >> +{ > >> + if (likely(!nf_flowtable_hw_offload(flow_table))) > >> + return true; > >> + > >> + return nf_flow_offload_add(flow_table, flow); > >> +} > >> + > >> void flow_offload_refresh(struct nf_flowtable *flow_table, > >> struct flow_offload *flow) > >> { > >> u32 timeout; > >> > >> timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); > >> - if (timeout - READ_ONCE(flow->timeout) > HZ) > >> + if (timeout - READ_ONCE(flow->timeout) > HZ && > >> + !test_bit(NF_FLOW_HW_UPDATE, &flow->flags)) > >> WRITE_ONCE(flow->timeout, timeout); > >> else > >> return; > >> > >> - if (likely(!nf_flowtable_hw_offload(flow_table))) > >> - return; > >> - > >> - nf_flow_offload_add(flow_table, flow); > >> + __flow_offload_refresh(flow_table, flow); > >> } > >> EXPORT_SYMBOL_GPL(flow_offload_refresh); > >> > >> @@ -435,6 +442,9 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, > >> } else { > >> flow_offload_del(flow_table, flow); > >> } > >> + } else if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags)) { > >> + if (!__flow_offload_refresh(flow_table, flow)) > >> + set_bit(NF_FLOW_HW_UPDATE, &flow->flags); > >> } else if (test_bit(NF_FLOW_HW, &flow->flags)) { > >> nf_flow_offload_stats(flow_table, flow); > > > > AFAICT even after this patchset it is possible to have both flags set > > at the same time. > > With that, this would cause the stats to skip a beat. > > This would be better: > > > > - } else if (test_bit(NF_FLOW_HW, &flow->flags)) { > > - nf_flow_offload_stats(flow_table, flow); > > + } else { > > + if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags)) > > + if (!__flow_offload_refresh(flow_table, flow)) > > + set_bit(NF_FLOW_HW_UPDATE, &flow->flags); > > + if (test_bit(NF_FLOW_HW, &flow->flags)) > > + nf_flow_offload_stats(flow_table, flow); > > } > > > > But a flow cannot have 2 pending actions at a time. > > Yes. And timeouts are quite generous so there is IMO no problem in > skipping one iteration. It is not like this wq is high priority and we > can guarantee any exact update interval here anyway. I cannot disagree, lets say :-) Perhaps I'm just over worried because of recent issues with ovs and datapath flows, that it was evicting them because it saw no traffic in 5s and so. For example, Subject: [ovs-dev] [PATCH v3] ofproto-dpif-upcall: Wait for valid hw flow stats before applying min-revalidate-pps And we're still chasing a stall in ovs revalidator that leads to hicups in datapath stats periodicity. Yet, I'm not aware of such checks on top of CT entries. > > > Then maybe an update to nf_flow_offload_tuple() to make it handle the > > stats implicitly? > > I considered this, but didn't want to over-complicate this series which > is tricky enough as it is. Makes sense. > > > > >> } > >> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c > >> index 8b852f10fab4..103b2ca8d123 100644 > >> --- a/net/netfilter/nf_flow_table_offload.c > >> +++ b/net/netfilter/nf_flow_table_offload.c > >> @@ -1036,16 +1036,17 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable, > >> } > >> > >> > >> -void nf_flow_offload_add(struct nf_flowtable *flowtable, > >> +bool nf_flow_offload_add(struct nf_flowtable *flowtable, > >> struct flow_offload *flow) > >> { > >> struct flow_offload_work *offload; > >> > >> offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE); > >> if (!offload) > >> - return; > >> + return false; > >> > >> flow_offload_queue_work(offload); > >> + return true; > >> } > >> > >> void nf_flow_offload_del(struct nf_flowtable *flowtable, > >> -- > >> 2.38.1 > >> >