Hello, On Sat, 3 Oct 2020, longguang.yue wrote: > It's ipvs's duty to do traffic statistic if packets get hit, > no matter what mode it is. > > Changes in v1: support DR/TUN mode statistic > Changes in v2: ip_vs_conn_out_get handles DR/TUN mode's conn > Changes in v3: fix checkpatch > Changes in v4: restructure and optimise this feature Changes should be after --- > Signed-off-by: longguang.yue <bigclouds@xxxxxxx> > --- > net/netfilter/ipvs/ip_vs_conn.c | 18 +++++++++++++++--- > net/netfilter/ipvs/ip_vs_core.c | 24 +++++++++++++++++------- > 2 files changed, 32 insertions(+), 10 deletions(-) > > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c > index a90b8eac16ac..af08ca2d9174 100644 > --- a/net/netfilter/ipvs/ip_vs_conn.c > +++ b/net/netfilter/ipvs/ip_vs_conn.c > @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) > struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > { > unsigned int hash; > + __be16 sport; > + const union nf_inet_addr *saddr; > struct ip_vs_conn *cp, *ret=NULL; > > /* > @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > rcu_read_lock(); > > hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { > - if (p->vport == cp->cport && p->cport == cp->dport && > - cp->af == p->af && > + if (p->vport != cp->cport) > + continue; > + > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { > + sport = cp->vport; > + saddr = &cp->vaddr; > + } else { > + sport = cp->dport; > + saddr = &cp->daddr; > + } > + > + if (p->cport == sport && cp->af == p->af && > ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && > - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && > + ip_vs_addr_equal(p->af, p->caddr, saddr) && > p->protocol == cp->protocol && > cp->ipvs == p->ipvs) { > if (!__ip_vs_conn_get(cp)) > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c > index e3668a6e54e4..315289aecad7 100644 > --- a/net/netfilter/ipvs/ip_vs_core.c > +++ b/net/netfilter/ipvs/ip_vs_core.c > @@ -911,6 +911,10 @@ static int handle_response_icmp(int af, struct sk_buff *skb, > ip_vs_update_conntrack(skb, cp, 0); > Something is wrong here. May be it should be as simple as that (ignore_cp is moved and renamed to after_nat): @@ -875,7 +875,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; + goto after_nat; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -901,6 +901,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; +after_nat: /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -909,8 +910,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); - -ignore_cp: verdict = NF_ACCEPT; out: > ignore_cp: > + ip_vs_out_stats(cp, skb); > + skb->ipvs_property = 1; > + if (!(cp->flags & IP_VS_CONN_F_NFCT)) > + ip_vs_notrack(skb); > verdict = NF_ACCEPT; > > out: > @@ -1276,6 +1280,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, > { > struct ip_vs_protocol *pp = pd->pp; > > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > + goto ignore_cp; > + > IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); > > if (skb_ensure_writable(skb, iph->len)) > @@ -1328,6 +1335,16 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, > LeaveFunction(11); > return NF_ACCEPT; > > +ignore_cp: > + ip_vs_out_stats(cp, skb); > + skb->ipvs_property = 1; > + if (!(cp->flags & IP_VS_CONN_F_NFCT)) > + ip_vs_notrack(skb); > + __ip_vs_conn_put(cp); > + > + LeaveFunction(11); > + return NF_ACCEPT; > + For handle_response() too, the code is already there: @@ -1278,6 +1277,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto after_nat; + if (skb_ensure_writable(skb, iph->len)) goto drop; @@ -1316,6 +1318,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); +after_nat: ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; What we do is: - switch to bidirectional state changes by calling ip_vs_set_state with IP_VS_DIR_OUTPUT, we have packets in both directions just like MASQ. See set_tcp_state(). - use ip_vs_conn_put() like MASQ because this packet is part of the connection, not __ip_vs_conn_put(). This will schedule proper timeout according to the state. - __ip_vs_conn_put() is inappropriate for the last handle_response() call because real server can initiate new connection (see ip_vs_new_conn_out) with service using IP_VS_SVC_F_ONEPACKET flag. > drop: > ip_vs_conn_put(cp); > kfree_skb(skb); > @@ -1413,8 +1430,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > ipvs, af, skb, &iph); > > if (likely(cp)) { > - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > - goto ignore_cp; > return handle_response(af, skb, pd, cp, &iph, hooknum); > } > > @@ -1475,14 +1490,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > } > } > > -out: > IP_VS_DBG_PKT(12, af, pp, skb, iph.off, > "ip_vs_out: packet continues traversal as normal"); > return NF_ACCEPT; > - > -ignore_cp: > - __ip_vs_conn_put(cp); > - goto out; > } > > /* Regards -- Julian Anastasov <ja@xxxxxx>