Hello, On Tue, 5 Apr 2016, Marco Angaroni wrote: > When using LVS-NAT and SIP persistence-engine over UDP, the following > limitations are present with current implementation: > > 1) To actually have load-balancing based on Call-ID header, you need to > use one-packet-scheduling mode. But with one-packet-scheduling the > connection is deleted just after packet is forwarded, so SIP responses > coming from real-servers do not match any connection and SNAT is > not applied. > > 2) If you do not use "-o" option, IPVS behaves as normal UDP load > balancer, so different SIP calls (each one identified by a different > Call-ID) coming from the same ip-address/port go to the same > real-server. So basically you don’t have load-balancing based on > Call-ID as intended. > > 3) Call-ID is not learned when a new SIP call is started by a real-server > (inside-to-outside direction), but only in the outside-to-inside > direction. This would be a general problem for all SIP servers acting > as Back2BackUserAgent. > > This patch aims to solve problems 1) and 3) while keeping OPS mode > mandatory for SIP-UDP, so that 2) is not a problem anymore. > > The basic mechanism implemented is to make packets, that do not match any > existing connection but come from real-servers, create new connections > instead of letting them pass without any effect. > When such packets pass through ip_vs_out(), if their source ip address and > source port match a configured real-server, a new connection is > automatically created in the same way as it would have happened if the > packet had come from outside-to-inside direction. A new connection template > is created too if the virtual-service is persistent and there is no > matching connection template found. The new connection automatically > created, if the service had "-o" option, is an OPS connection that lasts > only the time to forward the packet, just like it happens on the > ingress side. 
> > The main part of this mechanism is implemented inside a persistent-engine > specific callback (at the moment only SIP persistent engine exists) and > is triggered only for UDP packets, since connection oriented protocols, by > using different set of ports (typically ephemeral ports) to open new > outgoing connections, should not need this feature. > > The following requisites are needed for automatic connection creation; if > any is missing the packet simply goes the same way as before. > a) virtual-service is not fwmark based (this is because fwmark services > do not store address and port of the virtual-service, required to > build the connection data). > b) virtual-service and real-servers must not have been configured with > omitted port (this is again to have all data to create the connection). > > Signed-off-by: Marco Angaroni <marcoangaroni@xxxxxxxxx> Nice addition, thanks! Simon, please apply. Acked-by: Julian Anastasov <ja@xxxxxx> > --- > include/net/ip_vs.h | 17 +++++ > net/netfilter/ipvs/ip_vs_core.c | 154 ++++++++++++++++++++++++++++++++++++++ > net/netfilter/ipvs/ip_vs_ctl.c | 46 +++++++++++- > net/netfilter/ipvs/ip_vs_pe_sip.c | 15 ++++ > 4 files changed, 231 insertions(+), 1 deletion(-) > > diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h > index 0816c87..7eff508 100644 > --- a/include/net/ip_vs.h > +++ b/include/net/ip_vs.h > @@ -731,6 +731,12 @@ struct ip_vs_pe { > u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, > bool inverse); > int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); > + /* create connections for real-server outgoing packets */ > + struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc, > + struct ip_vs_dest *dest, > + struct sk_buff *skb, > + const struct ip_vs_iphdr *iph, > + __be16 dport, __be16 cport); > }; > > /* The application module object (a.k.a. 
app incarnation) */ > @@ -874,6 +880,7 @@ struct netns_ipvs { > /* Service counters */ > atomic_t ftpsvc_counter; > atomic_t nullsvc_counter; > + atomic_t conn_out_counter; > > #ifdef CONFIG_SYSCTL > /* 1/rate drop and drop-entry variables */ > @@ -1147,6 +1154,12 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) > */ > const char *ip_vs_proto_name(unsigned int proto); > void ip_vs_init_hash_table(struct list_head *table, int rows); > +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, > + struct ip_vs_dest *dest, > + struct sk_buff *skb, > + const struct ip_vs_iphdr *iph, > + __be16 dport, > + __be16 cport); > #define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t))) > > #define IP_VS_APP_TYPE_FTP 1 > @@ -1378,6 +1391,10 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol > bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, > const union nf_inet_addr *daddr, __be16 dport); > > +struct ip_vs_dest * > +ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, > + const union nf_inet_addr *daddr, __be16 dport); > + > int ip_vs_use_count_inc(void); > void ip_vs_use_count_dec(void); > int ip_vs_register_nl_ioctl(void); > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c > index f57b4dc..398ccb5 100644 > --- a/net/netfilter/ipvs/ip_vs_core.c > +++ b/net/netfilter/ipvs/ip_vs_core.c > @@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); > #ifdef CONFIG_IP_VS_DEBUG > EXPORT_SYMBOL(ip_vs_get_debug_level); > #endif > +EXPORT_SYMBOL(ip_vs_new_conn_out); > > static int ip_vs_net_id __read_mostly; > /* netns cnt used for uniqueness */ > @@ -1099,6 +1100,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, > } > } > > +/* Generic function to create new connections for outgoing RS packets > + * > + * Pre-requisites for successful connection creation: > + * 1) Virtual Service is NOT fwmark based: > + * In 
fwmark-VS actual vaddr and vport are unknown to IPVS > + * 2) Real Server and Virtual Service were NOT configured without port: > + * This is to allow match of different VS to the same RS ip-addr > + */ > +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, > + struct ip_vs_dest *dest, > + struct sk_buff *skb, > + const struct ip_vs_iphdr *iph, > + __be16 dport, > + __be16 cport) > +{ > + struct ip_vs_conn_param param; > + struct ip_vs_conn *ct = NULL, *cp = NULL; > + const union nf_inet_addr *vaddr, *daddr, *caddr; > + union nf_inet_addr snet; > + __be16 vport; > + unsigned int flags; > + > + EnterFunction(12); > + vaddr = &svc->addr; > + vport = svc->port; > + daddr = &iph->saddr; > + caddr = &iph->daddr; > + > + /* check pre-requisites are satisfied */ > + if (svc->fwmark) > + return NULL; > + if (!vport || !dport) > + return NULL; > + > + /* for persistent service first create connection template */ > + if (svc->flags & IP_VS_SVC_F_PERSISTENT) { > + /* apply netmask the same way ingress-side does */ > +#ifdef CONFIG_IP_VS_IPV6 > + if (svc->af == AF_INET6) > + ipv6_addr_prefix(&snet.in6, &caddr->in6, > + (__force __u32)svc->netmask); > + else > +#endif > + snet.ip = caddr->ip & svc->netmask; > + /* fill params and create template if not existent */ > + if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol, > + &snet, 0, vaddr, > + vport, &param) < 0) > + return NULL; > + ct = ip_vs_ct_in_get(&param); > + if (!ct) { > + ct = ip_vs_conn_new(&param, dest->af, daddr, dport, > + IP_VS_CONN_F_TEMPLATE, dest, 0); > + if (!ct) { > + kfree(param.pe_data); > + return NULL; > + } > + ct->timeout = svc->timeout; > + } else { > + kfree(param.pe_data); > + } > + } > + > + /* connection flags */ > + flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) && > + iph->protocol == IPPROTO_UDP) ? 
IP_VS_CONN_F_ONE_PACKET : 0; > + /* create connection */ > + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, > + caddr, cport, vaddr, vport, &param); > + cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0); > + if (!cp) { > + if (ct) > + ip_vs_conn_put(ct); > + return NULL; > + } > + if (ct) { > + ip_vs_control_add(cp, ct); > + ip_vs_conn_put(ct); > + } > + ip_vs_conn_stats(cp, svc); > + > + /* return connection (will be used to handle outgoing packet) */ > + IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u " > + "d:%s:%u conn->flags:%X conn->refcnt:%d\n", > + ip_vs_fwd_tag(cp), > + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), > + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), > + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), > + cp->flags, atomic_read(&cp->refcnt)); > + LeaveFunction(12); > + return cp; > +} > + > +/* Handle outgoing packets which are considered requests initiated by > + * real servers, so that subsequent responses from external client can be > + * routed to the right real server. > + * Used also for outgoing responses in OPS mode. > + * > + * Connection management is handled by persistent-engine specific callback. 
> + */ > +static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, > + struct netns_ipvs *ipvs, > + int af, struct sk_buff *skb, > + const struct ip_vs_iphdr *iph) > +{ > + struct ip_vs_dest *dest; > + struct ip_vs_conn *cp = NULL; > + __be16 _ports[2], *pptr; > + > + if (hooknum == NF_INET_LOCAL_IN) > + return NULL; > + > + pptr = frag_safe_skb_hp(skb, iph->len, > + sizeof(_ports), _ports, iph); > + if (!pptr) > + return NULL; > + > + rcu_read_lock(); > + dest = ip_vs_find_real_service(ipvs, af, iph->protocol, > + &iph->saddr, pptr[0]); > + if (dest) { > + struct ip_vs_service *svc; > + struct ip_vs_pe *pe; > + > + svc = rcu_dereference(dest->svc); > + if (svc) { > + pe = rcu_dereference(svc->pe); > + if (pe && pe->conn_out) > + cp = pe->conn_out(svc, dest, skb, iph, > + pptr[0], pptr[1]); > + } > + } > + rcu_read_unlock(); > + > + return cp; > +} > + > /* Handle response packets: rewrite addresses and send away... > */ > static unsigned int > @@ -1244,6 +1382,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > > if (likely(cp)) > return handle_response(af, skb, pd, cp, &iph, hooknum); > + > + /* Check for real-server-started requests */ > + if (atomic_read(&ipvs->conn_out_counter)) { > + /* Currently only for UDP: > + * connection oriented protocols typically use > + * ephemeral ports for outgoing connections, so > + * related incoming responses would not match any VS > + */ > + if (pp->protocol == IPPROTO_UDP) { > + cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph); > + if (likely(cp)) > + return handle_response(af, skb, pd, cp, &iph, > + hooknum); > + } > + } > + > if (sysctl_nat_icmp_send(ipvs) && > (pp->protocol == IPPROTO_TCP || > pp->protocol == IPPROTO_UDP || > diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c > index e7c1b05..fcb2681 100644 > --- a/net/netfilter/ipvs/ip_vs_ctl.c > +++ b/net/netfilter/ipvs/ip_vs_ctl.c > @@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct 
netns_ipvs *ipvs, int af, __u16 protocol, > return false; > } > > +/* Find real service record by <proto,addr,port>. > + * In case of multiple records with the same <proto,addr,port>, only > + * the first found record is returned. > + * > + * To be called under RCU lock. > + */ > +struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, > + __u16 protocol, > + const union nf_inet_addr *daddr, > + __be16 dport) > +{ > + unsigned int hash; > + struct ip_vs_dest *dest; > + > + /* Check for "full" addressed entries */ > + hash = ip_vs_rs_hashkey(af, daddr, dport); > + > + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { > + if (dest->port == dport && > + dest->af == af && > + ip_vs_addr_equal(af, &dest->addr, daddr) && > + (dest->protocol == protocol || dest->vfwmark)) { > + /* HIT */ > + return dest; > + } > + } > + > + return NULL; > +} > + > /* Lookup destination by {addr,port} in the given service > * Called under RCU lock. > */ > @@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, > atomic_inc(&ipvs->ftpsvc_counter); > else if (svc->port == 0) > atomic_inc(&ipvs->nullsvc_counter); > + if (svc->pe && svc->pe->conn_out) > + atomic_inc(&ipvs->conn_out_counter); > > ip_vs_start_estimator(ipvs, &svc->stats); > > @@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) > struct ip_vs_scheduler *sched = NULL, *old_sched; > struct ip_vs_pe *pe = NULL, *old_pe = NULL; > int ret = 0; > + bool new_pe_conn_out, old_pe_conn_out; > > /* > * Lookup the scheduler, by 'u->sched_name' > @@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) > svc->netmask = u->netmask; > > old_pe = rcu_dereference_protected(svc->pe, 1); > - if (pe != old_pe) > + if (pe != old_pe) { > rcu_assign_pointer(svc->pe, pe); > + /* check for optional methods in new pe */ > + new_pe_conn_out = (pe && pe->conn_out) ? 
true : false; > + old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false; > + if (new_pe_conn_out && !old_pe_conn_out) > + atomic_inc(&svc->ipvs->conn_out_counter); > + if (old_pe_conn_out && !new_pe_conn_out) > + atomic_dec(&svc->ipvs->conn_out_counter); > + } > > out: > ip_vs_scheduler_put(old_sched); > @@ -1391,6 +1432,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) > > /* Unbind persistence engine, keep svc->pe */ > old_pe = rcu_dereference_protected(svc->pe, 1); > + if (old_pe && old_pe->conn_out) > + atomic_dec(&ipvs->conn_out_counter); > ip_vs_pe_put(old_pe); > > /* > @@ -3960,6 +4003,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) > (unsigned long) ipvs); > atomic_set(&ipvs->ftpsvc_counter, 0); > atomic_set(&ipvs->nullsvc_counter, 0); > + atomic_set(&ipvs->conn_out_counter, 0); > > /* procfs stats */ > ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); > diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c > index b3e0e5b..dff3a3e 100644 > --- a/net/netfilter/ipvs/ip_vs_pe_sip.c > +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c > @@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf) > return cp->pe_data_len; > } > > +static struct ip_vs_conn * > +ip_vs_sip_conn_out(struct ip_vs_service *svc, > + struct ip_vs_dest *dest, > + struct sk_buff *skb, > + const struct ip_vs_iphdr *iph, > + __be16 dport, > + __be16 cport) > +{ > + if (likely(iph->protocol == IPPROTO_UDP)) > + return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport); > + /* currently no need to handle other than UDP */ > + return NULL; > +} > + > static struct ip_vs_pe ip_vs_sip_pe = > { > .name = "sip", > @@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe = > .ct_match = ip_vs_sip_ct_match, > .hashkey_raw = ip_vs_sip_hashkey_raw, > .show_pe_data = ip_vs_sip_show_pe_data, > + .conn_out = ip_vs_sip_conn_out, > }; > > static int __init ip_vs_sip_init(void) > -- 
> 1.8.3.1 Regards -- Julian Anastasov <ja@xxxxxx>