Re: [PATCH] ipvs: handle connections started by real-servers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



	Hello,

On Tue, 5 Apr 2016, Marco Angaroni wrote:

> When using LVS-NAT and SIP persistence-engine over UDP, the following
> limitations are present with current implementation:
> 
>   1) To actually have load-balancing based on Call-ID header, you need to
>      use one-packet-scheduling mode. But with one-packet-scheduling the
>      connection is deleted just after packet is forwarded, so SIP responses
>      coming from real-servers do not match any connection and SNAT is
>      not applied.
> 
>   2) If you do not use "-o" option, IPVS behaves as normal UDP load
>      balancer, so different SIP calls (each one identified by a different
>      Call-ID) coming from the same ip-address/port go to the same
>      real-server. So basically you don’t have load-balancing based on
>      Call-ID as intended.
> 
>   3) Call-ID is not learned when a new SIP call is started by a real-server
>      (inside-to-outside direction), but only in the outside-to-inside
>      direction. This would be a general problem for all SIP servers acting
>      as Back2BackUserAgent.
> 
> This patch aims to solve problems 1) and 3) while keeping OPS mode
> mandatory for SIP-UDP, so that 2) is not a problem anymore.
> 
> The basic mechanism implemented is to make packets that do not match any
> existing connection but come from real-servers create new connections
> instead of letting them pass without any effect.
> When such packets pass through ip_vs_out(), if their source ip address and
> source port match a configured real-server, a new connection is
> automatically created in the same way as it would have happened if the
> packet had come from outside-to-inside direction. A new connection template
> is created too if the virtual-service is persistent and there is no
> matching connection template found. The new connection automatically
> created, if the service had "-o" option, is an OPS connection that lasts
> only the time to forward the packet, just like it happens on the
> ingress side.
> 
> The main part of this mechanism is implemented inside a persistent-engine
> specific callback (at the moment only SIP persistent engine exists) and
> is triggered only for UDP packets, since connection-oriented protocols, by
> using a different set of ports (typically ephemeral ports) to open new
> outgoing connections, should not need this feature.
> 
> The following requisites are needed for automatic connection creation; if
> any is missing the packet simply goes the same way as before.
> a) virtual-service is not fwmark based (this is because fwmark services
>    do not store address and port of the virtual-service, required to
>    build the connection data).
> b) virtual-service and real-servers must not have been configured with
>    omitted port (this is again to have all data to create the connection).
> 
> Signed-off-by: Marco Angaroni <marcoangaroni@xxxxxxxxx>

	Nice addition, thanks! Simon, please apply.

Acked-by: Julian Anastasov <ja@xxxxxx>

> ---
>  include/net/ip_vs.h               |  17 +++++
>  net/netfilter/ipvs/ip_vs_core.c   | 154 ++++++++++++++++++++++++++++++++++++++
>  net/netfilter/ipvs/ip_vs_ctl.c    |  46 +++++++++++-
>  net/netfilter/ipvs/ip_vs_pe_sip.c |  15 ++++
>  4 files changed, 231 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index 0816c87..7eff508 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -731,6 +731,12 @@ struct ip_vs_pe {
>  	u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
>  			   bool inverse);
>  	int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
> +	/* create connections for real-server outgoing packets */
> +	struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc,
> +				       struct ip_vs_dest *dest,
> +				       struct sk_buff *skb,
> +				       const struct ip_vs_iphdr *iph,
> +				       __be16 dport, __be16 cport);
>  };
>  
>  /* The application module object (a.k.a. app incarnation) */
> @@ -874,6 +880,7 @@ struct netns_ipvs {
>  	/* Service counters */
>  	atomic_t		ftpsvc_counter;
>  	atomic_t		nullsvc_counter;
> +	atomic_t		conn_out_counter;
>  
>  #ifdef CONFIG_SYSCTL
>  	/* 1/rate drop and drop-entry variables */
> @@ -1147,6 +1154,12 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
>   */
>  const char *ip_vs_proto_name(unsigned int proto);
>  void ip_vs_init_hash_table(struct list_head *table, int rows);
> +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
> +				      struct ip_vs_dest *dest,
> +				      struct sk_buff *skb,
> +				      const struct ip_vs_iphdr *iph,
> +				      __be16 dport,
> +				      __be16 cport);
>  #define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))
>  
>  #define IP_VS_APP_TYPE_FTP	1
> @@ -1378,6 +1391,10 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol
>  bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
>  			    const union nf_inet_addr *daddr, __be16 dport);
>  
> +struct ip_vs_dest *
> +ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
> +			const union nf_inet_addr *daddr, __be16 dport);
> +
>  int ip_vs_use_count_inc(void);
>  void ip_vs_use_count_dec(void);
>  int ip_vs_register_nl_ioctl(void);
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index f57b4dc..398ccb5 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
>  #ifdef CONFIG_IP_VS_DEBUG
>  EXPORT_SYMBOL(ip_vs_get_debug_level);
>  #endif
> +EXPORT_SYMBOL(ip_vs_new_conn_out);
>  
>  static int ip_vs_net_id __read_mostly;
>  /* netns cnt used for uniqueness */
> @@ -1099,6 +1100,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
>  	}
>  }
>  
> +/* Generic function to create new connections for outgoing RS packets
> + *
> + * Pre-requisites for successful connection creation:
> + * 1) Virtual Service is NOT fwmark based:
> + *    In fwmark-VS actual vaddr and vport are unknown to IPVS
> + * 2) Real Server and Virtual Service were NOT configured without port:
> + *    This is to allow match of different VS to the same RS ip-addr
> + */
> +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
> +				      struct ip_vs_dest *dest,
> +				      struct sk_buff *skb,
> +				      const struct ip_vs_iphdr *iph,
> +				      __be16 dport,
> +				      __be16 cport)
> +{
> +	struct ip_vs_conn_param param;
> +	struct ip_vs_conn *ct = NULL, *cp = NULL;
> +	const union nf_inet_addr *vaddr, *daddr, *caddr;
> +	union nf_inet_addr snet;
> +	__be16 vport;
> +	unsigned int flags;
> +
> +	EnterFunction(12);
> +	vaddr = &svc->addr;
> +	vport = svc->port;
> +	daddr = &iph->saddr;
> +	caddr = &iph->daddr;
> +
> +	/* check pre-requisites are satisfied */
> +	if (svc->fwmark)
> +		return NULL;
> +	if (!vport || !dport)
> +		return NULL;
> +
> +	/* for persistent service first create connection template */
> +	if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
> +		/* apply netmask the same way ingress-side does */
> +#ifdef CONFIG_IP_VS_IPV6
> +		if (svc->af == AF_INET6)
> +			ipv6_addr_prefix(&snet.in6, &caddr->in6,
> +					 (__force __u32)svc->netmask);
> +		else
> +#endif
> +			snet.ip = caddr->ip & svc->netmask;
> +		/* fill params and create template if not existent */
> +		if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol,
> +						  &snet, 0, vaddr,
> +						  vport, &param) < 0)
> +			return NULL;
> +		ct = ip_vs_ct_in_get(&param);
> +		if (!ct) {
> +			ct = ip_vs_conn_new(&param, dest->af, daddr, dport,
> +					    IP_VS_CONN_F_TEMPLATE, dest, 0);
> +			if (!ct) {
> +				kfree(param.pe_data);
> +				return NULL;
> +			}
> +			ct->timeout = svc->timeout;
> +		} else {
> +			kfree(param.pe_data);
> +		}
> +	}
> +
> +	/* connection flags */
> +	flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) &&
> +		 iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0;
> +	/* create connection */
> +	ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
> +			      caddr, cport, vaddr, vport, &param);
> +	cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0);
> +	if (!cp) {
> +		if (ct)
> +			ip_vs_conn_put(ct);
> +		return NULL;
> +	}
> +	if (ct) {
> +		ip_vs_control_add(cp, ct);
> +		ip_vs_conn_put(ct);
> +	}
> +	ip_vs_conn_stats(cp, svc);
> +
> +	/* return connection (will be used to handle outgoing packet) */
> +	IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u "
> +		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
> +		      ip_vs_fwd_tag(cp),
> +		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
> +		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
> +		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
> +		      cp->flags, atomic_read(&cp->refcnt));
> +	LeaveFunction(12);
> +	return cp;
> +}
> +
> +/* Handle outgoing packets which are considered requests initiated by
> + * real servers, so that subsequent responses from external client can be
> + * routed to the right real server.
> + * Used also for outgoing responses in OPS mode.
> + *
> + * Connection management is handled by persistent-engine specific callback.
> + */
> +static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
> +					      struct netns_ipvs *ipvs,
> +					      int af, struct sk_buff *skb,
> +					      const struct ip_vs_iphdr *iph)
> +{
> +	struct ip_vs_dest *dest;
> +	struct ip_vs_conn *cp = NULL;
> +	__be16 _ports[2], *pptr;
> +
> +	if (hooknum == NF_INET_LOCAL_IN)
> +		return NULL;
> +
> +	pptr = frag_safe_skb_hp(skb, iph->len,
> +				sizeof(_ports), _ports, iph);
> +	if (!pptr)
> +		return NULL;
> +
> +	rcu_read_lock();
> +	dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
> +				       &iph->saddr, pptr[0]);
> +	if (dest) {
> +		struct ip_vs_service *svc;
> +		struct ip_vs_pe *pe;
> +
> +		svc = rcu_dereference(dest->svc);
> +		if (svc) {
> +			pe = rcu_dereference(svc->pe);
> +			if (pe && pe->conn_out)
> +				cp = pe->conn_out(svc, dest, skb, iph,
> +						  pptr[0], pptr[1]);
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	return cp;
> +}
> +
>  /* Handle response packets: rewrite addresses and send away...
>   */
>  static unsigned int
> @@ -1244,6 +1382,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
>  
>  	if (likely(cp))
>  		return handle_response(af, skb, pd, cp, &iph, hooknum);
> +
> +	/* Check for real-server-started requests */
> +	if (atomic_read(&ipvs->conn_out_counter)) {
> +		/* Currently only for UDP:
> +		 * connection oriented protocols typically use
> +		 * ephemeral ports for outgoing connections, so
> +		 * related incoming responses would not match any VS
> +		 */
> +		if (pp->protocol == IPPROTO_UDP) {
> +			cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
> +			if (likely(cp))
> +				return handle_response(af, skb, pd, cp, &iph,
> +						       hooknum);
> +		}
> +	}
> +
>  	if (sysctl_nat_icmp_send(ipvs) &&
>  	    (pp->protocol == IPPROTO_TCP ||
>  	     pp->protocol == IPPROTO_UDP ||
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index e7c1b05..fcb2681 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
>  	return false;
>  }
>  
> +/* Find real service record by <proto,addr,port>.
> + * In case of multiple records with the same <proto,addr,port>, only
> + * the first found record is returned.
> + *
> + * To be called under RCU lock.
> + */
> +struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
> +					   __u16 protocol,
> +					   const union nf_inet_addr *daddr,
> +					   __be16 dport)
> +{
> +	unsigned int hash;
> +	struct ip_vs_dest *dest;
> +
> +	/* Check for "full" addressed entries */
> +	hash = ip_vs_rs_hashkey(af, daddr, dport);
> +
> +	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
> +		if (dest->port == dport &&
> +		    dest->af == af &&
> +		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
> +			(dest->protocol == protocol || dest->vfwmark)) {
> +			/* HIT */
> +			return dest;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
>  /* Lookup destination by {addr,port} in the given service
>   * Called under RCU lock.
>   */
> @@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
>  		atomic_inc(&ipvs->ftpsvc_counter);
>  	else if (svc->port == 0)
>  		atomic_inc(&ipvs->nullsvc_counter);
> +	if (svc->pe && svc->pe->conn_out)
> +		atomic_inc(&ipvs->conn_out_counter);
>  
>  	ip_vs_start_estimator(ipvs, &svc->stats);
>  
> @@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
>  	struct ip_vs_scheduler *sched = NULL, *old_sched;
>  	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
>  	int ret = 0;
> +	bool new_pe_conn_out, old_pe_conn_out;
>  
>  	/*
>  	 * Lookup the scheduler, by 'u->sched_name'
> @@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
>  	svc->netmask = u->netmask;
>  
>  	old_pe = rcu_dereference_protected(svc->pe, 1);
> -	if (pe != old_pe)
> +	if (pe != old_pe) {
>  		rcu_assign_pointer(svc->pe, pe);
> +		/* check for optional methods in new pe */
> +		new_pe_conn_out = (pe && pe->conn_out) ? true : false;
> +		old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
> +		if (new_pe_conn_out && !old_pe_conn_out)
> +			atomic_inc(&svc->ipvs->conn_out_counter);
> +		if (old_pe_conn_out && !new_pe_conn_out)
> +			atomic_dec(&svc->ipvs->conn_out_counter);
> +	}
>  
>  out:
>  	ip_vs_scheduler_put(old_sched);
> @@ -1391,6 +1432,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
>  
>  	/* Unbind persistence engine, keep svc->pe */
>  	old_pe = rcu_dereference_protected(svc->pe, 1);
> +	if (old_pe && old_pe->conn_out)
> +		atomic_dec(&ipvs->conn_out_counter);
>  	ip_vs_pe_put(old_pe);
>  
>  	/*
> @@ -3960,6 +4003,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
>  		    (unsigned long) ipvs);
>  	atomic_set(&ipvs->ftpsvc_counter, 0);
>  	atomic_set(&ipvs->nullsvc_counter, 0);
> +	atomic_set(&ipvs->conn_out_counter, 0);
>  
>  	/* procfs stats */
>  	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
> diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
> index b3e0e5b..dff3a3e 100644
> --- a/net/netfilter/ipvs/ip_vs_pe_sip.c
> +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
> @@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
>  	return cp->pe_data_len;
>  }
>  
> +static struct ip_vs_conn *
> +ip_vs_sip_conn_out(struct ip_vs_service *svc,
> +		   struct ip_vs_dest *dest,
> +		   struct sk_buff *skb,
> +		   const struct ip_vs_iphdr *iph,
> +		   __be16 dport,
> +		   __be16 cport)
> +{
> +	if (likely(iph->protocol == IPPROTO_UDP))
> +		return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport);
> +	/* currently no need to handle other than UDP */
> +	return NULL;
> +}
> +
>  static struct ip_vs_pe ip_vs_sip_pe =
>  {
>  	.name =			"sip",
> @@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe =
>  	.ct_match =		ip_vs_sip_ct_match,
>  	.hashkey_raw =		ip_vs_sip_hashkey_raw,
>  	.show_pe_data =		ip_vs_sip_show_pe_data,
> +	.conn_out =		ip_vs_sip_conn_out,
>  };
>  
>  static int __init ip_vs_sip_init(void)
> -- 
> 1.8.3.1

Regards

--
Julian Anastasov <ja@xxxxxx>

[Index of Archives]     [Linux Filesystem Devel]     [Linux NFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]     [X.Org]

  Powered by Linux