Re: [PATCHv2 ipvs-next 3/3] netfilter: ipvs: Refactor Maglev hashing scheduler

	Hello,
	Thank you for reviewing the patches.

On Wed, Feb 28, 2018 at 01:34:26AM +0200, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Sun, 25 Feb 2018, Inju Song wrote:
> 
> >   1) Change table size to primes array
> > 
> >   2) Add to check dest number > IP_VS_MH_TAB_SIZE
> > 
> >   3) Replace IP_VS_MH_LOOKUP_SIZE with IP_VS_MH_TAB_SIZE
> > 
> >   4) Replace large permutation array with dest_setup
> > 
> >   5) Add gcd, rshift and calculate ds->turns
> 
> 	The Makefile and Kconfig changes should go in the last patch;
> otherwise the build will fail to compile while bisecting.
>

	OK. I will move the Makefile and Kconfig changes to the last
patch of the v3 patchset. Should I also include the last_weight patch
again?

> > Signed-off-by: Inju Song <inju.song@xxxxxxxxxxxxx>
> > ---
> >  net/netfilter/ipvs/ip_vs_mh.c | 291 ++++++++++++++++++++++++++----------------
> >  1 file changed, 178 insertions(+), 113 deletions(-)
> > 
> > diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
> > index d1bf50a..c868778 100644
> > --- a/net/netfilter/ipvs/ip_vs_mh.c
> > +++ b/net/netfilter/ipvs/ip_vs_mh.c
> 
> 	There must be license info at the beginning.
> 

	Maybe I will use the GPLv2 license. Should I write the license
info like the other sources (i.e. the beginning of ip_vs_sh.c)?
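
	For example, following the style at the top of ip_vs_sh.c, the
header would look roughly like this (the wording below is only a sketch
of that style, not final text):

/* IPVS:	Maglev Hashing scheduling module
 *
 * Authors:	Inju Song <inju.song@xxxxxxxxxxxxx>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */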

> > @@ -5,12 +5,13 @@
> >  #include <linux/slab.h>
> >  #include <linux/module.h>
> >  #include <linux/kernel.h>
> > -#include <linux/vmalloc.h>
> >  #include <linux/skbuff.h>
> >  
> >  #include <net/ip_vs.h>
> >  
> >  #include <linux/siphash.h>
> > +#include <linux/bitops.h>
> > +#include <linux/gcd.h>
> >  
> >  #define IP_VS_SVC_F_SCHED_MH_FALLBACK	IP_VS_SVC_F_SCHED1 /* MH fallback */
> >  #define IP_VS_SVC_F_SCHED_MH_PORT	IP_VS_SVC_F_SCHED2 /* MH use port */
> > @@ -19,23 +20,36 @@ struct ip_vs_mh_lookup {
> >  	struct ip_vs_dest __rcu	*dest;	/* real server (cache) */
> >  };
> >  
> > -/* for IPVS MH entry hash table */
> > -#ifndef CONFIG_IP_VS_MH_TAB_BITS
> > -#define CONFIG_IP_VS_MH_TAB_BITS        8
> > +struct ip_vs_mh_dest_setup {
> > +	int	offset; /* starting offset */
> > +	int	skip;	/* skip */
> > +	int	perm;	/* next_offset */
> > +	int	turns;	/* weight / gcd() */
> > +};
> > +
> > +/* Available prime numbers for MH table */
> > +static int primes[] = {251, 509, 1021, 2039, 4093,
> > +		       8191, 16381, 32749, 65521, 131071};
> > +
> > +/* For IPVS MH entry hash table */
> > +#ifndef CONFIG_IP_VS_MH_TAB_INDEX
> > +#define CONFIG_IP_VS_MH_TAB_INDEX	12
> >  #endif
> > -#define IP_VS_MH_TAB_BITS               CONFIG_IP_VS_MH_TAB_BITS
> > -#define IP_VS_MH_TAB_SIZE               BIT(IP_VS_MH_TAB_BITS)
> > -#define IP_VS_MH_LOOKUP_SIZE		65537 /* Must be prime number */
> > +#define IP_VS_MH_TAB_BITS		(CONFIG_IP_VS_MH_TAB_INDEX / 2)
> > +#define IP_VS_MH_TAB_INDEX		(CONFIG_IP_VS_MH_TAB_INDEX - 8)
> > +#define IP_VS_MH_TAB_SIZE               primes[IP_VS_MH_TAB_INDEX]
> >  
> >  struct ip_vs_mh_state {
> > -	struct rcu_head		rcu_head;
> > -	struct ip_vs_mh_lookup  lookup[IP_VS_MH_LOOKUP_SIZE];
> > -	unsigned int		**permutation;
> > -	hsiphash_key_t		hash1, hash2;
> > +	struct rcu_head			rcu_head;
> > +	struct ip_vs_mh_lookup		*lookup;
> > +	struct ip_vs_mh_dest_setup	*dest_setup;
> > +	hsiphash_key_t			hash1, hash2;
> > +	int				gcd;
> > +	int				rshift;
> >  };
> >  
> > -static inline void
> > -ip_vs_mh_generate_hash_secret(hsiphash_key_t *hash1, hsiphash_key_t *hash2)
> > +static inline void generate_hash_secret(hsiphash_key_t *hash1,
> > +					hsiphash_key_t *hash2)
> >  {
> >  	hash1->key[0] = 2654435761UL;
> >  	hash1->key[1] = 2654435761UL;
> > @@ -68,13 +82,13 @@ static inline bool is_unavailable(struct ip_vs_dest *dest)
> >  	return hsiphash(&v, sizeof(v), key);
> >  }
> >  
> > -static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> > -				     struct ip_vs_service *svc)
> > +static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> > +			      struct ip_vs_service *svc)
> >  {
> > -	int i, j;
> >  	struct list_head *p;
> > +	struct ip_vs_mh_dest_setup *ds;
> >  	struct ip_vs_dest *dest;
> > -	unsigned int offset, skip;
> > +	int lw;
> >  	bool empty;
> >  
> >  	p = &svc->destinations;
> > @@ -82,56 +96,50 @@ static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> >  	if (empty)
> >  		return 0;
> 
> 	The 'empty' var and the list_empty check above can be removed;
> we can rely on the gcd < 1 check.
> 

	OK. I think the gcd < 1 check is enough to cover the empty
destinations case as well.

> >  
> > -	/* extending permutation table to 2d arrays */
> > -	for (i = 1; i < svc->num_dests; i++)
> > -		s->permutation[i] = s->permutation[i - 1] +
> > -				    IP_VS_MH_LOOKUP_SIZE;
> > +	/* If gcd is smaller than 1, all last_weights of the dests are zero,
> > +	 * so skip the permutation for the dests.
> > +	 */
> > +	if (s->gcd < 1)
> > +		return 0;
> >  
> > -	i = 0;
> > +	/* Set dest_setup for the dests permutation */
> > +	ds = &s->dest_setup[0];
> >  	while ((p = p->next) != &svc->destinations) {
> >  		dest = list_entry(p, struct ip_vs_dest, n_list);
> > -		offset = ip_vs_mh_hashkey(svc->af, &dest->addr,	dest->port,
> > -					  &s->hash1, 0) % IP_VS_MH_LOOKUP_SIZE;
> > -		skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
> > -					dest->port, &s->hash2, 0) %
> > -					(IP_VS_MH_LOOKUP_SIZE - 1) + 1;
> > -
> > -		for (j = 0; j < IP_VS_MH_LOOKUP_SIZE; j++) {
> > -			s->permutation[i][j] = (offset + (j * skip)) %
> > -						IP_VS_MH_LOOKUP_SIZE;
> > -		}
> > -		i++;
> > +
> > +		ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr,
> > +					      dest->port, &s->hash1, 0) %
> > +					      IP_VS_MH_TAB_SIZE;
> > +		ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
> > +					    dest->port, &s->hash2, 0) %
> > +					    (IP_VS_MH_TAB_SIZE - 1) + 1;
> > +		ds->perm = ds->offset % IP_VS_MH_TAB_SIZE;
> 
> 	offset is already < IP_VS_MH_TAB_SIZE, no need to mod.
> 

	It will be removed.

> > +
> > +		lw = atomic_read(&dest->last_weight);
> > +		ds->turns = (lw > 1) ? lw / s->gcd : lw;
> 
> 	Maybe we can move the rshift into the calculation here?
> 
> 	ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);
> 
> 	or optimize it in a different way, but we must be sure
> that rshift will not clear turns when lw > 0.
> 

	I think that is a nicely optimized approach. I will move the
rshift into the turns calculation this way.
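
	For example, the loop body in ip_vs_mh_permutate would then
become (a sketch, using the expression above):

		lw = atomic_read(&dest->last_weight);
		/* The GNU ?: form keeps turns >= 1 whenever last_weight > 0,
		 * so rshift can never drop an active dest from the table.
		 */
		ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);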

> > +		ds++;
> >  	}
> >  
> >  	return 0;
> >  }
> >  
> > -static inline int
> > -ip_vs_mh_populate(struct ip_vs_mh_state *s, struct ip_vs_service *svc)
> > +static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
> > +			     struct ip_vs_service *svc)
> >  {
> > -	int i, ret, dw_count;
> > -	unsigned int *next;
> > -	unsigned int **pmt;
> > +	int i, n;
> >  	struct ip_vs_mh_lookup *entry, *l;
> >  	struct list_head *p;
> >  	struct ip_vs_dest *dest;
> > -	unsigned int n, c;
> > +	struct ip_vs_mh_dest_setup *ds;
> > +	int c, rs, dt_count;
> >  	bool empty;
> >  
> > -	ret = 0;
> > -	next = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(unsigned int),
> > -		       GFP_KERNEL);
> > -	if (!next)
> > -		return -ENOMEM;
> > -
> > -	entry = kcalloc(IP_VS_MH_LOOKUP_SIZE, sizeof(*entry),
> > +	entry = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(*entry),
> >  			GFP_KERNEL);
> 
> 	We can avoid this allocation if s->gcd < 1 because
> entry is unused in this case.
> 
> > -	if (!entry) {
> > -		ret = -ENOMEM;
> > -		goto err_alloc;
> > -	}
> > +	if (!entry)
> > +		return -ENOMEM;
> >  
> > -	for (i = 0; i < IP_VS_MH_LOOKUP_SIZE; i++)
> > +	for (i = 0; i < IP_VS_MH_TAB_SIZE; i++)
> >  		RCU_INIT_POINTER(entry[i].dest, NULL);
> >  
> >  	p = &svc->destinations;
> > @@ -139,47 +147,61 @@ static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> >  	if (empty)
> >  		goto out;
> 
> 	We can avoid the list_empty above and use
> empty = (s->gcd < 1) instead. Or maybe the 'empty' var is not
> needed at all if we use a bitmap and populate the dests one by one,
> see below...
> 
> >  
> > +	/* If gcd is smaller than 1, the weight is zero for all dests,
> > +	 * so skip populating the MH table and fill it with NULLs.
> > +	 */
> > +	if (s->gcd < 1)
> > +		goto out;
> 
> 	if (s->gcd < 1)
> 		goto reset;
> 

	Yes, and a reset label will be added for this case.
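
	The control flow of ip_vs_mh_populate would then be shaped
roughly like this (a sketch; the bitmap 'table' is per your suggestion
below, and its allocation is skipped entirely for the gcd < 1 case):

	if (s->gcd < 1)
		goto reset;

	table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
			sizeof(unsigned long), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	/* ... permutation walk populates s->lookup ... */
	goto out;

reset:
	/* s->gcd < 1: all weights are zero, so just empty the lookup table */
	l = &s->lookup[0];
	for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
		dest = rcu_dereference_protected(l->dest, 1);
		if (dest)
			ip_vs_dest_put(dest);
		RCU_INIT_POINTER(l->dest, NULL);
		l++;
	}
	return 0;

out:
	kfree(table);
	return 0;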

> > +
> >  	n = 0;
> > -	dw_count = 0;
> > -	pmt = s->permutation;
> > -	while (n < IP_VS_MH_LOOKUP_SIZE) {
> > +	rs = s->rshift;
> > +	dt_count = 0;
> > +	while (n < IP_VS_MH_TAB_SIZE) {
> >  		if (p == &svc->destinations)
> >  			p = p->next;
> >  
> > -		i = 0;
> > +		ds = &s->dest_setup[0];
> >  		while (p != &svc->destinations) {
> > -			c = pmt[i][next[i]];
> > +			/* ignore added server with zero weight */
> > +			if (ds->turns < 1) {
> > +				p = p->next;
> > +				ds++;
> > +				continue;
> > +			}
> >  
> > +			c = ds->perm;
> >  			while (entry[c].dest) {
> > -				next[i] = next[i] + 1;
> > -				c = pmt[i][next[i]];
> > +				/* Add skip, mod IP_VS_MH_TAB_SIZE */
> > +				ds->perm += ds->skip;
> > +				if (ds->perm >= IP_VS_MH_TAB_SIZE)
> > +					ds->perm -= IP_VS_MH_TAB_SIZE;
> > +				c = ds->perm;
> >  			}
> >  
> >  			dest = list_entry(p, struct ip_vs_dest, n_list);
> >  			RCU_INIT_POINTER(entry[c].dest, dest);
> 
> 	entry is not visible to RCU readers, so normal access
> is enough. It can even be a bitmap to reduce the memory usage
> 32 or 64 times, e.g. we can use 'table' instead of 'entry':
> 
> 	unsigned long *table;
> 
> 	table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
> 			sizeof(unsigned long), GFP_KERNEL);
> 
> 	then 'while (entry[c].dest)' becomes 'while (test_bit(c, table))',
> 	'entry[c].dest = dest;' becomes '__set_bit(c, table);'
> 
> 	And we can directly populate the entry:
> 
> 	dest = rcu_dereference_protected(s->lookup[c].dest, 1);
> 	dest2 = list_entry(p, struct ip_vs_dest, n_list);
> 	if (dest != dest2) {
> 		if (dest)
> 			ip_vs_dest_put(dest);
> 		ip_vs_dest_hold(dest2);
> 		RCU_INIT_POINTER(s->lookup[c].dest, dest2);
> 	}
> 

	OK. Replacing the array with a bitmap will be useful to reduce
memory usage, so it will be replaced in the next patches.
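
	A sketch of the inner loop with the bitmap, combining your
snippets (the dest2 naming is taken from your example):

		c = ds->perm;
		while (test_bit(c, table)) {
			/* Add skip, mod IP_VS_MH_TAB_SIZE */
			ds->perm += ds->skip;
			if (ds->perm >= IP_VS_MH_TAB_SIZE)
				ds->perm -= IP_VS_MH_TAB_SIZE;
			c = ds->perm;
		}
		__set_bit(c, table);

		/* populate s->lookup[c] directly, releasing any old dest */
		dest2 = list_entry(p, struct ip_vs_dest, n_list);
		dest = rcu_dereference_protected(s->lookup[c].dest, 1);
		if (dest != dest2) {
			if (dest)
				ip_vs_dest_put(dest);
			ip_vs_dest_hold(dest2);
			RCU_INIT_POINTER(s->lookup[c].dest, dest2);
		}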

> >  
> > -			next[i] = next[i] + 1;
> > -			n++;
> > -			if (n == IP_VS_MH_LOOKUP_SIZE)
> > -				break;
> > +			if (++n == IP_VS_MH_TAB_SIZE)
> > +				goto out;
> >  
> > -			if (++dw_count >= atomic_read(&dest->weight)) {
> > +			if (++dt_count >= (ds->turns >> rs)) {
> 
> 	turns can already be rshift-ed in ip_vs_mh_permutate.
> 
> > +				dt_count = 0;
> >  				p = p->next;
> > -				dw_count = 0;
> > -				i++;
> > +				ds++;
> >  			}
> >  		}
> >  	}
> 
> 	Here we should jump to kfree(table):
> 
> 	goto out;
> 
> >  
> >  out:
> 
> out: becomes reset:
> 
> 	This loop will remain for the 's->gcd < 1' case only:
> 
> >  	l = &s->lookup[0];
> > -	for (i = 0; i < IP_VS_MH_LOOKUP_SIZE; i++) {
> > -		dest = rcu_dereference_protected(entry[i].dest, 1);
> > +	for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
> > +		dest = rcu_dereference_protected(l->dest, 1);
> >  		if (dest)
> >  			ip_vs_dest_put(dest);
> >  		if (empty) {
> >  			RCU_INIT_POINTER(l->dest, NULL);
> >  		} else {
> 
> 	This block will be removed:
> 
> > +			dest = rcu_dereference_protected(entry[i].dest, 1);
> >  			ip_vs_dest_hold(dest);
> >  			RCU_INIT_POINTER(l->dest, dest);
> >  
> > @@ -193,9 +215,7 @@ static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> >  	}
> >  
> 
> out: label comes here to kfree bitmap 'table'.
> 
> >  	kfree(entry);
> > -err_alloc:
> > -	kfree(next);
> > -	return ret;
> > +	return 0;
> >  }
> >  
> >  /* Get ip_vs_dest associated with supplied parameters. */
> > @@ -204,14 +224,13 @@ static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> >  	     const union nf_inet_addr *addr, __be16 port)
> >  {
> >  	unsigned int hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0)
> > -					     % IP_VS_MH_LOOKUP_SIZE;
> > +					     % IP_VS_MH_TAB_SIZE;
> >  	struct ip_vs_dest *dest = rcu_dereference(s->lookup[hash].dest);
> >  
> >  	return (!dest || is_unavailable(dest)) ? NULL : dest;
> >  }
> >  
> > -/* As ip_vs_mh_get, but with fallback if selected server is unavailable
> > - */
> > +/* As ip_vs_mh_get, but with fallback if selected server is unavailable */
> >  static inline struct ip_vs_dest *
> >  ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
> >  		      const union nf_inet_addr *addr, __be16 port)
> > @@ -222,7 +241,7 @@ static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> >  
> >  	/* first try the dest it's supposed to go to */
> >  	ihash = ip_vs_mh_hashkey(svc->af, addr, port,
> > -				 &s->hash1, 0) % IP_VS_MH_LOOKUP_SIZE;
> > +				 &s->hash1, 0) % IP_VS_MH_TAB_SIZE;
> >  	dest = rcu_dereference(s->lookup[ihash].dest);
> >  	if (!dest)
> >  		return NULL;
> > @@ -235,10 +254,10 @@ static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> >  	/* if the original dest is unavailable, loop around the table
> >  	 * starting from ihash to find a new dest
> >  	 */
> > -	for (offset = 0; offset < IP_VS_MH_LOOKUP_SIZE; offset++) {
> > -		roffset = (offset + ihash) % IP_VS_MH_LOOKUP_SIZE;
> > +	for (offset = 0; offset < IP_VS_MH_TAB_SIZE; offset++) {
> > +		roffset = (offset + ihash) % IP_VS_MH_TAB_SIZE;
> >  		hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
> > -					roffset) % IP_VS_MH_LOOKUP_SIZE;
> > +					roffset) % IP_VS_MH_TAB_SIZE;
> >  		dest = rcu_dereference(s->lookup[hash].dest);
> >  		if (!dest)
> >  			break;
> > @@ -261,7 +280,7 @@ static void ip_vs_mh_flush(struct ip_vs_mh_state *s)
> >  	struct ip_vs_dest *dest;
> >  
> >  	l = &s->lookup[0];
> > -	for (i = 0; i < IP_VS_MH_LOOKUP_SIZE; i++) {
> > +	for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
> >  		dest = rcu_dereference_protected(l->dest, 1);
> >  		if (dest) {
> >  			ip_vs_dest_put(dest);
> > @@ -271,46 +290,85 @@ static void ip_vs_mh_flush(struct ip_vs_mh_state *s)
> >  	}
> >  }
> >  
> > -/* Assign all the hash buckets of the specified table with the service.
> > - */
> > -static int
> > -ip_vs_mh_reassign(struct ip_vs_mh_state *s, struct ip_vs_service *svc)
> > +/* Assign all the hash buckets of the specified table with the service. */
> > +static int ip_vs_mh_reassign(struct ip_vs_mh_state *s,
> > +			     struct ip_vs_service *svc)
> >  {
> >  	int ret;
> >  
> > -	/* ip_vs_mh_reassign is responsible for assigning
> > -	 * and releasing s->permutation table
> > -	 */
> > -	s->permutation = vzalloc(IP_VS_MH_TAB_SIZE * sizeof(unsigned int *));
> > -	if (!s->permutation)
> > -		return -ENOMEM;
> > +	if (svc->num_dests > IP_VS_MH_TAB_SIZE)
> > +		return -EINVAL;
> >  
> > -	s->permutation[0] = vzalloc(IP_VS_MH_TAB_SIZE * IP_VS_MH_LOOKUP_SIZE *
> > -				    sizeof(unsigned int));
> > -	if (!s->permutation[0]) {
> > -		ret = -ENOMEM;
> > -		goto err_alloc;
> > -	}
> > +	s->dest_setup = kcalloc(IP_VS_MH_TAB_SIZE,
> > +				sizeof(struct ip_vs_mh_dest_setup),
> > +				GFP_KERNEL);
> 
> 	For s->dest_setup we need to allocate only svc->num_dests entries;
> IP_VS_MH_TAB_SIZE is too large. But if num_dests == 0 we should avoid
> the allocation, and after kfree(s->dest_setup) we should set
> s->dest_setup = NULL to avoid a double free.
> 

	Yep, it will be fixed.
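
	A sketch of how ip_vs_mh_reassign would then look (sizing by
svc->num_dests and clearing the pointer after kfree, as you suggest):

	if (svc->num_dests > IP_VS_MH_TAB_SIZE)
		return -EINVAL;

	if (svc->num_dests > 0) {
		s->dest_setup = kcalloc(svc->num_dests,
					sizeof(struct ip_vs_mh_dest_setup),
					GFP_KERNEL);
		if (!s->dest_setup)
			return -ENOMEM;
	}

	ip_vs_mh_permutate(s, svc);
	ret = ip_vs_mh_populate(s, svc);

	kfree(s->dest_setup);
	s->dest_setup = NULL;	/* avoid double free on the next reassign */
	return ret;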

> > +	if (!s->dest_setup)
> > +		return -ENOMEM;
> >  
> > -	ret = ip_vs_mh_permutate(s, svc);
> > -	if (ret < 0)
> > -		goto err_out;
> > +	ip_vs_mh_permutate(s, svc);
> >  
> >  	ret = ip_vs_mh_populate(s, svc);
> >  	if (ret < 0)
> > -		goto err_out;
> > +		goto out;
> >  
> >  	IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%d\n",
> >  		      IP_VS_DBG_ADDR(svc->af, &svc->addr),
> >  		      ntohs(svc->port));
> >  
> > -err_out:
> > -	vfree(s->permutation[0]);
> > -err_alloc:
> > -	vfree(s->permutation);
> > +out:
> > +	kfree(s->dest_setup);
> >  	return ret;
> >  }
> >  
> > +static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc)
> > +{
> > +	struct ip_vs_dest *dest;
> > +	int weight;
> > +	int g = 0;
> > +
> > +	list_for_each_entry(dest, &svc->destinations, n_list) {
> > +		weight = atomic_read(&dest->last_weight);
> > +		if (weight > 0) {
> > +			if (g > 0)
> > +				g = gcd(weight, g);
> > +			else
> > +				g = weight;
> > +		}
> > +	}
> > +	return g;
> > +}
> > +
> > +/* To avoid assigning huge weight for the MH table,
> > + * calculate shift value with gcd.
> > + */
> > +static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd)
> > +{
> > +	struct ip_vs_dest *dest;
> > +	int new_weight, weight = 0;
> > +	int mw, shift;
> > +
> > +	/* If gcd is smaller than 1, the weight is zero for all dests,
> > +	 * so skip calculating rshift.
> > +	 */
> > +	if (gcd < 1)
> > +		return 0;
> > +
> > +	list_for_each_entry(dest, &svc->destinations, n_list) {
> > +		new_weight = atomic_read(&dest->last_weight);
> > +		if (new_weight > weight)
> > +			weight = new_weight;
> > +	}
> > +
> > +	/* Because gcd is greater than zero,
> > +	 * the maximum weight and gcd are always greater than zero
> > +	 */
> > +	mw = weight / gcd;
> > +
> > +	/* shift = occupied bits of weight/gcd - MH highest bits */
> > +	shift = fls(mw) - IP_VS_MH_TAB_BITS;
> > +	return (shift >= 0) ? shift : 0;
> > +}
> > +
> >  static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
> >  {
> >  	struct ip_vs_mh_state *s;
> > @@ -320,18 +378,24 @@ static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
> >  	if (!s)
> >  		return -ENOMEM;
> >  
> > -	svc->sched_data = s;
> > +	s->lookup = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(struct ip_vs_mh_lookup),
> > +			    GFP_KERNEL);
> > +	if (!s->lookup) {
> > +		kfree_rcu(s, rcu_head);
> 
> 	kfree(s) is ok here.
> 
> > +		return -ENOMEM;
> > +	}
> > +
> > +	generate_hash_secret(&s->hash1, &s->hash2);
> > +	s->gcd = ip_vs_mh_gcd_weight(svc);
> > +	s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
> >  
> > +	svc->sched_data = s;
> >  	IP_VS_DBG(6,
> >  		  "MH lookup table (memory=%zdbytes) allocated for current service\n",
> > -		  sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_LOOKUP_SIZE);
> > -
> > -	ip_vs_mh_generate_hash_secret(&s->hash1, &s->hash2);
> > +		  sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
> >  
> >  	/* permutate & populate with current dests */
> > -	ip_vs_mh_reassign(s, svc);
> > -
> > -	return 0;
> > +	return ip_vs_mh_reassign(s, svc);
> >  }
> >  
> >  static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
> > @@ -344,7 +408,7 @@ static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
> >  	/* release the table itself */
> >  	kfree_rcu(s, rcu_head);
> 
> 	s->lookup is not freed; we should free both
> s and s->lookup in a new RCU callback. It will need a change in
> ip_vs_mh_cleanup: synchronize_rcu() -> rcu_barrier(), so that
> module unload waits for the callback to complete.
> See ip_vs_lblcr.c for how call_rcu() is called to schedule the
> callback. We will need:
> 
> static void ip_vs_mh_state_free(struct rcu_head *head)
> {
>         struct ip_vs_mh_state *s;
> 
> 	s = container_of(head, struct ip_vs_mh_state, rcu_head);
> 	kfree(s->lookup);
> 	kfree(s);
> }
> 

	I missed freeing s->lookup. Thanks for letting me know.
I will apply these review comments.
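
	Then ip_vs_mh_done_svc and the module exit would become something
like this (a sketch following the ip_vs_lblcr.c pattern; the scheduler
struct name below is illustrative):

static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_mh_state *s = svc->sched_data;

	/* got to clean up lookup entries here */
	ip_vs_mh_flush(s);

	/* release the state (and s->lookup) via the RCU callback */
	call_rcu(&s->rcu_head, ip_vs_mh_state_free);
}

static void __exit ip_vs_mh_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_mh_scheduler);
	rcu_barrier();	/* wait for ip_vs_mh_state_free callbacks to finish */
}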

> >  	IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n",
> > -		  sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_LOOKUP_SIZE);
> > +		  sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
> >  }
> >  
> >  static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
> > @@ -352,10 +416,11 @@ static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
> >  {
> >  	struct ip_vs_mh_state *s = svc->sched_data;
> >  
> > -	/* assign the hash buckets with the updated service */
> > -	ip_vs_mh_reassign(s, svc);
> > +	s->gcd = ip_vs_mh_gcd_weight(svc);
> > +	s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
> >  
> > -	return 0;
> > +	/* assign the hash buckets with the updated service */
> > +	return ip_vs_mh_reassign(s, svc);
> >  }
> >  
> >  /* Helper function to get port number */
> > -- 
> > 1.8.3.1
> 
> Regards
> 
> --
> Julian Anastasov <ja@xxxxxx>
> 
> 

Regards

-- 
Inju Song
NAVER Corporation



