Search Postgresql Archives

Re: intarray internals

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I've prepared a Quick & Dirty patch serie for some missing parts in
intarray contrib module. Here they go with some explanation...


On May 06 12:38, Volkan YAZICI wrote:
> [4]
> In the inner_int_contains() function of _int_tool.c, there's a while
> loop like
> 
> [Code assumes that arrays are sorted.]
> 
> na = ARRNELEMS(a);
> nb = ARRNELEMS(b);
> da = ARRPTR(a);
> db = ARRPTR(b);
> 
> i = j = n = 0;
> while (i < na && j < nb)
>     if (da[i] < db[j])
>         i++;
>     else if (da[i] == db[j])
>     {
>         n++;
>         i++;
>         j++;
>     }
>     else
>         j++;
> 
> return (n == nb) ? TRUE : FALSE;
> 
> AFAICS, last "j++" should be replaced with "return FALSE". Because, "n"
> cannot be equal to "nb" no more, if "j" gets incremented without
> incrementing "n" (remember "j < nb" in the "while" condition).

intarray_contains.patch.0 is for above problem.


[5]
ISTM, inner_int_union() of _int_tool.c, makes some redundant visits to
array:

    ...
    /* union */
    i = j = 0;
    while (i < na && j < nb)
        if (da[i] < db[j])
            *dr++ = da[i++];
        else
            *dr++ = db[j++];

    while (i < na)
        *dr++ = da[i++];
    while (j < nb)
        *dr++ = db[j++];

}

if (ARRNELEMS(r) > 1)
    r = _int_unique(r);

IMHO, uniting unique values (instead of uniting and then removing
duplicates) should work faster. intarray_union.patch.0 is for this
problem. (Patched code, handles uniting for unique values.)


[6]
There's a seperate sorting algorithm isort() in _int_tool.c. Looks like
it executes some kind of shell sort on the array and returns true if
array has any duplicates. It's used for common sorting and deciding on
executing _int_unique() on the related array if isort() says it has
duplicates.

IIRC, our inner qsort() has a smaller O(N) degree when compared to above
sorting algorithm. Also for the code's sake it'd be better to switch
using qsort() in all sorting related stuff. For these reasons,
intarray_sort.patch.0 addresses this (probable) gotcha.


All 3 patches passed regression tests for intarray contrib module. But
these are just for addressing some gotchas I found while reading code.
Your comments for these problems(?) are welcome.


Regards.
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c	19 Nov 2005 03:00:09 -0000	1.6
--- contrib/intarray/_int_tool.c	6 May 2006 17:51:35 -0000
***************
*** 34,40 ****
  			j++;
  		}
  		else
! 			j++;
  
  	return (n == nb) ? TRUE : FALSE;
  }
--- 34,40 ----
  			j++;
  		}
  		else
! 			break;
  
  	return (n == nb) ? TRUE : FALSE;
  }
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c	19 Nov 2005 03:00:09 -0000	1.6
--- contrib/intarray/_int_tool.c	7 May 2006 09:59:09 -0000
***************
*** 183,221 ****
  	return;
  }
  
- 
- /* len >= 2 */
- bool
- isort(int4 *a, int len)
- {
- 	int4		tmp,
- 				index;
- 	int4	   *cur,
- 			   *end;
- 	bool		r = FALSE;
- 
- 	end = a + len;
- 	do
- 	{
- 		index = 0;
- 		cur = a + 1;
- 		while (cur < end)
- 		{
- 			if (*(cur - 1) > *cur)
- 			{
- 				tmp = *(cur - 1);
- 				*(cur - 1) = *cur;
- 				*cur = tmp;
- 				index = 1;
- 			}
- 			else if (!r && *(cur - 1) == *cur)
- 				r = TRUE;
- 			cur++;
- 		}
- 	} while (index);
- 	return r;
- }
- 
  ArrayType *
  new_intArrayType(int num)
  {
--- 183,188 ----
Index: contrib/intarray/_int.h
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int.h,v
retrieving revision 1.9
diff -c -r1.9 _int.h
*** contrib/intarray/_int.h	3 May 2006 16:31:07 -0000	1.9
--- contrib/intarray/_int.h	7 May 2006 09:59:10 -0000
***************
*** 38,56 ****
  
  #define ARRISVOID(x)  ((x) == NULL || ARRNELEMS(x) == 0)
  
- #define SORT(x) \
- 	do { \
- 		 if ( ARRNELEMS( x ) > 1 ) \
- 			isort( ARRPTR( x ), ARRNELEMS( x ) ); \
- 	} while(0)
- 
- #define PREPAREARR(x) \
- 	do { \
- 		 if ( ARRNELEMS( x ) > 1 ) \
- 			if ( isort( ARRPTR( x ), ARRNELEMS( x ) ) ) \
- 				x = _int_unique( x ); \
- 	} while(0)
- 
  /* "wish" function */
  #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
  
--- 38,43 ----
***************
*** 105,111 ****
  /*
  ** useful function
  */
- bool		isort(int4 *a, const int len);
  ArrayType  *new_intArrayType(int num);
  ArrayType  *copy_intArrayType(ArrayType *a);
  ArrayType  *resize_intArrayType(ArrayType *a, int num);
--- 92,97 ----
***************
*** 171,173 ****
--- 157,168 ----
  if (ARRNELEMS(a) > 1)											\
  		qsort((void*)ARRPTR(a), ARRNELEMS(a),sizeof(int4),		\
  				(direction) ? compASC : compDESC )
+ 
+ #define SORT(x)	QSORT(x, 1)
+ 
+ #define PREPAREARR(x) \
+ 	if (ARRNELEMS(x) > 1) \
+ 	{ \
+ 		SORT(x); \
+ 		x = _int_unique( x ); \
+ 	}
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c	19 Nov 2005 03:00:09 -0000	1.6
--- contrib/intarray/_int_tool.c	6 May 2006 17:49:13 -0000
***************
*** 94,103 ****
  	if (ARRISVOID(b))
  		r = copy_intArrayType(a);
  
! 	if (r)
! 		dr = ARRPTR(r);
  	else
  	{
  		na = ARRNELEMS(a);
  		nb = ARRNELEMS(b);
  		da = ARRPTR(a);
--- 94,108 ----
  	if (ARRISVOID(b))
  		r = copy_intArrayType(a);
  
! 	if (r)	/* Only one of the arrays isn't void and it's assigned to r. */
! 	{
! 		if (ARRNELEMS(r) > 1)
! 			r = _int_unique(r);				/* Remove repetitive values. */
! 	}
  	else
  	{
+ 		int k;
+ 		
  		na = ARRNELEMS(a);
  		nb = ARRNELEMS(b);
  		da = ARRPTR(a);
***************
*** 106,128 ****
  		r = new_intArrayType(na + nb);
  		dr = ARRPTR(r);
  
! 		/* union */
! 		i = j = 0;
! 		while (i < na && j < nb)
! 			if (da[i] < db[j])
! 				*dr++ = da[i++];
! 			else
! 				*dr++ = db[j++];
! 
! 		while (i < na)
! 			*dr++ = da[i++];
! 		while (j < nb)
! 			*dr++ = db[j++];
! 
  	}
  
! 	if (ARRNELEMS(r) > 1)
! 		r = _int_unique(r);
  
  	return r;
  }
--- 111,164 ----
  		r = new_intArrayType(na + nb);
  		dr = ARRPTR(r);
  
! /* Move i to next new integer in the array. */
! #define NEXT_NEW(arr, len, i) \
! 	if (i < len) \
! 	{ \
! 		while (++i < len) \
! 			if (arr[i] != arr[i-1]) \
! 				break; \
  	}
+ 		
+ 		/*
+ 		 * Form sorted union of arrays A and B.
+ 		 * (Repetitive values will be discarded.)
+ 		 */
+ 		for (i = j = k = 0; i < na || j < nb; k++)
+ 		{
+ 			if (i >= na)					/* A is exhausted. */
+ 			{
+ 				dr[k] = db[j];
+ 				NEXT_NEW(db, nb, j);
+ 			}
+ 			else if (j >= nb) 				/* B is exhausted. */
+ 			{
+ 				dr[k] = da[i];
+ 				NEXT_NEW(da, na, i);
+ 			}
+ 			else 							/* a[] and b[] still isn't exhausted. */
+ 			{
+ 				if (da[i] < db[j])
+ 				{
+ 					dr[k] = da[i];
+ 					NEXT_NEW(da, na, i);
+ 				}
+ 				else if (da[i] > db[j])
+ 				{
+ 					dr[k] = db[j];
+ 					NEXT_NEW(db, nb, j);
+ 				}
+ 				else
+ 				{
+ 					dr[k] = da[i];
+ 					NEXT_NEW(da, na, i);
+ 					NEXT_NEW(db, nb, j);
+ 				}
+ 			}
+ 		}
  
! 		r = resize_intArrayType(r, k);
! 	}
  
  	return r;
  }

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Books]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]
  Powered by Linux