Hi, I've prepared a Quick & Dirty patch serie for some missing parts in intarray contrib module. Here they go with some explanation... On May 06 12:38, Volkan YAZICI wrote: > [4] > In the inner_int_contains() function of _int_tool.c, there's a while > loop like > > [Code assumes that arrays are sorted.] > > na = ARRNELEMS(a); > nb = ARRNELEMS(b); > da = ARRPTR(a); > db = ARRPTR(b); > > i = j = n = 0; > while (i < na && j < nb) > if (da[i] < db[j]) > i++; > else if (da[i] == db[j]) > { > n++; > i++; > j++; > } > else > j++; > > return (n == nb) ? TRUE : FALSE; > > AFAICS, last "j++" should be replaced with "return FALSE". Because, "n" > cannot be equal to "nb" no more, if "j" gets incremented without > incrementing "n" (remember "j < nb" in the "while" condition). intarray_contains.patch.0 is for above problem. [5] ISTM, inner_int_union() of _int_tool.c, makes some redundant visits to array: ... /* union */ i = j = 0; while (i < na && j < nb) if (da[i] < db[j]) *dr++ = da[i++]; else *dr++ = db[j++]; while (i < na) *dr++ = da[i++]; while (j < nb) *dr++ = db[j++]; } if (ARRNELEMS(r) > 1) r = _int_unique(r); IMHO, uniting unique values (instead of uniting and then removing duplicates) should work faster. intarray_union.patch.0 is for this problem. (Patched code, handles uniting for unique values.) [6] There's a seperate sorting algorithm isort() in _int_tool.c. Looks like it executes some kind of shell sort on the array and returns true if array has any duplicates. It's used for common sorting and deciding on executing _int_unique() on the related array if isort() says it has duplicates. IIRC, our inner qsort() has a smaller O(N) degree when compared to above sorting algorithm. Also for the code's sake it'd be better to switch using qsort() in all sorting related stuff. For these reasons, intarray_sort.patch.0 addresses this (probable) gotcha. All 3 patches passed regression tests for intarray contrib module. But these are just for addressing some gotchas I found while reading code. Your comments for these problems(?) are welcome. Regards.
Index: contrib/intarray/_int_tool.c =================================================================== RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v retrieving revision 1.6 diff -c -r1.6 _int_tool.c *** contrib/intarray/_int_tool.c 19 Nov 2005 03:00:09 -0000 1.6 --- contrib/intarray/_int_tool.c 6 May 2006 17:51:35 -0000 *************** *** 34,40 **** j++; } else ! j++; return (n == nb) ? TRUE : FALSE; } --- 34,40 ---- j++; } else ! break; return (n == nb) ? TRUE : FALSE; }
Index: contrib/intarray/_int_tool.c =================================================================== RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v retrieving revision 1.6 diff -c -r1.6 _int_tool.c *** contrib/intarray/_int_tool.c 19 Nov 2005 03:00:09 -0000 1.6 --- contrib/intarray/_int_tool.c 7 May 2006 09:59:09 -0000 *************** *** 183,221 **** return; } - - /* len >= 2 */ - bool - isort(int4 *a, int len) - { - int4 tmp, - index; - int4 *cur, - *end; - bool r = FALSE; - - end = a + len; - do - { - index = 0; - cur = a + 1; - while (cur < end) - { - if (*(cur - 1) > *cur) - { - tmp = *(cur - 1); - *(cur - 1) = *cur; - *cur = tmp; - index = 1; - } - else if (!r && *(cur - 1) == *cur) - r = TRUE; - cur++; - } - } while (index); - return r; - } - ArrayType * new_intArrayType(int num) { --- 183,188 ---- Index: contrib/intarray/_int.h =================================================================== RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int.h,v retrieving revision 1.9 diff -c -r1.9 _int.h *** contrib/intarray/_int.h 3 May 2006 16:31:07 -0000 1.9 --- contrib/intarray/_int.h 7 May 2006 09:59:10 -0000 *************** *** 38,56 **** #define ARRISVOID(x) ((x) == NULL || ARRNELEMS(x) == 0) - #define SORT(x) \ - do { \ - if ( ARRNELEMS( x ) > 1 ) \ - isort( ARRPTR( x ), ARRNELEMS( x ) ); \ - } while(0) - - #define PREPAREARR(x) \ - do { \ - if ( ARRNELEMS( x ) > 1 ) \ - if ( isort( ARRPTR( x ), ARRNELEMS( x ) ) ) \ - x = _int_unique( x ); \ - } while(0) - /* "wish" function */ #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) --- 38,43 ---- *************** *** 105,111 **** /* ** useful function */ - bool isort(int4 *a, const int len); ArrayType *new_intArrayType(int num); ArrayType *copy_intArrayType(ArrayType *a); ArrayType *resize_intArrayType(ArrayType *a, int num); --- 92,97 ---- *************** *** 171,173 **** --- 157,168 ---- if (ARRNELEMS(a) > 1) \ qsort((void*)ARRPTR(a), ARRNELEMS(a),sizeof(int4), \ (direction) ? compASC : compDESC ) + + #define SORT(x) QSORT(x, 1) + + #define PREPAREARR(x) \ + if (ARRNELEMS(x) > 1) \ + { \ + SORT(x); \ + x = _int_unique( x ); \ + }
Index: contrib/intarray/_int_tool.c =================================================================== RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v retrieving revision 1.6 diff -c -r1.6 _int_tool.c *** contrib/intarray/_int_tool.c 19 Nov 2005 03:00:09 -0000 1.6 --- contrib/intarray/_int_tool.c 6 May 2006 17:49:13 -0000 *************** *** 94,103 **** if (ARRISVOID(b)) r = copy_intArrayType(a); ! if (r) ! dr = ARRPTR(r); else { na = ARRNELEMS(a); nb = ARRNELEMS(b); da = ARRPTR(a); --- 94,108 ---- if (ARRISVOID(b)) r = copy_intArrayType(a); ! if (r) /* Only one of the arrays isn't void and it's assigned to r. */ ! { ! if (ARRNELEMS(r) > 1) ! r = _int_unique(r); /* Remove repetitive values. */ ! } else { + int k; + na = ARRNELEMS(a); nb = ARRNELEMS(b); da = ARRPTR(a); *************** *** 106,128 **** r = new_intArrayType(na + nb); dr = ARRPTR(r); ! /* union */ ! i = j = 0; ! while (i < na && j < nb) ! if (da[i] < db[j]) ! *dr++ = da[i++]; ! else ! *dr++ = db[j++]; ! ! while (i < na) ! *dr++ = da[i++]; ! while (j < nb) ! *dr++ = db[j++]; ! } ! if (ARRNELEMS(r) > 1) ! r = _int_unique(r); return r; } --- 111,164 ---- r = new_intArrayType(na + nb); dr = ARRPTR(r); ! /* Move i to next new integer in the array. */ ! #define NEXT_NEW(arr, len, i) \ ! if (i < len) \ ! { \ ! while (++i < len) \ ! if (arr[i] != arr[i-1]) \ ! break; \ } + + /* + * Form sorted union of arrays A and B. + * (Repetitive values will be discarded.) + */ + for (i = j = k = 0; i < na || j < nb; k++) + { + if (i >= na) /* A is exhausted. */ + { + dr[k] = db[j]; + NEXT_NEW(db, nb, j); + } + else if (j >= nb) /* B is exhausted. */ + { + dr[k] = da[i]; + NEXT_NEW(da, na, i); + } + else /* a[] and b[] still isn't exhausted. */ + { + if (da[i] < db[j]) + { + dr[k] = da[i]; + NEXT_NEW(da, na, i); + } + else if (da[i] > db[j]) + { + dr[k] = db[j]; + NEXT_NEW(db, nb, j); + } + else + { + dr[k] = da[i]; + NEXT_NEW(da, na, i); + NEXT_NEW(db, nb, j); + } + } + } ! r = resize_intArrayType(r, k); ! } return r; }