On Wed, 22 Apr 2009, Matthew Wakeling wrote:
I will post a patch when I have ported my bioseg code over to the seg data
type.
Here is my patch ported over to the seg contrib package, attached. Apply
it to seg.c and all should be well. A similar thing needs to be done to
cube, but I haven't looked at that.
Matthew
--
An optimist sees the glass as half full, a pessimist as half empty,
and an engineer as having redundant storage capacity.
--- seg.c 2006-09-10 18:36:51.000000000 +0100
+++ seg.c_new 2009-04-22 17:03:13.000000000 +0100
@@ -69,7 +69,6 @@
bool seg_right(SEG * a, SEG * b);
bool seg_over_right(SEG * a, SEG * b);
SEG *seg_union(SEG * a, SEG * b);
-SEG *seg_inter(SEG * a, SEG * b);
void rt_seg_size(SEG * a, float *sz);
float *seg_size(SEG * a);
@@ -269,14 +268,22 @@
gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
{
SEG *ud;
+ SEG *origrange;
+ SEG *newrange;
float tmp1,
tmp2;
- ud = seg_union((SEG *) DatumGetPointer(origentry->key),
- (SEG *) DatumGetPointer(newentry->key));
+ origrange = (SEG *) DatumGetPointer(origentry->key);
+ newrange = (SEG *) DatumGetPointer(newentry->key);
+ ud = seg_union(origrange, newrange);
rt_seg_size(ud, &tmp1);
- rt_seg_size((SEG *) DatumGetPointer(origentry->key), &tmp2);
- *result = tmp1 - tmp2;
+ rt_seg_size(origrange, &tmp2);
+ if (tmp1 == tmp2) {
+ rt_seg_size(newrange, &tmp1);
+ *result = (tmp2 - tmp1) / tmp2;
+ } else {
+ *result = tmp1 - tmp2 + 1.0;
+ }
#ifdef GIST_DEBUG
fprintf(stderr, "penalty\n");
@@ -297,27 +304,16 @@
GIST_SPLITVEC *v)
{
OffsetNumber i,
- j;
SEG *datum_alpha,
- *datum_beta;
SEG *datum_l,
*datum_r;
- SEG *union_d,
- *union_dl,
- *union_dr;
- SEG *inter_d;
- bool firsttime;
- float size_alpha,
- size_beta,
- size_union,
- size_inter;
- float size_waste,
- waste;
- float size_l,
- size_r;
+ bool firsttime;
+ float lowest,
+ highest,
+ midpoint,
+ split,
+ midpointsum;
int nbytes;
- OffsetNumber seed_1 = 1,
- seed_2 = 2;
OffsetNumber *left,
*right;
OffsetNumber maxoff;
@@ -326,107 +322,64 @@
fprintf(stderr, "picksplit\n");
#endif
- maxoff = entryvec->n - 2;
+ maxoff = entryvec->n - 1;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
+ midpointsum = 0.0;
firsttime = true;
- waste = 0.0;
+ lowest = 0.0;
+ highest = 0.0;
- for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
- for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
- {
- datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key);
-
- /* compute the wasted space by unioning these guys */
- /* size_waste = size_union - size_inter; */
- union_d = seg_union(datum_alpha, datum_beta);
- rt_seg_size(union_d, &size_union);
- inter_d = seg_inter(datum_alpha, datum_beta);
- rt_seg_size(inter_d, &size_inter);
- size_waste = size_union - size_inter;
-
- /*
- * are these a more promising split that what we've already seen?
- */
- if (size_waste > waste || firsttime)
- {
- waste = size_waste;
- seed_1 = i;
- seed_2 = j;
- firsttime = false;
- }
- }
+ midpoint = (datum_alpha->lower + datum_alpha->upper) / 2;
+ midpointsum += midpoint;
+ if (firsttime || (midpoint < lowest))
+ {
+ lowest = midpoint;
+ }
+ if (firsttime || (midpoint > highest))
+ {
+ highest = midpoint;
+ }
+ firsttime = false;
}
+ split = midpointsum / maxoff;
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
- datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key);
- datum_l = seg_union(datum_alpha, datum_alpha);
- rt_seg_size(datum_l, &size_l);
- datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key);
- datum_r = seg_union(datum_beta, datum_beta);
- rt_seg_size(datum_r, &size_r);
+ datum_l = (SEG *) palloc(sizeof(*datum_l));
+ datum_l->lower = lowest;
+ datum_l->upper = lowest;
+ datum_r = (SEG *) palloc(sizeof(*datum_r));
+ datum_r->lower = highest;
+ datum_r->upper = highest;
/*
- * Now split up the regions between the two seeds. An important property
- * of this split algorithm is that the split vector v has the indices of
- * items to be split in order in its left and right vectors. We exploit
- * this property by doing a merge in the code that actually splits the
- * page.
- *
- * For efficiency, we also place the new index tuple in this loop. This is
- * handled at the very end, when we have placed all the existing tuples
- * and i == maxoff + 1.
+ * Now split up the regions between the two seeds.
*/
- maxoff = OffsetNumberNext(maxoff);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
- /*
- * If we've already decided where to place this item, just put it on
- * the right list. Otherwise, we need to figure out which page needs
- * the least enlargement in order to store the item.
- */
-
- if (i == seed_1)
- {
- *left++ = i;
- v->spl_nleft++;
- continue;
- }
- else if (i == seed_2)
- {
- *right++ = i;
- v->spl_nright++;
- continue;
- }
-
- /* okay, which page needs least enlargement? */
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
- union_dl = seg_union(datum_l, datum_alpha);
- union_dr = seg_union(datum_r, datum_alpha);
- rt_seg_size(union_dl, &size_alpha);
- rt_seg_size(union_dr, &size_beta);
+ midpoint = (datum_alpha->lower + datum_alpha->upper) / 2;
/* pick which page to add it to */
- if (size_alpha - size_l < size_beta - size_r)
+ if (mipoint <= split)
{
- datum_l = union_dl;
- size_l = size_alpha;
+ datum_l = seg_union(datum_l, datum_alpha);
*left++ = i;
v->spl_nleft++;
}
else
{
- datum_r = union_dr;
- size_r = size_alpha;
+ datum_r = seg_union(datum_r, datum_alpha);
*right++ = i;
v->spl_nright++;
}
@@ -665,45 +618,6 @@
return (n);
}
-
-SEG *
-seg_inter(SEG * a, SEG * b)
-{
- SEG *n;
-
- n = (SEG *) palloc(sizeof(*n));
-
- /* take min of upper endpoints */
- if (a->upper < b->upper)
- {
- n->upper = a->upper;
- n->u_sigd = a->u_sigd;
- n->u_ext = a->u_ext;
- }
- else
- {
- n->upper = b->upper;
- n->u_sigd = b->u_sigd;
- n->u_ext = b->u_ext;
- }
-
- /* take max of lower endpoints */
- if (a->lower > b->lower)
- {
- n->lower = a->lower;
- n->l_sigd = a->l_sigd;
- n->l_ext = a->l_ext;
- }
- else
- {
- n->lower = b->lower;
- n->l_sigd = b->l_sigd;
- n->l_ext = b->l_ext;
- }
-
- return (n);
-}
-
void
rt_seg_size(SEG * a, float *size)
{
--
Sent via pgsql-performance mailing list (pgsql-performance@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-performance