Search Postgresql Archives

Re: Multi-parameter aggregates.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Michael Fuhr wrote:

On Mon, Nov 21, 2005 at 11:03:22AM -0500, Berend Tober wrote:
I'm interested in defining a covariance aggregate function. (As a
...aggregates must take a single value, so the above won't
work as written.  However, in PostgreSQL 8.0 or later you could
define the aggregate's base type to be a composite type ...

Thanks for you help. Here is what I came up with, using the existing POINT data type, which when you think about it makes a lot of sense, and even thoug having to explicitly cast a pair of columns as a point type is a little cludgy, I'm get the analytical tool I need, regardless.

CREATE TYPE public._regression AS (
   n integer,
   x double precision,
   y double precision,
   xy double precision
   );


CREATE OR REPLACE FUNCTION public.weighted_average(double precision, double precision, double precision)
   RETURNS double precision AS '
   BEGIN
   IF $1>1.0 THEN
RAISE EXCEPTION ''Weighted average % coefficient exceeds unity.'', $1;
   ELSIF $1<0 THEN
RAISE EXCEPTION ''Weighted average % coefficient less than zero.'', $1;
   END IF;
   RETURN $1*$2 + (1.-$1)*$3;
   END;
   'LANGUAGE 'plpgsql' IMMUTABLE STRICT;


CREATE OR REPLACE FUNCTION public.regression_accum(_regression, point)
   RETURNS _regression AS '
   SELECT
       $1.n+1,
       weighted_average($1.n/($1.n+1.), $1.x, $2[0]),
       weighted_average($1.n/($1.n+1.), $1.y, $2[1]),
       weighted_average($1.n/($1.n+1.), $1.xy, $2[0]*$2[1]);
   'LANGUAGE 'sql' IMMUTABLE STRICT;
GRANT EXECUTE ON FUNCTION public.regression_accum(_regression, point) TO public;
COMMENT ON FUNCTION public.regression_accum(_regression, point)
   IS 'regression aggregate transition function';

CREATE OR REPLACE FUNCTION public.covariance(_regression)
   RETURNS double precision AS '
   BEGIN
   IF $1.n = 0 THEN
       RAISE EXCEPTION ''No covariance data'';
   END IF;
   RETURN $1.xy - $1.x*$1.y;
   END;
   'LANGUAGE 'plpgsql' IMMUTABLE STRICT;
GRANT EXECUTE ON FUNCTION public.covariance(_regression) TO public;
COMMENT ON FUNCTION public.covariance(_regression)
   IS 'cov aggregate final function';

CREATE AGGREGATE public.cov(
 BASETYPE=point,
 SFUNC=regression_accum,
 STYPE=_regression,
 FINALFUNC=covariance,
 INITCOND='(0,0,0,0)'
);

-- Examples

CREATE LOCAL TEMPORARY TABLE test (
   example integer,
   x numeric,
   y numeric
) without oids;

-- Hand calculated: cov(x,y)=0.25
INSERT INTO test VALUES (1, 0.0, 0.0);
INSERT INTO test VALUES (1, 1.0, 1.0);

-- Hand calculated: cov(x,x)=0.389, cov(y,y)=1.556, cov(x,y)=0.778
INSERT INTO test VALUES (2, 0.0, 0.0);
INSERT INTO test VALUES (2, 1.0, 2.0);
INSERT INTO test VALUES (2, 1.5, 3.0);

-- Hand calculated: cov(x,x)=0.264, cov(y,y)=0.192, cov(x,y)=0.214
INSERT INTO test VALUES (3, 0.25, 0.125);
INSERT INTO test VALUES (3, 1.0, 1.0);
INSERT INTO test VALUES (3, 1.5, 1.1);


--
http://www.quantlet.com/mdstat/scripts/mva/htmlbook/mvahtmlnode22.html: cov(x,y) = -80.02
INSERT INTO test VALUES (4, 230, 125);
INSERT INTO test VALUES (4, 181,  99);
INSERT INTO test VALUES (4, 165,  97);
INSERT INTO test VALUES (4, 150, 115);
INSERT INTO test VALUES (4,  97, 120);
INSERT INTO test VALUES (4, 192, 100);
INSERT INTO test VALUES (4, 181,  80);
INSERT INTO test VALUES (4, 189,  90);
INSERT INTO test VALUES (4, 172,  95);
INSERT INTO test VALUES (4, 170, 125);

/*
Note that the value COV(X,X) and COV(Y,Y) do not equal output from
the native Postgresql aggregates VARIANCE(x) and VARIANCE(y),
respectively. I cannot explain why they are different, but COV()
is consistent with direct calculation.
*/

SELECT
   'Example '||example AS title,
   COUNT(*) AS N,
   COV(POINT(x,x)) AS covXX,
   COV(POINT(y,y)) AS covYY,
   COV(POINT(x,y)) AS covXY,
   VARIANCE(x),
   VARIANCE(y)
   FROM test
   GROUP BY 1
   ORDER BY 1;



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Books]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]
  Powered by Linux