Problem is that the query is not slow, it just stalls and never finishes ( I waited several hours for a 2 seconds query). Would it still be possible to derive an EXPLAIN ANALYZE from it in that case?
Another point is that, when I remove the percentile_disc function from the query, the problem is gone. This happens all the way in the end of the query when all WITH statements probably already materialized (see below).
By the way, I worked around the problem by splitting the query in two and work with an intermediary table. Still I'm curious how this is caused.
(And thanks for looking in to it !)
Tom
For brevity, here is the full query and table descriptions including indexes:
DROP TABLE IF EXISTS noisemodel.aspectclusters;
CREATE TABLE noisemodel.aspectclusters AS
WITH points AS (
SELECT a.*
FROM noisemodel.roofpoints a
INNER JOIN noisemodel.blocks b ON (a.gridid = b.gridid AND a.blockid = b.blockid)
WHERE slope >= 10 AND slope < 80
)
,ghostpoints AS (
SELECT
id ,
gridid,
blockid ,
buildingid,
pt ,
height,
aspect + 360 as aspect,
nx ,
ny ,
nz ,
slope ,
curvature,
coplanar
FROM points
WHERE aspect < 20
)
,allpoints AS (
SELECT * FROM points
UNION ALL
SELECT * FROM ghostpoints
)
,classes AS (
SELECT blockid, generate_series(0,390, 3) as class
FROM noisemodel.blocks
)
,aspect_buckets AS(
SELECT
a.blockid,
(width_bucket(aspect, 0, 390,(390/3)::int) * 3) bucket,
count(*) as count
FROM allpoints a
WHERE coplanar = 1 --only use clean planar points
GROUP BY a.blockid, bucket
ORDER BY a.blockid, bucket
)
,histogram AS (
SELECT
a.blockid,
class,
COALESCE(count,0) AS count
FROM classes a
LEFT JOIN aspect_buckets b ON (a.blockid = b.blockid AND a.class=b.bucket)
ORDER BY a.blockid, class
)
,histogram_avg AS (
SELECT blockid, class,count,
plv8_moving_avg(
array_agg(count) OVER (PARTITION BY blockid ORDER BY class ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
) AS avg_count
FROM histogram
)
,leadlag AS (
SELECT blockid,class, count,avg_count,
lead(avg_count,1) OVER (PARTITION BY blockid ORDER BY class) AS lead,
lag(avg_count,1) OVER (PARTITION BY blockid ORDER BY class) AS lag
FROM histogram_avg
)
,result AS (
SELECT leadlag.blockid,
array_agg(class) peaks,
plv8_find_bottoms(array_agg(class),9,381) bottoms
FROM leadlag
WHERE 1=1
AND class > 9 AND class <= 381 --ignoring head and tail of histogram
AND (avg_count >= lead)
AND (avg_count > lag)
AND avg_count > 5
GROUP BY leadlag.blockid
ORDER BY leadlag.blockid
)
,grouped AS (
SELECT
a.id,a.blockid, pt, aspect,height,
COALESCE(peaks[width_bucket(a.aspect, bottoms)],peaks[1]) aspectclass
FROM allpoints a
INNER JOIN result b ON (a.blockid = b.blockid)
)
,clustered AS (
SELECT id,pt, blockid, aspect,height,aspectclass,
--Not being very picky on the aspect clusters
ST_ClusterDBScan(pt, eps := 2.0, minpoints := 10) over (partition by blockid, aspectclass) as cid
FROM grouped
WHERE Abs(aspect - aspectclass) < 10 --shouldn't be too much difference between assigned bucket and actual aspect
)
SELECT
c1.id,c1.cid, c1.blockid,
CASE
WHEN aspectmedian > 360 THEN aspectmedian - 360
ELSE aspectmedian
END as aspect,
pt geom
FROM clustered c1
JOIN (
SELECT cid, blockid, aspectclass,
--find median aspect and median height within bucket
percentile_cont(0.5) within group (order by aspect) as aspectmedian --<-- THIS SEEMS TO BE THE CULPRIT OF STALLING THE QUERY
FROM clustered
GROUP BY blockid,cid, aspectclass
) c2 ON (c1.cid = c2.cid AND c1.blockid = c2.blockid AND c1.aspectclass = c2.aspectclass)
WHERE c1.cid Is Not Null;
CREATE TABLE noisemodel.aspectclusters AS
WITH points AS (
SELECT a.*
FROM noisemodel.roofpoints a
INNER JOIN noisemodel.blocks b ON (a.gridid = b.gridid AND a.blockid = b.blockid)
WHERE slope >= 10 AND slope < 80
)
,ghostpoints AS (
SELECT
id ,
gridid,
blockid ,
buildingid,
pt ,
height,
aspect + 360 as aspect,
nx ,
ny ,
nz ,
slope ,
curvature,
coplanar
FROM points
WHERE aspect < 20
)
,allpoints AS (
SELECT * FROM points
UNION ALL
SELECT * FROM ghostpoints
)
,classes AS (
SELECT blockid, generate_series(0,390, 3) as class
FROM noisemodel.blocks
)
,aspect_buckets AS(
SELECT
a.blockid,
(width_bucket(aspect, 0, 390,(390/3)::int) * 3) bucket,
count(*) as count
FROM allpoints a
WHERE coplanar = 1 --only use clean planar points
GROUP BY a.blockid, bucket
ORDER BY a.blockid, bucket
)
,histogram AS (
SELECT
a.blockid,
class,
COALESCE(count,0) AS count
FROM classes a
LEFT JOIN aspect_buckets b ON (a.blockid = b.blockid AND a.class=b.bucket)
ORDER BY a.blockid, class
)
,histogram_avg AS (
SELECT blockid, class,count,
plv8_moving_avg(
array_agg(count) OVER (PARTITION BY blockid ORDER BY class ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
) AS avg_count
FROM histogram
)
,leadlag AS (
SELECT blockid,class, count,avg_count,
lead(avg_count,1) OVER (PARTITION BY blockid ORDER BY class) AS lead,
lag(avg_count,1) OVER (PARTITION BY blockid ORDER BY class) AS lag
FROM histogram_avg
)
,result AS (
SELECT leadlag.blockid,
array_agg(class) peaks,
plv8_find_bottoms(array_agg(class),9,381) bottoms
FROM leadlag
WHERE 1=1
AND class > 9 AND class <= 381 --ignoring head and tail of histogram
AND (avg_count >= lead)
AND (avg_count > lag)
AND avg_count > 5
GROUP BY leadlag.blockid
ORDER BY leadlag.blockid
)
,grouped AS (
SELECT
a.id,a.blockid, pt, aspect,height,
COALESCE(peaks[width_bucket(a.aspect, bottoms)],peaks[1]) aspectclass
FROM allpoints a
INNER JOIN result b ON (a.blockid = b.blockid)
)
,clustered AS (
SELECT id,pt, blockid, aspect,height,aspectclass,
--Not being very picky on the aspect clusters
ST_ClusterDBScan(pt, eps := 2.0, minpoints := 10) over (partition by blockid, aspectclass) as cid
FROM grouped
WHERE Abs(aspect - aspectclass) < 10 --shouldn't be too much difference between assigned bucket and actual aspect
)
SELECT
c1.id,c1.cid, c1.blockid,
CASE
WHEN aspectmedian > 360 THEN aspectmedian - 360
ELSE aspectmedian
END as aspect,
pt geom
FROM clustered c1
JOIN (
SELECT cid, blockid, aspectclass,
--find median aspect and median height within bucket
percentile_cont(0.5) within group (order by aspect) as aspectmedian --<-- THIS SEEMS TO BE THE CULPRIT OF STALLING THE QUERY
FROM clustered
GROUP BY blockid,cid, aspectclass
) c2 ON (c1.cid = c2.cid AND c1.blockid = c2.blockid AND c1.aspectclass = c2.aspectclass)
WHERE c1.cid Is Not Null;
---------------tables that the query draws from: ----------------------------
ROOFPOINTS has approx 335k records
CREATE TABLE noisemodel.roofpoints
(
id bigint,
gridid text,
blockid bigint,
buildingid character varying(16),
pt geometry,
height double precision,
aspect double precision,
nx numeric,
ny numeric,
nz numeric,
slope numeric,
curvature numeric,
coplanar numeric
)
WITH (
OIDS=FALSE
);
CREATE INDEX roofpoints_blockid_idx
ON noisemodel.roofpoints
USING btree
(blockid);
CREATE INDEX roofpoints_gridid_idx
ON noisemodel.roofpoints
USING btree
(gridid COLLATE pg_catalog."default");
CREATE INDEX roofpoints_id_idx
ON noisemodel.roofpoints
USING btree
(id);
CREATE INDEX sidx_roofpoints_pt
ON noisemodel.roofpoints
USING gist
(pt);
(
id bigint,
gridid text,
blockid bigint,
buildingid character varying(16),
pt geometry,
height double precision,
aspect double precision,
nx numeric,
ny numeric,
nz numeric,
slope numeric,
curvature numeric,
coplanar numeric
)
WITH (
OIDS=FALSE
);
CREATE INDEX roofpoints_blockid_idx
ON noisemodel.roofpoints
USING btree
(blockid);
CREATE INDEX roofpoints_gridid_idx
ON noisemodel.roofpoints
USING btree
(gridid COLLATE pg_catalog."default");
CREATE INDEX roofpoints_id_idx
ON noisemodel.roofpoints
USING btree
(id);
CREATE INDEX sidx_roofpoints_pt
ON noisemodel.roofpoints
USING gist
(pt);
BLOCKS has approx 77 records
CREATE TABLE noisemodel.blocks
(
gridid text,
blockid bigint,
geom geometry
)
WITH (
OIDS=FALSE
);
(
gridid text,
blockid bigint,
geom geometry
)
WITH (
OIDS=FALSE
);
On Wed, Jul 4, 2018 at 6:01 PM, Tom Lane <tgl@xxxxxxxxxxxxx> wrote:
Tom van Tilburg <tom.van.tilburg@xxxxxxxxx> writes:
> I have a set of relatively complex queries producing tables (including
> postgis and pgpointcloud functions) that I run consecutively, and sometimes
> (depending on the base-data) my 5th query stalls (CPU 100%, runs forever)
> seemingly on the percentile_cont function. *When I replace percentile_cont
> with just a value it passes*.
> The setup roughly looks like this:
> query1 - creates 77 records with polygons
> query2 - creates 89 records with polygons
> query3 - creates ~350k records with points (inside above polygons)
> query4 - creates ~220k records with points clustered by height (from result
> query3)
> query5 - creates ~102k records with point clustered by normal (from result
> query3)
> The odd thing is, when I run query5 directly after query4, it will stall on
> some datasets (always same sets). Though when I cancel the query and run it
> again, it will pass in about 2 seconds.
Hard to say for sure with just this much detail, but what this smells
like is a bad query plan choice based on out-of-date statistics. The
fact that the query is fast when you retry could then be explained by
supposing that the autovacuum daemon has gotten in there and updated
the stats while you were waiting. So I'd try inserting a manual ANALYZE
of the table(s) that the earlier queries modify.
If that doesn't fix it, we'd need much more detail to offer help. See
https://wiki.postgresql.org/wiki/Slow_Query_Questions
It'd be particularly useful to compare EXPLAIN output in both the
"slow" and "fast" states.
regards, tom lane