Re: performance advice needed: join vs explicit subselect

Karsten Hilbert <Karsten.Hilbert@xxxxxxx> · Tue, 27 Jan 2009 22:12:16 +0100

On Tue, Jan 27, 2009 at 06:48:11PM +0000, Sam Mason wrote:

> > table test_results
> > 	modified_by integer foreign key staff(pk),
> > 	intended_reviewer integer foreign key staff(pk),
> > 	actual_reviewer integer foreign key staff(pk)
> > 
> > (this table will contain millions of rows)
> > 
> > table staff
> > 	pk integer
> > 	name text
> > 
> > (this table will contain at most 50 rows)
> > 
> > Now I want to set up a view which aggregates test results
> > with staff names for all three foreign keys. This would mean
> > I would either have to
> > 
> > - join test_results to staff three times, once for each
> >   of the foreign keys, this is going to be messy with
> >   tracking table aliases, duplicate column names etc
> 
> if you've only got three columns it shouldn't be too bad should it?

This is what one deserves for thinking to be able to distill
the essence of a problem :-)

The view in question is in fact a lot more complicated. This
is the best I've been able to come up with so far (and it is
still slow - slow as in 3-4 seconds for 20 records out of
(currently only) 50 !):

create view clin.v_test_results as

select
	cenc.fk_patient
		as pk_patient,
	-- test_result
	tr.pk as pk_test_result,
	tr.clin_when,
	-- unified
	vttu.unified_code,
	vttu.unified_name,
	case when coalesce(trim(both from tr.val_alpha), '') = ''
		then tr.val_num::text
		else case when tr.val_num is null
			then tr.val_alpha
			else tr.val_num::text || ' (' || tr.val_alpha || ')'
		end
	end as unified_val,
	coalesce(tr.val_target_min, tr.val_normal_min)
		as unified_target_min,
	coalesce(tr.val_target_max, tr.val_normal_max)
		as unified_target_max,
	coalesce(tr.val_target_range, tr.val_normal_range)
		as unified_target_range,
	tr.soap_cat,
	tr.narrative
		as comment,
	-- test result data
	tr.val_num,
	tr.val_alpha,
	tr.val_unit,
	vttu.conversion_unit,
	tr.val_normal_min,
	tr.val_normal_max,
	tr.val_normal_range,
	tr.val_target_min,
	tr.val_target_max,
	tr.val_target_range,
	tr.abnormality_indicator,
	tr.norm_ref_group,
	tr.note_test_org,
	tr.material,
	tr.material_detail,
	-- test type data
	vttu.code_tt,
	vttu.name_tt,
	vttu.coding_system_tt,
	vttu.comment_tt,
	vttu.code_unified,
	vttu.name_unified,
	vttu.coding_system_unified,
	vttu.comment_unified,

	-- episode/issue data
	epi.description
		as episode,

	-- status of last review
	coalesce(rtr.fk_reviewed_row, 0)::bool
		as reviewed,
	rtr.is_technically_abnormal
		as is_technically_abnormal,
	rtr.clinically_relevant
		as is_clinically_relevant,
	rtr.comment
		as review_comment,

	(select
		short_alias || ' (' ||
		coalesce(title || ' ', '') ||
		coalesce(firstnames || ' ', '') ||
		coalesce(lastnames, '') ||
		')'
	 from dem.v_staff
	 where pk_staff = rtr.fk_reviewer
	) as last_reviewer,

	rtr.modified_when
		as last_reviewed,

	coalesce (
		(rtr.fk_reviewer = (select pk from dem.staff where db_user = current_user)),
		False
	)
		as review_by_you,

	coalesce (
		(tr.fk_intended_reviewer = rtr.fk_reviewer),
		False
	)
		as review_by_responsible_reviewer,

	-- potential review status
	(select
		short_alias || ' (' ||
		coalesce(title || ' ', '') ||
		coalesce(firstnames || ' ', '') ||
		coalesce(lastnames, '') ||
		')'
	 from dem.v_staff
	 where pk_staff = tr.fk_intended_reviewer
	) as responsible_reviewer,

	coalesce (
		(tr.fk_intended_reviewer = (select pk from dem.staff where db_user = current_user)),
		False
	)
		as you_are_responsible,

	case when ((select 1 from dem.staff where db_user = tr.modified_by) is null)
		then '<' || tr.modified_by || '>'
		else (select short_alias from dem.staff where db_user = tr.modified_by)
	end
		as modified_by,

	tr.modified_when,
	tr.row_version as row_version,

	-- management keys
	-- clin.clin_root_item
	tr.pk_item,
	tr.fk_encounter as pk_encounter,
	tr.fk_episode as pk_episode,
	-- test_result
	tr.fk_type as pk_test_type,
	tr.fk_intended_reviewer as pk_intended_reviewer,
	tr.xmin as xmin_test_result,
	-- v_unified_test_types
	vttu.pk_test_org,
	vttu.pk_test_type_unified,
	-- v_pat_episodes
	epi.fk_health_issue
		as pk_health_issue,
	-- reviewed_test_results
	rtr.fk_reviewer as pk_last_reviewer
from
	clin.test_result tr
		left join clin.encounter cenc on (tr.fk_encounter = cenc.pk)
			left join clin.episode epi on (tr.fk_episode = epi.pk)
				left join clin.reviewed_test_results rtr on (tr.pk = rtr.fk_reviewed_row)
	,
	clin.v_unified_test_types vttu
where
	tr.fk_type = vttu.pk_test_type
;

> > - write three explicit sub-selects for the columns I want
> >   to denormalize into the view definition
> 
> This would look a bit prettier, but PG tends not to optimize at all.  It
> always executes it as a subplan and hence will only work nicely when
> you've got a very small subset of the test_results coming back.
Potentially in the low hundreds.

Thanks !
Karsten
-- 
GPG key ID E4071346 @ wwwkeys.pgp.net
E167 67FD A291 2BEA 73BD  4537 78B9 A9F9 E407 1346

-- 
Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general