Hello,
I have a fact table ( table and indexes are bellow ) that will probably get arround 2 billion rows.
- Can postgresql support such table (this table is the fact table of a datamart -> many join query with dimensions tables) ?
- If yes, I would like to test (say insert 2 billion test rows), what serveur configuration do I need ? How much RAM ?
- If not, would it be better to think about a cluster or other ?
- (Have you any idea to optimize this table ?)
Thanks a lot !
CREATE TABLE observation_fact
(
encounter_num integer NOT NULL,
patient_num integer NOT NULL,
concept_cd character varying(50) NOT NULL,
provider_id character varying(50) NOT NULL,
start_date timestamp without time zone NOT NULL,
modifier_cd character varying(100) NOT NULL DEFAULT '@'::character varying,
instance_num integer NOT NULL DEFAULT 1,
valtype_cd character varying(50),
tval_char character varying(255),
nval_num numeric(18,5),
valueflag_cd character varying(50),
quantity_num numeric(18,5),
units_cd character varying(50),
end_date timestamp without time zone,
location_cd character varying(50),
observation_blob text,
confidence_num numeric(18,5),
update_date timestamp without time zone,
download_date timestamp without time zone,
import_date timestamp without time zone,
sourcesystem_cd character varying(50),
upload_id integer,
text_search_index serial NOT NULL,
CONSTRAINT observation_fact_pk PRIMARY KEY (patient_num, concept_cd, modifier_cd, start_date, encounter_num, instance_num, provider_id)
)
WITH (
OIDS=FALSE
);
CREATE INDEX of_idx_allobservation_fact
ON i2b2databeta.observation_fact
USING btree
(patient_num, encounter_num, concept_cd COLLATE pg_catalog."default", start_date, provider_id COLLATE pg_catalog."default", modifier_cd COLLATE pg_catalog."default", instance_num, valtype_cd COLLATE pg_catalog."default", tval_char COLLATE pg_catalog."default", nval_num, valueflag_cd COLLATE pg_catalog."default", quantity_num, units_cd COLLATE pg_catalog."default", end_date, location_cd COLLATE pg_catalog."default", confidence_num);
CREATE INDEX of_idx_clusteredconcept
ON i2b2databeta.observation_fact
USING btree
(concept_cd COLLATE pg_catalog."default");
CREATE INDEX of_idx_encounter_patient
ON i2b2databeta.observation_fact
USING btree
(encounter_num, patient_num, instance_num);
CREATE INDEX of_idx_modifier
ON i2b2databeta.observation_fact
USING btree
(modifier_cd COLLATE pg_catalog."default");
CREATE INDEX of_idx_sourcesystem_cd
ON i2b2databeta.observation_fact
USING btree
(sourcesystem_cd COLLATE pg_catalog."default");
CREATE INDEX of_idx_start_date
ON i2b2databeta.observation_fact
USING btree
(start_date, patient_num);
CREATE INDEX of_idx_uploadid
ON i2b2databeta.observation_fact
USING btree
(upload_id);
CREATE UNIQUE INDEX of_text_search_unique
ON i2b2databeta.observation_fact
USING btree
(text_search_index);
(
encounter_num integer NOT NULL,
patient_num integer NOT NULL,
concept_cd character varying(50) NOT NULL,
provider_id character varying(50) NOT NULL,
start_date timestamp without time zone NOT NULL,
modifier_cd character varying(100) NOT NULL DEFAULT '@'::character varying,
instance_num integer NOT NULL DEFAULT 1,
valtype_cd character varying(50),
tval_char character varying(255),
nval_num numeric(18,5),
valueflag_cd character varying(50),
quantity_num numeric(18,5),
units_cd character varying(50),
end_date timestamp without time zone,
location_cd character varying(50),
observation_blob text,
confidence_num numeric(18,5),
update_date timestamp without time zone,
download_date timestamp without time zone,
import_date timestamp without time zone,
sourcesystem_cd character varying(50),
upload_id integer,
text_search_index serial NOT NULL,
CONSTRAINT observation_fact_pk PRIMARY KEY (patient_num, concept_cd, modifier_cd, start_date, encounter_num, instance_num, provider_id)
)
WITH (
OIDS=FALSE
);
CREATE INDEX of_idx_allobservation_fact
ON i2b2databeta.observation_fact
USING btree
(patient_num, encounter_num, concept_cd COLLATE pg_catalog."default", start_date, provider_id COLLATE pg_catalog."default", modifier_cd COLLATE pg_catalog."default", instance_num, valtype_cd COLLATE pg_catalog."default", tval_char COLLATE pg_catalog."default", nval_num, valueflag_cd COLLATE pg_catalog."default", quantity_num, units_cd COLLATE pg_catalog."default", end_date, location_cd COLLATE pg_catalog."default", confidence_num);
CREATE INDEX of_idx_clusteredconcept
ON i2b2databeta.observation_fact
USING btree
(concept_cd COLLATE pg_catalog."default");
CREATE INDEX of_idx_encounter_patient
ON i2b2databeta.observation_fact
USING btree
(encounter_num, patient_num, instance_num);
CREATE INDEX of_idx_modifier
ON i2b2databeta.observation_fact
USING btree
(modifier_cd COLLATE pg_catalog."default");
CREATE INDEX of_idx_sourcesystem_cd
ON i2b2databeta.observation_fact
USING btree
(sourcesystem_cd COLLATE pg_catalog."default");
CREATE INDEX of_idx_start_date
ON i2b2databeta.observation_fact
USING btree
(start_date, patient_num);
CREATE INDEX of_idx_uploadid
ON i2b2databeta.observation_fact
USING btree
(upload_id);
CREATE UNIQUE INDEX of_text_search_unique
ON i2b2databeta.observation_fact
USING btree
(text_search_index);