Working with 9.4.
We are in the process of unpacking complicated XML-data into tables. XML-data are already in a table with two fields (id, xml) - 47+ million records.
Some of hour queries to extract the data and insert it in other tables runs for days and in one case we have created a table with 758million unique records.We are in the process of unpacking complicated XML-data into tables. XML-data are already in a table with two fields (id, xml) - 47+ million records.
I have recently read that it is probably better for processes like this to copy result of the query to a csv-file and then import it again with copy as an insert. Next time I will try that.
The following query has been running for 6 days now and are still running (I have anonymized it a little bit) on a server with 768 GB RAM. It has created 44 temporary files so far:
INSERT INTO table_a_link(uid,gn_id)
WITH p AS
(SELECT ARRAY[ARRAY['t','some_xpath']] AS some_xpath),
q AS
(SELECT (xpath('//t:UID/text()',xml,some_xpath))[1] uid,
unnest(xpath('//t:grant',xml,some_xpath)) AS gr
FROM source.xml_data a,
p
WHERE xpath_exists('//t:grant/t:grant_agency', xml ,some_xpath)),
r AS
(
SELECT
CASE WHEN
xpath_exists('//t:grant_ids', gr, some_xpath)
THEN
unnest(xpath('//t:grant_ids', gr, some_xpath))
ELSE
NULL
END
AS GR_ids
FROM q,
p ) ,
y as (SELECT A.UUID AS FO_ID,
/* unnest(xpath('//t:grant_agency/text()',GR,ns))::citext AS agency,
*/ CASE WHEN
xpath_exists('//t:grant_id', gr_ids, some_xpath)
THEN
unnest(xpath('//t:grant_id/text()', gr_ids, some_xpath))::citext
ELSE
NULL
END
grant_NO,
uid::varchar(19)
from WOS.FUNDING_ORG A, p,q
left join r on (xpath('//t:grant/t:grant_ids/t:grant_id/text()',gr,
ARRAY[ARRAY['t','some_xpath']])::citext =
xpath('//t:grant_id/text()',GR_IDS,ARRAY[ARRAY['t','some_xpath']])::citext)
WHERE A.FUNDING_ORG = (xpath('//t:grant_agency/text()',GR,some_xpath))[1]::CITEXT
)
select distinct y.uid, B.uuid gn_id
from y, table_b B
where
y.fo_id = B.fo_id
and
y.grant_no is not distinct from b.grant_no
INSERT INTO table_a_link(uid,gn_id)
WITH p AS
(SELECT ARRAY[ARRAY['t','some_xpath']] AS some_xpath),
q AS
(SELECT (xpath('//t:UID/text()',xml,some_xpath))[1] uid,
unnest(xpath('//t:grant',xml,some_xpath)) AS gr
FROM source.xml_data a,
p
WHERE xpath_exists('//t:grant/t:grant_agency', xml ,some_xpath)),
r AS
(
SELECT
CASE WHEN
xpath_exists('//t:grant_ids', gr, some_xpath)
THEN
unnest(xpath('//t:grant_ids', gr, some_xpath))
ELSE
NULL
END
AS GR_ids
FROM q,
p ) ,
y as (SELECT A.UUID AS FO_ID,
/* unnest(xpath('//t:grant_agency/text()',GR,ns))::citext AS agency,
*/ CASE WHEN
xpath_exists('//t:grant_id', gr_ids, some_xpath)
THEN
unnest(xpath('//t:grant_id/text()', gr_ids, some_xpath))::citext
ELSE
NULL
END
grant_NO,
uid::varchar(19)
from WOS.FUNDING_ORG A, p,q
left join r on (xpath('//t:grant/t:grant_ids/t:grant_id/text()',gr,
ARRAY[ARRAY['t','some_xpath']])::citext =
xpath('//t:grant_id/text()',GR_IDS,ARRAY[ARRAY['t','some_xpath']])::citext)
WHERE A.FUNDING_ORG = (xpath('//t:grant_agency/text()',GR,some_xpath))[1]::CITEXT
)
select distinct y.uid, B.uuid gn_id
from y, table_b B
where
y.fo_id = B.fo_id
and
y.grant_no is not distinct from b.grant_no
Regards.
Johann