At Mon, 31 Aug 2020 16:04:43 +0200, Pavel Stehule <pavel.stehule@xxxxxxxxx> wrote in > po 31. 8. 2020 v 13:29 odesílatel Thomas Kellerer <shammat@xxxxxxx> napsal: > > > Thorsten Schöning schrieb am 31.08.2020 um 12:37: > > > So for what query size or number of IDs to compare in IN would you > > > consider a different approach at all? > > > > > > In my experience "hundreds" of IDs tend to be quite slow if used with an > > IN clause. > > > > Rewriting the IN to a JOIN against a VALUES clause is very often faster: > > > > So instead of: > > > > select * > > from t > > where id in (1,2,3, .... ,500); > > > > using this: > > > > select * > > from t > > join ( > > values (1),(2),(3),...(500) > > ) as x(id) on x.id = t.id > > > > produces more often than not a more efficient execution plan (assuming no > > values are duplicated in the IN list) > > > > Obviously I don't know if such a re-write is even feasible though. > > > > yes - this query probably will have a slow start, but the execution will be > fast. Unfortunately, there are not available statistics. FWIW, the attached is the dusted-off version of a part of a stalled development of mine, which unconditionally(!) creates on-the-fly statistics on VALUES list. It seems to work for certain cases, although the planning time increases significantly. =$ CREATE TABLE t1 AS SELECT a, a * 2 AS b FROM generate_series(0, 99999) a; =$ CREATE INDEX ON t1 (a); > perl q.pl(*) | psql *: q.pl: > print "explain analyze select b from t1 join (values "; > foreach $i (0..10000) { > print ", " if ($i > 0); > printf("(%d)", $i/10 + 1000); > } > print ") as v(v) on (v.v = t1.a);"; patched: Merge Join (cost=824.25..1005.19 rows=10001 width=4) (actual time=13.513..24.285 rows=10001 loops=1) Merge Cond: (t1.a = "*VALUES*".column1) -> Index Scan using t1_a_idx on t1 (cost=0.29..3050.29 rows=100000 width=8) (actual time=0.033..1.629 rows=2002 loops=1) -> Sort (cost=789.47..814.47 rows=10001 width=4) (actual time=12.557..14.546 rows=10001 loops=1) Sort Key: "*VALUES*".column1 Sort Method: quicksort Memory: 931kB -> Values Scan on "*VALUES*" (cost=0.00..125.01 rows=10001 width=4) (actual time=0.002..8.271 rows=10001 loops=1) Planning Time: 17.290 ms Execution Time: 26.344 ms (9 rows) master: Hash Join (cost=250.03..2168.03 rows=10001 width=4) (actual time=14.482..77.205 rows=10001 loops=1) Hash Cond: (t1.a = "*VALUES*".column1) -> Seq Scan on t1 (cost=0.00..1443.00 rows=100000 width=8) (actual time=0.017..23.540 rows=100000 loops=1) -> Hash (cost=125.01..125.01 rows=10001 width=4) (actual time=13.786..13.788 rows=10001 loops=1) Buckets: 16384 Batches: 1 Memory Usage: 480kB -> Values Scan on "*VALUES*" (cost=0.00..125.01 rows=10001 width=4) (actual time=0.002..8.503 rows=10001 loops=1) Planning Time: 12.365 ms Execution Time: 78.567 ms (8 rows) regards. -- Kyotaro Horiguchi NTT Open Source Software Center
>From c6680488178bb7eeae66c92f46d475ac69102481 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyotaro@xxxxxxxxxxxxx> Date: Thu, 6 Jul 2017 14:46:49 +0900 Subject: [PATCH] Temporary statistics on VALUES list When using "IN (VALUES" instead of "IN (list.." to obtain better plans, sometimes the final estimate goes bad because of the lack of that for VALUES. This patch allowes VALUES to have statistics and improves the estimate in the case. --- src/backend/commands/analyze.c | 289 +++++++++++++++++++++---------- src/backend/utils/adt/selfuncs.c | 59 ++++++- src/include/commands/vacuum.h | 2 + src/include/nodes/parsenodes.h | 2 + 4 files changed, 260 insertions(+), 92 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 8af12b5c6b..aba06415c8 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -102,6 +102,9 @@ static int compare_rows(const void *a, const void *b); static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); +static HeapTuple construct_stats_tuple(VacAttrStats *stats, + Oid attrrelid, bool inh, + Relation statsrel, HeapTuple oldtup); static void update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats); static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); @@ -278,6 +281,86 @@ analyze_rel(Oid relid, RangeVar *relation, pgstat_progress_end_command(); } +/* + * analyze_values() -- analyze one attribute in a values list then put the + * result into vardata. + */ +static List *valuesrows; + +static Datum +values_attrFetchFunc(VacAttrStatsP stats, int rownum, bool *isNull) +{ + List *row = (List *) list_nth(valuesrows, rownum); + Const *con; + + Assert(IsA(row, List)); + Assert(stats->tupattnum <= list_length(row)); + con = (Const *) list_nth(row, stats->tupattnum - 1); + Assert(IsA(con, Const)); + + return con->constvalue; +} + +HeapTuple +analyze_values(Var *var, List *values_lists) +{ + int i; + HeapTuple typetup; + HeapTuple result = NULL; + VacAttrStats *stats; + int rows; + bool ok; + MemoryContext analyze_cxt = + AllocSetContextCreate(CurrentMemoryContext, "Temp Analyze", + ALLOCSET_DEFAULT_SIZES); + + valuesrows = values_lists; + rows = list_length(valuesrows); + stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); + stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); + stats->attr->attrelid = InvalidOid; + stats->attr->attstattarget = default_statistics_target; + stats->attrtypid = var->vartype; + stats->attrtypmod = var->vartypmod; + typetup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(var->vartype)); + if (!HeapTupleIsValid(typetup)) + elog(ERROR, "cache lookup failed for type %u", var->vartype); + stats->attrtype = (Form_pg_type) GETSTRUCT(typetup); + stats->attrcollid = var->varcollid; + stats->anl_context = analyze_cxt; + stats->tupattnum = var->varattno; + stats->nodelay = true; /* don't make a delay during analyze */ + + for (i = 0 ; i < STATISTIC_NUM_SLOTS ; i++) + { + stats->statypid[i] = stats->attrtypid; + stats->statyplen[i] = stats->attrtype->typlen; + stats->statypbyval[i] = stats->attrtype->typbyval; + stats->statypalign[i] = stats->attrtype->typalign; + } + if (OidIsValid(stats->attrtype->typanalyze)) + ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, + PointerGetDatum(stats))); + else + ok = std_typanalyze(stats); + if (ok && stats->compute_stats != NULL && stats->minrows > 0) + { + Relation sd; + + stats->compute_stats(stats, values_attrFetchFunc, rows, rows); + sd = table_open(StatisticRelationId, AccessShareLock); + result = construct_stats_tuple(stats, InvalidOid, false, sd, NULL); + table_close(sd, AccessShareLock); + } + + heap_freetuple(typetup); + pfree(stats->attr); + pfree(stats); + MemoryContextDelete(analyze_cxt); + + return result; +} + /* * do_analyze_rel() -- analyze one relation, recursively or not * @@ -1421,6 +1504,112 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, return numrows; } +/* construct_stats_tuple() -- construct a statistics tuple */ +static HeapTuple +construct_stats_tuple(VacAttrStats *stats, Oid attrrelid, bool inh, + Relation statsrel, HeapTuple oldtup) +{ + HeapTuple stup; + int i, + k, + n; + Datum values[Natts_pg_statistic]; + bool nulls[Natts_pg_statistic]; + bool replaces[Natts_pg_statistic]; + + /* + * Construct a new pg_statistic tuple + */ + for (i = 0; i < Natts_pg_statistic; ++i) + { + nulls[i] = false; + replaces[i] = true; + } + + values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(attrrelid); + values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum); + values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh); + values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac); + values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth); + values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct); + i = Anum_pg_statistic_stakind1 - 1; + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */ + } + i = Anum_pg_statistic_staop1 - 1; + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */ + } + i = Anum_pg_statistic_stacoll1 - 1; + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + values[i++] = ObjectIdGetDatum(stats->stacoll[k]); /* stacollN */ + } + i = Anum_pg_statistic_stanumbers1 - 1; + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + int nnum = stats->numnumbers[k]; + + if (nnum > 0) + { + Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum)); + ArrayType *arry; + + for (n = 0; n < nnum; n++) + numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]); + /* XXX knows more than it should about type float4: */ + arry = construct_array(numdatums, nnum, + FLOAT4OID, + sizeof(float4), true, TYPALIGN_INT); + values[i++] = PointerGetDatum(arry); /* stanumbersN */ + } + else + { + nulls[i] = true; + values[i++] = (Datum) 0; + } + } + i = Anum_pg_statistic_stavalues1 - 1; + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + if (stats->numvalues[k] > 0) + { + ArrayType *arry; + + arry = construct_array(stats->stavalues[k], + stats->numvalues[k], + stats->statypid[k], + stats->statyplen[k], + stats->statypbyval[k], + stats->statypalign[k]); + values[i++] = PointerGetDatum(arry); /* stavaluesN */ + } + else + { + nulls[i] = true; + values[i++] = (Datum) 0; + } + } + + if (HeapTupleIsValid(oldtup)) + { + /* construct a modified tuple */ + stup = heap_modify_tuple(oldtup, + RelationGetDescr(statsrel), + values, + nulls, + replaces); + } + else + { + /* construct a new tuple */ + stup = heap_form_tuple(RelationGetDescr(statsrel), values, nulls); + } + + return stup; +} /* * update_attstats() -- update attribute statistics for one relation @@ -1460,114 +1649,29 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats) VacAttrStats *stats = vacattrstats[attno]; HeapTuple stup, oldtup; - int i, - k, - n; - Datum values[Natts_pg_statistic]; - bool nulls[Natts_pg_statistic]; - bool replaces[Natts_pg_statistic]; /* Ignore attr if we weren't able to collect stats */ if (!stats->stats_valid) continue; - /* - * Construct a new pg_statistic tuple - */ - for (i = 0; i < Natts_pg_statistic; ++i) - { - nulls[i] = false; - replaces[i] = true; - } - - values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid); - values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum); - values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh); - values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac); - values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth); - values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct); - i = Anum_pg_statistic_stakind1 - 1; - for (k = 0; k < STATISTIC_NUM_SLOTS; k++) - { - values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */ - } - i = Anum_pg_statistic_staop1 - 1; - for (k = 0; k < STATISTIC_NUM_SLOTS; k++) - { - values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */ - } - i = Anum_pg_statistic_stacoll1 - 1; - for (k = 0; k < STATISTIC_NUM_SLOTS; k++) - { - values[i++] = ObjectIdGetDatum(stats->stacoll[k]); /* stacollN */ - } - i = Anum_pg_statistic_stanumbers1 - 1; - for (k = 0; k < STATISTIC_NUM_SLOTS; k++) - { - int nnum = stats->numnumbers[k]; - - if (nnum > 0) - { - Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum)); - ArrayType *arry; - - for (n = 0; n < nnum; n++) - numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]); - /* XXX knows more than it should about type float4: */ - arry = construct_array(numdatums, nnum, - FLOAT4OID, - sizeof(float4), true, TYPALIGN_INT); - values[i++] = PointerGetDatum(arry); /* stanumbersN */ - } - else - { - nulls[i] = true; - values[i++] = (Datum) 0; - } - } - i = Anum_pg_statistic_stavalues1 - 1; - for (k = 0; k < STATISTIC_NUM_SLOTS; k++) - { - if (stats->numvalues[k] > 0) - { - ArrayType *arry; - - arry = construct_array(stats->stavalues[k], - stats->numvalues[k], - stats->statypid[k], - stats->statyplen[k], - stats->statypbyval[k], - stats->statypalign[k]); - values[i++] = PointerGetDatum(arry); /* stavaluesN */ - } - else - { - nulls[i] = true; - values[i++] = (Datum) 0; - } - } - /* Is there already a pg_statistic tuple for this attribute? */ oldtup = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(stats->attr->attnum), BoolGetDatum(inh)); + /* construct a statistics tuple */ + stup = construct_stats_tuple(stats, relid, inh, sd, oldtup); + if (HeapTupleIsValid(oldtup)) { /* Yes, replace it */ - stup = heap_modify_tuple(oldtup, - RelationGetDescr(sd), - values, - nulls, - replaces); ReleaseSysCache(oldtup); CatalogTupleUpdate(sd, &stup->t_self, stup); } else { /* No, insert new tuple */ - stup = heap_form_tuple(RelationGetDescr(sd), values, nulls); CatalogTupleInsert(sd, stup); } @@ -1776,7 +1880,8 @@ compute_trivial_stats(VacAttrStatsP stats, Datum value; bool isnull; - vacuum_delay_point(); + if (!stats->nodelay) + vacuum_delay_point(); value = fetchfunc(stats, i, &isnull); @@ -1892,7 +1997,8 @@ compute_distinct_stats(VacAttrStatsP stats, int firstcount1, j; - vacuum_delay_point(); + if (!stats->nodelay) + vacuum_delay_point(); value = fetchfunc(stats, i, &isnull); @@ -2239,7 +2345,8 @@ compute_scalar_stats(VacAttrStatsP stats, Datum value; bool isnull; - vacuum_delay_point(); + if (!stats->nodelay) + vacuum_delay_point(); value = fetchfunc(stats, i, &isnull); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 00c7afc66f..225938df94 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5226,10 +5226,67 @@ examine_simple_variable(PlannerInfo *root, Var *var, examine_simple_variable(rel->subroot, var, vardata); } } + else if (rte->rtekind == RTE_VALUES) + { + HeapTuple statstup; + bool all_const = true; + ListCell *lc1, *lc2; + + /* known that this attr doesn't have statistics */ + if (bms_is_member(var->varattno, rte->notmpstats)) + return; + + if (!rte->tmpstats || + !HeapTupleIsValid(rte->tmpstats[var->varattno])) + { + /* no stats for this var. calcuate stats if possible */ + + /* we allow only all-const values lists */ + foreach (lc1, rte->values_lists) + { + foreach (lc2, (List *) lfirst(lc1)) + { + if (!IsA(lfirst(lc2), Const)) + { + all_const = false; + break; + } + } + if (!all_const) + break; + } + if (!all_const) + { + /* set negative info and return */ + rte->notmpstats = + bms_add_member(rte->notmpstats, var->varattno); + return; + } + } + + if (!rte->tmpstats) + { + int ncols = list_length(rte->coltypes); + rte->tmpstats = + (HeapTuple*) palloc0(sizeof(HeapTuple) * (ncols + 1)); + } + + if (!rte->tmpstats[var->varattno]) + rte->tmpstats[var->varattno] = + analyze_values(var, rte->values_lists); + + statstup = heap_copytuple(rte->tmpstats[var->varattno]); + if (HeapTupleIsValid(statstup)) + { + vardata->statsTuple = statstup; + vardata->freefunc = heap_freetuple; + vardata->acl_ok = true; + } + } else { /* - * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We + * Otherwise, the Var comes from a FUNCTION, or CTE RTE. (We * won't see RTE_JOIN here because join alias Vars have already been * flattened.) There's not much we can do with function outputs, but * maybe someday try to be smarter about VALUES and/or CTEs. diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index a4cd721400..41ac254f6e 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -126,6 +126,7 @@ typedef struct VacAttrStats Form_pg_type attrtype; /* copy of pg_type row for attrtypid */ Oid attrcollid; /* collation of data being analyzed */ MemoryContext anl_context; /* where to save long-lived data */ + bool nodelay; /* true to skip delays */ /* * These fields must be filled in by the typanalyze routine, unless it @@ -283,6 +284,7 @@ extern Relation vacuum_open_relation(Oid relid, RangeVar *relation, extern void analyze_rel(Oid relid, RangeVar *relation, VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy); +extern HeapTuple analyze_values(Var *var, List *values_list); extern bool std_typanalyze(VacAttrStats *stats); /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 47d4c07306..7966e647cb 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1123,6 +1123,8 @@ typedef struct RangeTblEntry Bitmapset *updatedCols; /* columns needing UPDATE permission */ Bitmapset *extraUpdatedCols; /* generated columns being updated */ List *securityQuals; /* security barrier quals to apply, if any */ + struct HeapTupleData **tmpstats;/*temporary statistics, indexed by attnum */ + Bitmapset *notmpstats; /* negative cache of tmpstats */ } RangeTblEntry; /* -- 2.18.4