Below is a patch against 8.2.4 (more or less), Heikki can you take a look at it? This enables the use of index scan of a child table by recognizing sort order of the append node. Kurt Harriman did the work. - Luke Index: cdb-pg/src/backend/optimizer/path/indxpath.c =================================================================== RCS file: /data/FISHEYE_REPOSITORIES/greenplum/cvsroot/cdb2/cdb-pg/src/backend/optimiz er/path/indxpath.c,v diff -u -N -r1.22 -r1.22.2.1 --- cdb-pg/src/backend/optimizer/path/indxpath.c 25 Apr 2007 22:07:21 -0000 1.22 +++ cdb-pg/src/backend/optimizer/path/indxpath.c 10 Aug 2007 03:41:15 -0000 1.22.2.1 @@ -379,8 +379,51 @@ index_pathkeys = build_index_pathkeys(root, index, ForwardScanDirection, true); - useful_pathkeys = truncate_useless_pathkeys(root, rel, - index_pathkeys); + /* + * CDB: For appendrel child, pathkeys contain Var nodes in terms + * of the child's baserel. Transform the pathkey list to refer to + * columns of the appendrel. + */ + if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + { + AppendRelInfo *appinfo = NULL; + RelOptInfo *appendrel = NULL; + ListCell *appcell; + CdbPathLocus notalocus; + + /* Find the appendrel of which this baserel is a child. */ + foreach(appcell, root->append_rel_list) + { + appinfo = (AppendRelInfo *)lfirst(appcell); + if (appinfo->child_relid == rel->relid) + break; + } + Assert(appinfo); + appendrel = find_base_rel(root, appinfo->parent_relid); + + /* + * The pathkey list happens to have the same format as the + * partitioning key of a Hashed locus, so by disguising it + * we can use cdbpathlocus_pull_above_projection() to do the + * transformation. + */ + CdbPathLocus_MakeHashed(¬alocus, index_pathkeys); + notalocus = + cdbpathlocus_pull_above_projection(root, + notalocus, + rel->relids, + rel->reltargetlist, + appendrel->reltargetlist, + appendrel->relid); + if (CdbPathLocus_IsHashed(notalocus)) + index_pathkeys = truncate_useless_pathkeys(root, appendrel, + notalocus.partkey); + else + index_pathkeys = NULL; + } + + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); } else useful_pathkeys = NIL; Index: cdb-pg/src/backend/optimizer/path/pathkeys.c =================================================================== RCS file: /data/FISHEYE_REPOSITORIES/greenplum/cvsroot/cdb2/cdb-pg/src/backend/optimiz er/path/pathkeys.c,v diff -u -N -r1.18 -r1.18.2.1 --- cdb-pg/src/backend/optimizer/path/pathkeys.c 30 Apr 2007 05:44:07 -0000 1.18 +++ cdb-pg/src/backend/optimizer/path/pathkeys.c 10 Aug 2007 03:41:15 -0000 1.18.2.1 @@ -1403,55 +1403,53 @@ { PathKeyItem *item; Expr *newexpr; + AttrNumber targetindex; Assert(pathkey); - /* Use constant expr if available. Will be at head of list. */ - if (CdbPathkeyEqualsConstant(pathkey)) + /* Find an expr that we can rewrite to use the projected columns. */ + item = cdbpullup_findPathKeyItemInTargetList(pathkey, + relids, + targetlist, + &targetindex); // OUT + + /* If not found, see if the equiv class contains a constant expr. */ + if (!item && + CdbPathkeyEqualsConstant(pathkey)) { item = (PathKeyItem *)linitial(pathkey); newexpr = (Expr *)copyObject(item->key); } - /* New vars for old! */ - else - { - AttrNumber targetindex; + /* Fail if no usable expr. */ + else if (!item) + return NULL; - /* Find an expr that we can rewrite to use the projected columns. */ - item = cdbpullup_findPathKeyItemInTargetList(pathkey, - relids, - targetlist, - &targetindex); // OUT - if (!item) - return NULL; + /* If found matching targetlist item, make a Var that references it. */ + else if (targetindex > 0) + newexpr = (Expr *)cdbpullup_makeVar(newrelid, + targetindex, + newvarlist, + (Expr *)item->key); - /* If found matching targetlist item, make a Var that references it. */ - if (targetindex > 0) - newexpr = (Expr *)cdbpullup_makeVar(newrelid, - targetindex, - newvarlist, - (Expr *)item->key); + /* Replace expr's Var nodes with new ones referencing the targetlist. */ + else + newexpr = cdbpullup_expr((Expr *)item->key, + targetlist, + newvarlist, + newrelid); - /* Replace expr's Var nodes with new ones referencing the targetlist. */ - else - newexpr = cdbpullup_expr((Expr *)item->key, - targetlist, - newvarlist, - newrelid); + /* Pull up RelabelType node too, unless tlist expr has right type. */ + if (IsA(item->key, RelabelType)) + { + RelabelType *oldrelabel = (RelabelType *)item->key; - /* Pull up RelabelType node too, unless tlist expr has right type. */ - if (IsA(item->key, RelabelType)) - { - RelabelType *oldrelabel = (RelabelType *)item->key; - - if (oldrelabel->resulttype != exprType((Node *)newexpr) || - oldrelabel->resulttypmod != exprTypmod((Node *)newexpr)) - newexpr = (Expr *)makeRelabelType(newexpr, - oldrelabel->resulttype, - oldrelabel->resulttypmod, - oldrelabel->relabelformat); - } + if (oldrelabel->resulttype != exprType((Node *)newexpr) || + oldrelabel->resulttypmod != exprTypmod((Node *)newexpr)) + newexpr = (Expr *)makeRelabelType(newexpr, + oldrelabel->resulttype, + oldrelabel->resulttypmod, + oldrelabel->relabelformat); } Insist(newexpr); Index: cdb-pg/src/backend/optimizer/util/pathnode.c =================================================================== RCS file: /data/FISHEYE_REPOSITORIES/greenplum/cvsroot/cdb2/cdb-pg/src/backend/optimiz er/util/pathnode.c,v diff -u -N -r1.52.2.4 -r1.52.2.5 --- cdb-pg/src/backend/optimizer/util/pathnode.c 5 Aug 2007 23:06:44 -0000 1.52.2.4 +++ cdb-pg/src/backend/optimizer/util/pathnode.c 10 Aug 2007 03:41:15 -0000 1.52.2.5 @@ -1563,7 +1563,15 @@ pathnode->path.rescannable = false; } - return pathnode; + /* + * CDB: If there is exactly one subpath, its ordering is preserved. + * Child rel's pathkey exprs are already expressed in terms of the + * columns of the parent appendrel. See find_usable_indexes(). + */ + if (list_length(subpaths) == 1) + pathnode->path.pathkeys = ((Path *)linitial(subpaths))->pathkeys; + + return pathnode; } /* On 8/24/07 3:38 AM, "Heikki Linnakangas" <heikki@xxxxxxxxxxxxxxxx> wrote: > Anton wrote: >>>> =# explain SELECT * FROM n_traf ORDER BY date_time DESC LIMIT 1; >>>> QUERY PLAN >>> ---------------------------------------------------------------------------- >>> ----------------------------- >>>> Limit (cost=824637.69..824637.69 rows=1 width=32) >>>> -> Sort (cost=824637.69..838746.44 rows=5643499 width=32) >>>> Sort Key: public.n_traf.date_time >>>> -> Result (cost=0.00..100877.99 rows=5643499 width=32) >>>> -> Append (cost= 0.00..100877.99 rows=5643499 width=32) >>>> -> Seq Scan on n_traf (cost=0.00..22.30 >>>> rows=1230 width=32) >>>> -> Seq Scan on n_traf_y2007m01 n_traf >>>> (cost=0.00..22.30 rows=1230 width=32) >> ... >>>> -> Seq Scan on n_traf_y2007m12 n_traf >>>> (cost=0.00..22.30 rows=1230 width=32) >>>> (18 rows) >>>> >>>> Why it no uses indexes at all? >>>> ------------------------------------------- >>> I'm no expert but I'd guess that the the planner doesn't know which >>> partition holds the latest time so it has to read them all. >> >> Agree. But why it not uses indexes when it reading them? > > The planner isn't smart enough to push the "ORDER BY ... LIMIT ..." > below the append node. Therefore it needs to fetch all rows from all the > tables, and the fastest way to do that is a seq scan. ---------------------------(end of broadcast)--------------------------- TIP 7: You can help support the PostgreSQL project by donating at http://www.postgresql.org/about/donate