Tom Lane escribió: > Alvaro Herrera <alvherre@xxxxxxxxxxxxxxxxx> writes: > >> Given that the worst-case consequence is extra index vacuum passes, > >> which don't hurt that much when a table is small, maybe some smaller > >> estimate like 100 TIDs per page would be enough. Or, instead of > >> using a hard-wired constant, look at pg_class.reltuples/relpages > >> to estimate the average tuple density ... > > > This sounds like a reasonable compromise. > > Do you want to make it happen? I'm not having much luck really. I think the problem is that ANALYZE stores reltuples as the number of live tuples, so if you delete a big portion of a big table, then ANALYZE and then VACUUM, there's a huge misestimation and extra index cleanup passes happen, which is a bad thing. There seems to be no way to estimate the dead space, is there? We could go to pgstats but that seems backwards. I was having a problem at first with estimating for small tables which had no valid info in pg_class.reltuples, but I worked around that by using MaxHeapTuplesPerPage. (I was experimenting with the code that estimates average tuple width in estimate_rel_size() but then figured it was too much work.) So this part is fine AFAICS. I attach the patch I am playing with, and the simple test I've been examining (on which I comment the ANALYZE on some runs, change the conditions on the DELETE, put the CREATE INDEX before insertion instead of after it, etc). -- Alvaro Herrera http://www.CommandPrompt.com/ The PostgreSQL Company - Command Prompt, Inc.
drop table if exists foo; create table foo (a int, b varchar); create index foo_idx on foo(a); insert into foo select * from generate_series(1, 200000); delete from foo where a % 2 = 0; -- or a % 3 = 0; analyze foo; vacuum verbose foo;
Index: src/backend/commands/vacuumlazy.c =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/commands/vacuumlazy.c,v retrieving revision 1.90 diff -c -p -r1.90 vacuumlazy.c *** src/backend/commands/vacuumlazy.c 30 May 2007 20:11:57 -0000 1.90 --- src/backend/commands/vacuumlazy.c 29 Aug 2007 18:36:18 -0000 *************** static int lazy_vacuum_page(Relation one *** 120,126 **** static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); ! static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); static void lazy_record_dead_tuple(LVRelStats *vacrelstats, ItemPointer itemptr); static void lazy_record_free_space(LVRelStats *vacrelstats, --- 121,128 ---- static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); ! static void lazy_space_alloc(Relation onerel, LVRelStats *vacrelstats, ! BlockNumber relblocks); static void lazy_record_dead_tuple(LVRelStats *vacrelstats, ItemPointer itemptr); static void lazy_record_free_space(LVRelStats *vacrelstats, *************** lazy_scan_heap(Relation onerel, LVRelSta *** 289,295 **** vacrelstats->rel_pages = nblocks; vacrelstats->nonempty_pages = 0; ! lazy_space_alloc(vacrelstats, nblocks); for (blkno = 0; blkno < nblocks; blkno++) { --- 291,297 ---- vacrelstats->rel_pages = nblocks; vacrelstats->nonempty_pages = 0; ! lazy_space_alloc(onerel, vacrelstats, nblocks); for (blkno = 0; blkno < nblocks; blkno++) { *************** count_nondeletable_pages(Relation onerel *** 964,979 **** * See the comments at the head of this file for rationale. */ static void ! lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) { long maxtuples; int maxpages; if (vacrelstats->hasindex) { ! maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData); maxtuples = Min(maxtuples, INT_MAX); maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData)); /* stay sane if small maintenance_work_mem */ maxtuples = Max(maxtuples, MaxHeapTuplesPerPage); } --- 966,999 ---- * See the comments at the head of this file for rationale. */ static void ! lazy_space_alloc(Relation onerel, LVRelStats *vacrelstats, BlockNumber relblocks) { long maxtuples; int maxpages; if (vacrelstats->hasindex) { ! BlockNumber relpages; ! double reltuples; ! ! /* coerce values in pg_class to more desirable types */ ! relpages = (BlockNumber) onerel->rd_rel->relpages; ! reltuples = (double) onerel->rd_rel->reltuples; ! ! /* ! * If the relation has never been vacuumed, assume worst-case ! * number of tuples. Otherwise, use the density from pg_class to ! * estimate it. ! */ ! if (relpages > 0) ! maxtuples = (long) (reltuples / (double) relpages * (double) relblocks); ! else ! maxtuples = MaxHeapTuplesPerPage * relblocks; ! ! maxtuples = Min(maxtuples, (maintenance_work_mem * 1024L) / sizeof(ItemPointerData)); maxtuples = Min(maxtuples, INT_MAX); maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData)); + /* stay sane if small maintenance_work_mem */ maxtuples = Max(maxtuples, MaxHeapTuplesPerPage); } *************** lazy_space_alloc(LVRelStats *vacrelstats *** 987,992 **** --- 1007,1014 ---- vacrelstats->dead_tuples = (ItemPointer) palloc(maxtuples * sizeof(ItemPointerData)); + elog(NOTICE, "alloc'ed %ld bytes for %ld tuples", maxtuples * sizeof(ItemPointerData), maxtuples); + maxpages = MaxFSMPages; maxpages = Min(maxpages, MaxAllocSize / sizeof(PageFreeSpaceInfo)); /* No need to allocate more pages than the relation has blocks */
---------------------------(end of broadcast)--------------------------- TIP 1: if posting/reading through Usenet, please send an appropriate subscribe-nomail command to majordomo@xxxxxxxxxxxxxx so that your message can get through to the mailing list cleanly