On Thu, Jun 4, 2015 at 9:42 AM, Robert Haas <robertmhaas@xxxxxxxxx> wrote: > Thanks for the review. Here's a new version. I've fixed the things Alvaro and Noah noted, and some compiler warnings about set-but-unused variables. I also tested it, and it doesn't quite work as hoped. If started on a cluster where oldestMultiXid is incorrectly set to 1, it starts up and indicates that the member wraparound guards are disabled. But even after everything is fixed, they don't get enabled until after the next full restart. I think that's because TruncateMultiXact() bails out too early, without calling DetermineSafeOldestOffset(). My attempt at a quick fix for that problem didn't work out, so I'm posting this version for now to facilitate further review and testing. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
commit eb39cf10e4ff853ed4b9d3fab599cf42911e6f70 Author: Robert Haas <rhaas@postgresql.org> Date: Thu Jun 4 11:58:49 2015 -0400 bar diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 699497c..209d3e6 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -196,13 +196,24 @@ typedef struct MultiXactStateData /* next-to-be-assigned offset */ MultiXactOffset nextOffset; + /* Have we completed multixact startup? */ + bool finishedStartup; + /* - * Oldest multixact that is still on disk. Anything older than this - * should not be consulted. These values are updated by vacuum. + * Oldest multixact that is still potentially referenced by a relation. + * Anything older than this should not be consulted. These values are + * updated by vacuum. */ MultiXactId oldestMultiXactId; Oid oldestMultiXactDB; + + /* + * Oldest multixact offset that is potentially referenced by a + * multixact referenced by a relation. We don't always know this value, + * so there's a flag here to indicate whether or not we currently do. + */ MultiXactOffset oldestOffset; + bool oldestOffsetKnown; /* * This is what the previous checkpoint stored as the truncate position. @@ -219,6 +230,7 @@ typedef struct MultiXactStateData /* support for members anti-wraparound measures */ MultiXactOffset offsetStopLimit; + bool offsetStopLimitKnown; /* * Per-backend data starts here. 
We have two arrays stored in the area @@ -348,10 +360,11 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); -static void DetermineSafeOldestOffset(MultiXactId oldestMXact); +static void DetermineSafeOldestOffset(MultiXactOffset oldestMXact); static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, uint32 distance); -static MultiXactOffset find_multixact_start(MultiXactId multi); +static bool SetOffsetVacuumLimit(bool finish_setup); +static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); static void WriteMZeroPageXlogRec(int pageno, uint8 info); @@ -960,7 +973,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * against catastrophic data loss due to multixact wraparound. The basic * rules are: * - * If we're past multiVacLimit or the safe threshold for member storage space, + * If we're past multiVacLimit or the safe threshold for member storage + * space, or we don't know what the safe threshold for member storage is, * start trying to force autovacuum cycles. * If we're past multiWarnLimit, start issuing warnings. * If we're past multiStopLimit, refuse to create new MultiXactIds. 
@@ -969,6 +983,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) *---------- */ if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit) || + !MultiXactState->oldestOffsetKnown || (MultiXactState->nextOffset - MultiXactState->oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD)) { @@ -1083,7 +1098,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) *---------- */ #define OFFSET_WARN_SEGMENTS 20 - if (MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, + if (MultiXactState->offsetStopLimitKnown && + MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, nmembers)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), @@ -1095,7 +1111,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) MultiXactState->offsetStopLimit - nextOffset - 1), errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.", MultiXactState->oldestMultiXactDB))); - else if (MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, + else if (MultiXactState->offsetStopLimitKnown && + MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) ereport(WARNING, @@ -1975,12 +1992,6 @@ StartupMultiXact(void) */ pageno = MXOffsetToMemberPage(offset); MultiXactMemberCtl->shared->latest_page_number = pageno; - - /* - * compute the oldest member we need to keep around to avoid old member - * data overrun. 
- */ - DetermineSafeOldestOffset(MultiXactState->oldestMultiXactId); } /* @@ -1994,6 +2005,7 @@ TrimMultiXact(void) { MultiXactId multi = MultiXactState->nextMXact; MultiXactOffset offset = MultiXactState->nextOffset; + MultiXactId oldestMXact; int pageno; int entryno; int flagsoff; @@ -2066,6 +2078,13 @@ TrimMultiXact(void) } LWLockRelease(MultiXactMemberControlLock); + + if (SetOffsetVacuumLimit(true) && IsUnderPostmaster) + SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); + LWLockAcquire(MultiXactGenLock, LW_SHARED); + oldestMXact = MultiXactState->lastCheckpointedOldest; + LWLockRelease(MultiXactGenLock); + DetermineSafeOldestOffset(oldestMXact); } /* @@ -2165,8 +2184,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) MultiXactId multiStopLimit; MultiXactId multiWrapLimit; MultiXactId curMulti; - MultiXactOffset oldestOffset; - MultiXactOffset nextOffset; + bool needs_offset_vacuum; Assert(MultiXactIdIsValid(oldest_datminmxid)); @@ -2219,35 +2237,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) if (multiVacLimit < FirstMultiXactId) multiVacLimit += FirstMultiXactId; - /* - * Determine the offset of the oldest multixact that might still be - * referenced. Normally, we can read the offset from the multixact itself, - * but there's an important special case: if there are no multixacts in - * existence at all, oldest_datminmxid obviously can't point to one. It - * will instead point to the multixact ID that will be assigned the next - * time one is needed. - * - * NB: oldest_dataminmxid is the oldest multixact that might still be - * referenced from a table, unlike in DetermineSafeOldestOffset, where we - * do this same computation based on the oldest value that might still - * exist in the SLRU. 
This is because here we're trying to compute a - * threshold for activating autovacuum, which can only remove references - * to multixacts, whereas there we are computing a threshold for creating - * new multixacts, which requires the old ones to have first been - * truncated away by a checkpoint. - */ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - if (MultiXactState->nextMXact == oldest_datminmxid) - { - oldestOffset = MultiXactState->nextOffset; - LWLockRelease(MultiXactGenLock); - } - else - { - LWLockRelease(MultiXactGenLock); - oldestOffset = find_multixact_start(oldest_datminmxid); - } - /* Grab lock for just long enough to set the new limit values */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->oldestMultiXactId = oldest_datminmxid; @@ -2256,9 +2245,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) MultiXactState->multiWarnLimit = multiWarnLimit; MultiXactState->multiStopLimit = multiStopLimit; MultiXactState->multiWrapLimit = multiWrapLimit; - MultiXactState->oldestOffset = oldestOffset; curMulti = MultiXactState->nextMXact; - nextOffset = MultiXactState->nextOffset; LWLockRelease(MultiXactGenLock); /* Log the info */ @@ -2266,6 +2253,9 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) (errmsg("MultiXactId wrap limit is %u, limited by database with OID %u", multiWrapLimit, oldest_datoid))); + /* Set limits for offset vacuum. */ + needs_offset_vacuum = SetOffsetVacuumLimit(false); + /* * If past the autovacuum force point, immediately signal an autovac * request. The reason for this is that autovac only processes one @@ -2274,8 +2264,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) * another iteration immediately if there are still any old databases. 
*/ if ((MultiXactIdPrecedes(multiVacLimit, curMulti) || - (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD)) && - IsUnderPostmaster && !InRecovery) + needs_offset_vacuum) && IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); /* Give an immediate warning if past the wrap warn point */ @@ -2531,6 +2520,25 @@ static void DetermineSafeOldestOffset(MultiXactId oldestMXact) { MultiXactOffset oldestOffset; + MultiXactOffset nextOffset; + MultiXactOffset offsetStopLimit; + MultiXactOffset prevOffsetStopLimit; + MultiXactId nextMXact; + bool finishedStartup; + bool prevOffsetStopLimitKnown; + + /* Fetch values from shared memory. */ + LWLockAcquire(MultiXactGenLock, LW_SHARED); + finishedStartup = MultiXactState->finishedStartup; + nextMXact = MultiXactState->nextMXact; + nextOffset = MultiXactState->nextOffset; + prevOffsetStopLimit = MultiXactState->offsetStopLimit; + prevOffsetStopLimitKnown = MultiXactState->offsetStopLimitKnown; + LWLockRelease(MultiXactGenLock); + + /* Don't worry about this until after we've started up. */ + if (!finishedStartup) + return; /* * Determine the offset of the oldest multixact. Normally, we can read @@ -2539,32 +2547,132 @@ DetermineSafeOldestOffset(MultiXactId oldestMXact) * obviously can't point to one. It will instead point to the multixact * ID that will be assigned the next time one is needed. * - * NB: oldestMXact should be the oldest multixact that still exists in - * the SLRU, unlike in SetMultiXactIdLimit, where we do this same - * computation based on the oldest value that might be referenced in a - * table. + * NB: oldestMXact should be the oldest multixact that still exists in the + * SLRU, unlike in SetOffsetVacuumLimit, where we do this same computation + * based on the oldest value that might be referenced in a table. 
*/ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - if (MultiXactState->nextMXact == oldestMXact) - { - oldestOffset = MultiXactState->nextOffset; - LWLockRelease(MultiXactGenLock); - } + if (nextMXact == oldestMXact) + oldestOffset = nextOffset; else { - LWLockRelease(MultiXactGenLock); - oldestOffset = find_multixact_start(oldestMXact); + bool oldestOffsetKnown; + + oldestOffsetKnown = find_multixact_start(oldestMXact, &oldestOffset); + if (!oldestOffsetKnown) + { + ereport(LOG, + (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk", + oldestMXact))); + return; + } } /* move back to start of the corresponding segment */ - oldestOffset -= oldestOffset % - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); + offsetStopLimit = oldestOffset - (oldestOffset % + (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT)); + /* always leave one segment before the wraparound point */ + offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); + + /* if nothing has changed, we're done */ + if (prevOffsetStopLimitKnown && offsetStopLimit == prevOffsetStopLimit) + return; LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); - /* always leave one segment before the wraparound point */ - MultiXactState->offsetStopLimit = oldestOffset - - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); + MultiXactState->offsetStopLimit = offsetStopLimit; + MultiXactState->offsetStopLimitKnown = true; + LWLockRelease(MultiXactGenLock); + + if (!prevOffsetStopLimitKnown && IsUnderPostmaster) + ereport(LOG, + (errmsg("MultiXact member wraparound protections are now enabled"))); + ereport(DEBUG1, + (errmsg("MultiXact member stop limit is now %u based on MultiXact %u", + offsetStopLimit, oldestMXact))); +} + +/* + * Determine how aggressively we need to vacuum in order to prevent member + * wraparound. + * + * To determine the oldest multixact ID, we look at oldestMultiXactId, not + * lastCheckpointedOldest. 
That's because vacuuming can't help with anything + * older than oldestMultiXactId; anything older than that isn't referenced + * by any table. Offsets older than oldestMultiXactId but not as old as + * lastCheckpointedOldest will go away after the next checkpoint. + * + * The return value is true if emergency autovacuum is required and false + * otherwise. + */ +static bool +SetOffsetVacuumLimit(bool finish_setup) +{ + MultiXactId oldestMultiXactId; + MultiXactId nextMXact; + bool finishedStartup; + MultiXactOffset oldestOffset = 0; /* placate compiler */ + MultiXactOffset nextOffset; + bool oldestOffsetKnown = false; + MultiXactOffset prevOldestOffset; + + /* Read relevant fields from shared memory. */ + LWLockAcquire(MultiXactGenLock, LW_SHARED); + oldestMultiXactId = MultiXactState->oldestMultiXactId; + nextMXact = MultiXactState->nextMXact; + nextOffset = MultiXactState->nextOffset; + finishedStartup = MultiXactState->finishedStartup; + prevOldestOffset = MultiXactState->oldestOffset; LWLockRelease(MultiXactGenLock); + + /* Don't do this until after any recovery is complete. */ + if (!finishedStartup && !finish_setup) + return false; + + /* + * If no multixacts exist, then oldestMultiXactId will be the next + * multixact that will be created, rather than an existing multixact. + */ + if (oldestMultiXactId == nextMXact) + { + /* + * When the next multixact gets created, it will be stored at the + * next offset. + */ + oldestOffset = nextOffset; + oldestOffsetKnown = true; + } + else + { + /* + * Figure out where the oldest existing multixact's offsets are stored. + * Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X, the + * supposedly-earliest multixact might not really exist. We are + * careful not to fail in that case. 
+ */ + oldestOffsetKnown = + find_multixact_start(oldestMultiXactId, &oldestOffset); + } + + /* + * Except when initializing the system for the first time, there's no + * need to update anything if we don't know the oldest offset or if it + * hasn't changed. + */ + if (finish_setup || + (oldestOffsetKnown && prevOldestOffset != oldestOffset)) + { + /* Install the new limits. */ + LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); + MultiXactState->oldestOffset = oldestOffset; + MultiXactState->oldestOffsetKnown = oldestOffsetKnown; + MultiXactState->finishedStartup = true; + LWLockRelease(MultiXactGenLock); + } + + /* + * Do we need an emergency autovacuum? If we're not sure, assume yes. + */ + return !oldestOffsetKnown || + (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD); } /* @@ -2617,9 +2725,12 @@ MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, /* * Find the starting offset of the given MultiXactId. + * + * Returns false if the file containing the multi does not exist on disk. + * Otherwise, returns true and sets *result to the starting member offset. */ -static MultiXactOffset -find_multixact_start(MultiXactId multi) +static bool +find_multixact_start(MultiXactId multi, MultiXactOffset *result) { MultiXactOffset offset; int pageno; @@ -2630,6 +2741,9 @@ find_multixact_start(MultiXactId multi) pageno = MultiXactIdToOffsetPage(multi); entryno = MultiXactIdToOffsetEntry(multi); + if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno)) + return false; + /* lock is acquired by SimpleLruReadPage_ReadOnly */ slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi); offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; @@ -2642,25 +2756,31 @@ find_multixact_start(MultiXactId multi) /* * Determine how many multixacts, and how many multixact members, currently - * exist. + * exist. Return false if unable to determine. 
*/ -static void +static bool ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) { MultiXactOffset nextOffset; MultiXactOffset oldestOffset; MultiXactId oldestMultiXactId; MultiXactId nextMultiXactId; + bool oldestOffsetKnown; LWLockAcquire(MultiXactGenLock, LW_SHARED); nextOffset = MultiXactState->nextOffset; oldestMultiXactId = MultiXactState->oldestMultiXactId; nextMultiXactId = MultiXactState->nextMXact; oldestOffset = MultiXactState->oldestOffset; + oldestOffsetKnown = MultiXactState->oldestOffsetKnown; LWLockRelease(MultiXactGenLock); + if (!oldestOffsetKnown) + return false; + *members = nextOffset - oldestOffset; *multixacts = nextMultiXactId - oldestMultiXactId; + return true; } /* @@ -2702,7 +2822,9 @@ MultiXactMemberFreezeThreshold(void) uint32 victim_multixacts; double fraction; - ReadMultiXactCounts(&multixacts, &members); + /* If we can't determine member space utilization, assume the worst. */ + if (!ReadMultiXactCounts(&multixacts, &members)) + return 0; /* If member space utilization is low, no special action is required. */ if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) @@ -2854,8 +2976,13 @@ TruncateMultiXact(void) /* * First, compute the safe truncation point for MultiXactMember. This is * the starting offset of the oldest multixact. + * + * Due to bugs in early releases of PostgreSQL 9.3.X and 9.4.X, + * oldestOffset might point to a multixact that does not exist. If so, + * don't truncate anything until that gets cleaned up. */ - oldestOffset = find_multixact_start(oldestMXact); + if (!find_multixact_start(oldestMXact, &oldestOffset)) + return; /* * To truncate MultiXactMembers, we need to figure out the active page
commit 07e698d68a83e4898ffb5270a4ec4705d072bb9f Author: Robert Haas <rhaas@postgresql.org> Date: Thu Jun 4 11:58:49 2015 -0400 bar diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 9568ff1..e4c75e4 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -198,13 +198,24 @@ typedef struct MultiXactStateData /* next-to-be-assigned offset */ MultiXactOffset nextOffset; + /* Have we completed multixact startup? */ + bool finishedStartup; + /* - * Oldest multixact that is still on disk. Anything older than this - * should not be consulted. These values are updated by vacuum. + * Oldest multixact that is still potentially referenced by a relation. + * Anything older than this should not be consulted. These values are + * updated by vacuum. */ MultiXactId oldestMultiXactId; Oid oldestMultiXactDB; + + /* + * Oldest multixact offset that is potentially referenced by a + * multixact referenced by a relation. We don't always know this value, + * so there's a flag here to indicate whether or not we currently do. + */ MultiXactOffset oldestOffset; + bool oldestOffsetKnown; /* * This is what the previous checkpoint stored as the truncate position. @@ -221,6 +232,7 @@ typedef struct MultiXactStateData /* support for members anti-wraparound measures */ MultiXactOffset offsetStopLimit; + bool offsetStopLimitKnown; /* * Per-backend data starts here. 
We have two arrays stored in the area @@ -350,10 +362,11 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); -static void DetermineSafeOldestOffset(MultiXactId oldestMXact); +static void DetermineSafeOldestOffset(MultiXactOffset oldestMXact); static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, uint32 distance); -static MultiXactOffset find_multixact_start(MultiXactId multi); +static bool SetOffsetVacuumLimit(bool finish_setup); +static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); static void WriteMZeroPageXlogRec(int pageno, uint8 info); @@ -955,7 +968,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * against catastrophic data loss due to multixact wraparound. The basic * rules are: * - * If we're past multiVacLimit or the safe threshold for member storage space, + * If we're past multiVacLimit or the safe threshold for member storage + * space, or we don't know what the safe threshold for member storage is, * start trying to force autovacuum cycles. * If we're past multiWarnLimit, start issuing warnings. * If we're past multiStopLimit, refuse to create new MultiXactIds. 
@@ -964,6 +978,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) *---------- */ if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit) || + !MultiXactState->oldestOffsetKnown || (MultiXactState->nextOffset - MultiXactState->oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD)) { @@ -1078,7 +1093,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) *---------- */ #define OFFSET_WARN_SEGMENTS 20 - if (MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, + if (MultiXactState->offsetStopLimitKnown && + MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, nmembers)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), @@ -1090,7 +1106,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) MultiXactState->offsetStopLimit - nextOffset - 1), errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.", MultiXactState->oldestMultiXactDB))); - else if (MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, + else if (MultiXactState->offsetStopLimitKnown && + MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) ereport(WARNING, @@ -1956,12 +1973,6 @@ StartupMultiXact(void) */ pageno = MXOffsetToMemberPage(offset); MultiXactMemberCtl->shared->latest_page_number = pageno; - - /* - * compute the oldest member we need to keep around to avoid old member - * data overrun. 
- */ - DetermineSafeOldestOffset(MultiXactState->oldestMultiXactId); } /* @@ -1975,6 +1986,7 @@ TrimMultiXact(void) { MultiXactId multi = MultiXactState->nextMXact; MultiXactOffset offset = MultiXactState->nextOffset; + MultiXactId oldestMXact; int pageno; int entryno; int flagsoff; @@ -2047,6 +2059,13 @@ TrimMultiXact(void) } LWLockRelease(MultiXactMemberControlLock); + + if (SetOffsetVacuumLimit(true) && IsUnderPostmaster) + SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); + LWLockAcquire(MultiXactGenLock, LW_SHARED); + oldestMXact = MultiXactState->lastCheckpointedOldest; + LWLockRelease(MultiXactGenLock); + DetermineSafeOldestOffset(oldestMXact); } /* @@ -2146,8 +2165,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) MultiXactId multiStopLimit; MultiXactId multiWrapLimit; MultiXactId curMulti; - MultiXactOffset oldestOffset; - MultiXactOffset nextOffset; + bool needs_offset_vacuum; Assert(MultiXactIdIsValid(oldest_datminmxid)); @@ -2200,35 +2218,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) if (multiVacLimit < FirstMultiXactId) multiVacLimit += FirstMultiXactId; - /* - * Determine the offset of the oldest multixact that might still be - * referenced. Normally, we can read the offset from the multixact - * itself, but there's an important special case: if there are no - * multixacts in existence at all, oldest_datminmxid obviously can't point - * to one. It will instead point to the multixact ID that will be - * assigned the next time one is needed. - * - * NB: oldest_dataminmxid is the oldest multixact that might still be - * referenced from a table, unlike in DetermineSafeOldestOffset, where we - * do this same computation based on the oldest value that might still - * exist in the SLRU. 
This is because here we're trying to compute a - * threshold for activating autovacuum, which can only remove references - * to multixacts, whereas there we are computing a threshold for creating - * new multixacts, which requires the old ones to have first been - * truncated away by a checkpoint. - */ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - if (MultiXactState->nextMXact == oldest_datminmxid) - { - oldestOffset = MultiXactState->nextOffset; - LWLockRelease(MultiXactGenLock); - } - else - { - LWLockRelease(MultiXactGenLock); - oldestOffset = find_multixact_start(oldest_datminmxid); - } - /* Grab lock for just long enough to set the new limit values */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->oldestMultiXactId = oldest_datminmxid; @@ -2237,9 +2226,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) MultiXactState->multiWarnLimit = multiWarnLimit; MultiXactState->multiStopLimit = multiStopLimit; MultiXactState->multiWrapLimit = multiWrapLimit; - MultiXactState->oldestOffset = oldestOffset; curMulti = MultiXactState->nextMXact; - nextOffset = MultiXactState->nextOffset; LWLockRelease(MultiXactGenLock); /* Log the info */ @@ -2247,6 +2234,9 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) (errmsg("MultiXactId wrap limit is %u, limited by database with OID %u", multiWrapLimit, oldest_datoid))); + /* Set limits for offset vacuum. */ + needs_offset_vacuum = SetOffsetVacuumLimit(false); + /* * If past the autovacuum force point, immediately signal an autovac * request. The reason for this is that autovac only processes one @@ -2255,8 +2245,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid) * another iteration immediately if there are still any old databases. 
*/ if ((MultiXactIdPrecedes(multiVacLimit, curMulti) || - (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD)) && - IsUnderPostmaster && !InRecovery) + needs_offset_vacuum) && IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); /* Give an immediate warning if past the wrap warn point */ @@ -2512,6 +2501,25 @@ static void DetermineSafeOldestOffset(MultiXactId oldestMXact) { MultiXactOffset oldestOffset; + MultiXactOffset nextOffset; + MultiXactOffset offsetStopLimit; + MultiXactOffset prevOffsetStopLimit; + MultiXactId nextMXact; + bool finishedStartup; + bool prevOffsetStopLimitKnown; + + /* Fetch values from shared memory. */ + LWLockAcquire(MultiXactGenLock, LW_SHARED); + finishedStartup = MultiXactState->finishedStartup; + nextMXact = MultiXactState->nextMXact; + nextOffset = MultiXactState->nextOffset; + prevOffsetStopLimit = MultiXactState->offsetStopLimit; + prevOffsetStopLimitKnown = MultiXactState->offsetStopLimitKnown; + LWLockRelease(MultiXactGenLock); + + /* Don't worry about this until after we've started up. */ + if (!finishedStartup) + return; /* * Determine the offset of the oldest multixact. Normally, we can read @@ -2521,30 +2529,131 @@ DetermineSafeOldestOffset(MultiXactId oldestMXact) * ID that will be assigned the next time one is needed. * * NB: oldestMXact should be the oldest multixact that still exists in the - * SLRU, unlike in SetMultiXactIdLimit, where we do this same computation + * SLRU, unlike in SetOffsetVacuumLimit, where we do this same computation * based on the oldest value that might be referenced in a table. 
*/ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - if (MultiXactState->nextMXact == oldestMXact) - { - oldestOffset = MultiXactState->nextOffset; - LWLockRelease(MultiXactGenLock); - } + if (nextMXact == oldestMXact) + oldestOffset = nextOffset; else { - LWLockRelease(MultiXactGenLock); - oldestOffset = find_multixact_start(oldestMXact); + bool oldestOffsetKnown; + + oldestOffsetKnown = find_multixact_start(oldestMXact, &oldestOffset); + if (!oldestOffsetKnown) + { + ereport(LOG, + (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk", + oldestMXact))); + return; + } } /* move back to start of the corresponding segment */ - oldestOffset -= oldestOffset % - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); + offsetStopLimit = oldestOffset - (oldestOffset % + (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT)); + /* always leave one segment before the wraparound point */ + offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); + + /* if nothing has changed, we're done */ + if (prevOffsetStopLimitKnown && offsetStopLimit == prevOffsetStopLimit) + return; LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); - /* always leave one segment before the wraparound point */ - MultiXactState->offsetStopLimit = oldestOffset - - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); + MultiXactState->offsetStopLimit = offsetStopLimit; + MultiXactState->offsetStopLimitKnown = true; + LWLockRelease(MultiXactGenLock); + + if (!prevOffsetStopLimitKnown && IsUnderPostmaster) + ereport(LOG, + (errmsg("MultiXact member wraparound protections are now enabled"))); + ereport(DEBUG1, + (errmsg("MultiXact member stop limit is now %u based on MultiXact %u", + offsetStopLimit, oldestMXact))); +} + +/* + * Determine how aggressively we need to vacuum in order to prevent member + * wraparound. + * + * To determine the oldest multixact ID, we look at oldestMultiXactId, not + * lastCheckpointedOldest. 
That's because vacuuming can't help with anything + * older than oldestMultiXactId; anything older than that isn't referenced + * by any table. Offsets older than oldestMultiXactId but not as old as + * lastCheckpointedOldest will go away after the next checkpoint. + * + * The return value is true if emergency autovacuum is required and false + * otherwise. + */ +static bool +SetOffsetVacuumLimit(bool finish_setup) +{ + MultiXactId oldestMultiXactId; + MultiXactId nextMXact; + bool finishedStartup; + MultiXactOffset oldestOffset = 0; /* placate compiler */ + MultiXactOffset nextOffset; + bool oldestOffsetKnown = false; + MultiXactOffset prevOldestOffset; + + /* Read relevant fields from shared memory. */ + LWLockAcquire(MultiXactGenLock, LW_SHARED); + oldestMultiXactId = MultiXactState->oldestMultiXactId; + nextMXact = MultiXactState->nextMXact; + nextOffset = MultiXactState->nextOffset; + finishedStartup = MultiXactState->finishedStartup; + prevOldestOffset = MultiXactState->oldestOffset; LWLockRelease(MultiXactGenLock); + + /* Don't do this until after any recovery is complete. */ + if (!finishedStartup && !finish_setup) + return false; + + /* + * If no multixacts exist, then oldestMultiXactId will be the next + * multixact that will be created, rather than an existing multixact. + */ + if (oldestMultiXactId == nextMXact) + { + /* + * When the next multixact gets created, it will be stored at the + * next offset. + */ + oldestOffset = nextOffset; + oldestOffsetKnown = true; + } + else + { + /* + * Figure out where the oldest existing multixact's offsets are stored. + * Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X, the + * supposedly-earliest multixact might not really exist. We are + * careful not to fail in that case. 
+ */ + oldestOffsetKnown = + find_multixact_start(oldestMultiXactId, &oldestOffset); + } + + /* + * Except when initializing the system for the first time, there's no + * need to update anything if we don't know the oldest offset or if it + * hasn't changed. + */ + if (finish_setup || + (oldestOffsetKnown && prevOldestOffset != oldestOffset)) + { + /* Install the new limits. */ + LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); + MultiXactState->oldestOffset = oldestOffset; + MultiXactState->oldestOffsetKnown = oldestOffsetKnown; + MultiXactState->finishedStartup = true; + LWLockRelease(MultiXactGenLock); + } + + /* + * Do we need an emergency autovacuum? If we're not sure, assume yes. + */ + return !oldestOffsetKnown || + (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD); } /* @@ -2597,9 +2706,12 @@ MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, /* * Find the starting offset of the given MultiXactId. + * + * Returns false if the file containing the multi does not exist on disk. + * Otherwise, returns true and sets *result to the starting member offset. */ -static MultiXactOffset -find_multixact_start(MultiXactId multi) +static bool +find_multixact_start(MultiXactId multi, MultiXactOffset *result) { MultiXactOffset offset; int pageno; @@ -2610,6 +2722,9 @@ find_multixact_start(MultiXactId multi) pageno = MultiXactIdToOffsetPage(multi); entryno = MultiXactIdToOffsetEntry(multi); + if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno)) + return false; + /* lock is acquired by SimpleLruReadPage_ReadOnly */ slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi); offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; @@ -2622,25 +2737,31 @@ find_multixact_start(MultiXactId multi) /* * Determine how many multixacts, and how many multixact members, currently - * exist. + * exist. Return false if unable to determine. 
*/ -static void +static bool ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) { MultiXactOffset nextOffset; MultiXactOffset oldestOffset; MultiXactId oldestMultiXactId; MultiXactId nextMultiXactId; + bool oldestOffsetKnown; LWLockAcquire(MultiXactGenLock, LW_SHARED); nextOffset = MultiXactState->nextOffset; oldestMultiXactId = MultiXactState->oldestMultiXactId; nextMultiXactId = MultiXactState->nextMXact; oldestOffset = MultiXactState->oldestOffset; + oldestOffsetKnown = MultiXactState->oldestOffsetKnown; LWLockRelease(MultiXactGenLock); + if (!oldestOffsetKnown) + return false; + *members = nextOffset - oldestOffset; *multixacts = nextMultiXactId - oldestMultiXactId; + return true; } /* @@ -2682,7 +2803,9 @@ MultiXactMemberFreezeThreshold(void) uint32 victim_multixacts; double fraction; - ReadMultiXactCounts(&multixacts, &members); + /* If we can't determine member space utilization, assume the worst. */ + if (!ReadMultiXactCounts(&multixacts, &members)) + return 0; /* If member space utilization is low, no special action is required. */ if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) @@ -2834,8 +2957,13 @@ TruncateMultiXact(void) /* * First, compute the safe truncation point for MultiXactMember. This is * the starting offset of the oldest multixact. + * + * Due to bugs in early releases of PostgreSQL 9.3.X and 9.4.X, + * oldestOffset might point to a multixact that does not exist. If so, + * don't truncate anything until that gets cleaned up. */ - oldestOffset = find_multixact_start(oldestMXact); + if (!find_multixact_start(oldestMXact, &oldestOffset)) + return; /* * To truncate MultiXactMembers, we need to figure out the active page
-- Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-general