Search Postgresql Archives

Re: "PANIC: could not open critical system index 2662" - twice

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 2023-05-08 17:46:37 -0700, Andres Freund wrote:
> My current gut feeling is that we should use datconnlimit == -2 to prevent
> connections after reaching DropDatabaseBuffers() in dropdb(), and use a new
> column in 16, for both createdb() and dropdb().

Attached is a rough prototype of that idea (only using datconnlimit == -2 for
now).

This would need a fair bit more polish. The tests are crappy and output stuff
to stderr and don't validate enough. The error messages are bad. No docs
changes. More comments about why datconnlimit == -2 is used. Etc.

But I think it should be sufficient to discuss whether this is a viable path.


I guess we need to move this to -hackers. Perhaps I'll post subsequent
versions below
https://www.postgresql.org/message-id/20230314174521.74jl6ffqsee5mtug%40awork3.anarazel.de ?

Greetings,

Andres Freund
>From 5784ba0b21eaf05c2989c57ce1f73c13edf0111e Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@xxxxxxxxxxx>
Date: Mon, 8 May 2023 18:28:33 -0700
Subject: [PATCH v1] Handle interrupted DROP DATABASE

Author:
Reviewed-by:
Discussion: https://postgr.es/m/20230509004637.cgvmfwrbht7xm7p6@xxxxxxxxxxxxxxxxxx
Backpatch:
---
 src/include/catalog/pg_database.h           |  2 +-
 src/backend/commands/dbcommands.c           | 50 ++++++++++++++-------
 src/backend/utils/init/postinit.c           | 11 +++++
 src/test/recovery/meson.build               |  1 +
 src/test/recovery/t/037_invalid_database.pl | 48 ++++++++++++++++++++
 5 files changed, 95 insertions(+), 17 deletions(-)
 create mode 100644 src/test/recovery/t/037_invalid_database.pl

diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h
index d004f4dc8aa..5cd972da324 100644
--- a/src/include/catalog/pg_database.h
+++ b/src/include/catalog/pg_database.h
@@ -49,7 +49,7 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID
 	/* new connections allowed? */
 	bool		datallowconn;
 
-	/* max connections allowed (-1=no limit) */
+	/* max connections allowed (-1=no limit, -2=invalid database) */
 	int32		datconnlimit;
 
 	/* all Xids < this are frozen in this DB */
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 2e242eeff24..a0360d9ad80 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1569,6 +1569,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
 	bool		db_istemplate;
 	Relation	pgdbrel;
 	HeapTuple	tup;
+	Form_pg_database datform;
 	int			notherbackends;
 	int			npreparedxacts;
 	int			nslots,
@@ -1684,17 +1685,6 @@ dropdb(const char *dbname, bool missing_ok, bool force)
 						dbname),
 				 errdetail_busy_db(notherbackends, npreparedxacts)));
 
-	/*
-	 * Remove the database's tuple from pg_database.
-	 */
-	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_id));
-	if (!HeapTupleIsValid(tup))
-		elog(ERROR, "cache lookup failed for database %u", db_id);
-
-	CatalogTupleDelete(pgdbrel, &tup->t_self);
-
-	ReleaseSysCache(tup);
-
 	/*
 	 * Delete any comments or security labels associated with the database.
 	 */
@@ -1711,6 +1701,31 @@ dropdb(const char *dbname, bool missing_ok, bool force)
 	 */
 	dropDatabaseDependencies(db_id);
 
+	/*
+	 * Tell the cumulative stats system to forget it immediately, too.
+	 */
+	pgstat_drop_database(db_id);
+
+	tup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for database %u", db_id);
+	datform = (Form_pg_database) GETSTRUCT(tup);
+
+	/*
+	 * Subsequent actions are not transactional (consider
+	 * DropDatabaseBuffers() discarding modified buffers). But we might crash
+	 * or get interrupted below. To prevent accessing a database with invalid
+	 * contents, mark the database as invalid using an in-place update.
+	 */
+	datform->datconnlimit = -2;
+	heap_inplace_update(pgdbrel, tup);
+
+	/*
+	 * Also delete the tuple - transactionally. If this transaction commits,
+	 * the row will be gone, but if we fail, dropdb() can be invoked again.
+	 */
+	CatalogTupleDelete(pgdbrel, &tup->t_self);
+
 	/*
 	 * Drop db-specific replication slots.
 	 */
@@ -1723,11 +1738,6 @@ dropdb(const char *dbname, bool missing_ok, bool force)
 	 */
 	DropDatabaseBuffers(db_id);
 
-	/*
-	 * Tell the cumulative stats system to forget it immediately, too.
-	 */
-	pgstat_drop_database(db_id);
-
 	/*
 	 * Tell checkpointer to forget any pending fsync and unlink requests for
 	 * files in the database; else the fsyncs will fail at next checkpoint, or
@@ -2339,6 +2349,14 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
 	datform = (Form_pg_database) GETSTRUCT(tuple);
 	dboid = datform->oid;
 
+	if (datform->datconnlimit == -2)
+	{
+		ereport(FATAL,
+				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				errmsg("database \"%s\" is invalid", stmt->dbname),
+				errdetail("Use DROP DATABASE to drop invalid databases"));
+	}
+
 	if (!object_ownercheck(DatabaseRelationId, dboid, GetUserId()))
 		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
 					   stmt->dbname);
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 53420f4974f..876b5cc1fec 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -1120,6 +1120,7 @@ InitPostgres(const char *in_dbname, Oid dboid,
 	if (!bootstrap)
 	{
 		HeapTuple	tuple;
+		Form_pg_database datform;
 
 		tuple = GetDatabaseTuple(dbname);
 		if (!HeapTupleIsValid(tuple) ||
@@ -1129,6 +1130,16 @@ InitPostgres(const char *in_dbname, Oid dboid,
 					(errcode(ERRCODE_UNDEFINED_DATABASE),
 					 errmsg("database \"%s\" does not exist", dbname),
 					 errdetail("It seems to have just been dropped or renamed.")));
+
+		datform = (Form_pg_database) GETSTRUCT(tuple);
+
+		if (datform->datconnlimit == -2)
+		{
+			ereport(FATAL,
+					errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					errmsg("database \"%s\" is invalid", dbname),
+					 errdetail("Use DROP DATABASE to drop invalid databases"));
+		}
 	}
 
 	/*
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 20089580100..e7328e48944 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -42,6 +42,7 @@ tests += {
       't/034_create_database.pl',
       't/035_standby_logical_decoding.pl',
       't/036_truncated_dropped.pl',
+      't/037_invalid_database.pl',
     ],
   },
 }
diff --git a/src/test/recovery/t/037_invalid_database.pl b/src/test/recovery/t/037_invalid_database.pl
new file mode 100644
index 00000000000..af309601b3b
--- /dev/null
+++ b/src/test/recovery/t/037_invalid_database.pl
@@ -0,0 +1,48 @@
+# Copyright (c) 2023, PostgreSQL Global Development Group
+#
+# Test we handle interrupted DROP DATABASE correctly.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init;
+$node->start;
+
+
+# First verify that we can't connect to or ALTER an invalid database. Just
+# mark the database as invalid ourselves, thats a lot easier than hitting the
+# required race conditions.
+#
+# FIXME: The test always outputs stuff to STDERR, that needs to be fixed. Also
+# likely worth ensuring that the expected errors are generated.
+
+$node->safe_psql("postgres", qq(
+CREATE DATABASE invalid;
+UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'invalid';
+));
+
+ok($node->psql('invalid', '') == 2,
+   "can't connect to invalid database");
+
+diag $node->psql('postgres', 'ALTER DATABASE invalid CONNECTION LIMIT 10');
+ok($node->psql('postgres', 'ALTER DATABASE invalid CONNECTION LIMIT 10') == 2,
+   "can't ALTER invalid database");
+
+# But we need to be able to drop an invalid database.
+ok($node->psql('postgres', 'DROP DATABASE invalid') == 0,
+   "can DROP invalid database");
+
+# Ensure database is gone
+diag $node->psql('postgres', 'DROP DATABASE invalid');
+ok($node->psql('postgres', 'DROP DATABASE invalid') == 3,
+   "can't drop already dropped database");
+
+
+# FIXME: It'd be good to test the actual interruption path. But it's not
+# immediately obvious how.
+
+done_testing();
-- 
2.38.0


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]

  Powered by Linux