Hi, On 2023-05-08 17:46:37 -0700, Andres Freund wrote: > My current gut feeling is that we should use datconnlimit == -2 to prevent > connections after reaching DropDatabaseBuffers() in dropdb(), and use a new > column in 16, for both createdb() and dropdb(). Attached is a rough prototype of that idea (only using datconnlimit == -2 for now). This would need a fair bit more polish. The tests are crappy and output stuff to stderr and don't validate enough. The error messages are bad. No docs changes. More comments about why datconnlimit == -2 is used. Etc. But I think it should be sufficient to discuss whether this is a viable path. I guess we need to move this to -hackers. Perhaps I'll post subsequent versions below https://www.postgresql.org/message-id/20230314174521.74jl6ffqsee5mtug%40awork3.anarazel.de ? Greetings, Andres Freund
>From 5784ba0b21eaf05c2989c57ce1f73c13edf0111e Mon Sep 17 00:00:00 2001 From: Andres Freund <andres@xxxxxxxxxxx> Date: Mon, 8 May 2023 18:28:33 -0700 Subject: [PATCH v1] Handle interrupted DROP DATABASE Author: Reviewed-by: Discussion: https://postgr.es/m/20230509004637.cgvmfwrbht7xm7p6@xxxxxxxxxxxxxxxxxx Backpatch: --- src/include/catalog/pg_database.h | 2 +- src/backend/commands/dbcommands.c | 50 ++++++++++++++------- src/backend/utils/init/postinit.c | 11 +++++ src/test/recovery/meson.build | 1 + src/test/recovery/t/037_invalid_database.pl | 48 ++++++++++++++++++++ 5 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 src/test/recovery/t/037_invalid_database.pl diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index d004f4dc8aa..5cd972da324 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -49,7 +49,7 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID /* new connections allowed? */ bool datallowconn; - /* max connections allowed (-1=no limit) */ + /* max connections allowed (-1=no limit, -2=invalid database) */ int32 datconnlimit; /* all Xids < this are frozen in this DB */ diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 2e242eeff24..a0360d9ad80 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1569,6 +1569,7 @@ dropdb(const char *dbname, bool missing_ok, bool force) bool db_istemplate; Relation pgdbrel; HeapTuple tup; + Form_pg_database datform; int notherbackends; int npreparedxacts; int nslots, @@ -1684,17 +1685,6 @@ dropdb(const char *dbname, bool missing_ok, bool force) dbname), errdetail_busy_db(notherbackends, npreparedxacts))); - /* - * Remove the database's tuple from pg_database. - */ - tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_id)); - if (!HeapTupleIsValid(tup)) - elog(ERROR, "cache lookup failed for database %u", db_id); - - CatalogTupleDelete(pgdbrel, &tup->t_self); - - ReleaseSysCache(tup); - /* * Delete any comments or security labels associated with the database. */ @@ -1711,6 +1701,31 @@ dropdb(const char *dbname, bool missing_ok, bool force) */ dropDatabaseDependencies(db_id); + /* + * Tell the cumulative stats system to forget it immediately, too. + */ + pgstat_drop_database(db_id); + + tup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for database %u", db_id); + datform = (Form_pg_database) GETSTRUCT(tup); + + /* + * Subsequent actions are not transactional (consider + * DropDatabaseBuffers() discarding modified buffers). But we might crash + * or get interrupted below. To prevent accessing a database with invalid + * contents, mark the database as invalid using an in-place update. + */ + datform->datconnlimit = -2; + heap_inplace_update(pgdbrel, tup); + + /* + * Also delete the tuple - transactionally. If this transaction commits, + * the row will be gone, but if we fail, dropdb() can be invoked again. + */ + CatalogTupleDelete(pgdbrel, &tup->t_self); + /* * Drop db-specific replication slots. */ @@ -1723,11 +1738,6 @@ dropdb(const char *dbname, bool missing_ok, bool force) */ DropDatabaseBuffers(db_id); - /* - * Tell the cumulative stats system to forget it immediately, too. - */ - pgstat_drop_database(db_id); - /* * Tell checkpointer to forget any pending fsync and unlink requests for * files in the database; else the fsyncs will fail at next checkpoint, or @@ -2339,6 +2349,14 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel) datform = (Form_pg_database) GETSTRUCT(tuple); dboid = datform->oid; + if (datform->datconnlimit == -2) + { + ereport(FATAL, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("database \"%s\" is invalid", stmt->dbname), + errdetail("Use DROP DATABASE to drop invalid databases")); + } + if (!object_ownercheck(DatabaseRelationId, dboid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, stmt->dbname); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 53420f4974f..876b5cc1fec 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -1120,6 +1120,7 @@ InitPostgres(const char *in_dbname, Oid dboid, if (!bootstrap) { HeapTuple tuple; + Form_pg_database datform; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || @@ -1129,6 +1130,16 @@ InitPostgres(const char *in_dbname, Oid dboid, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); + + datform = (Form_pg_database) GETSTRUCT(tuple); + + if (datform->datconnlimit == -2) + { + ereport(FATAL, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("database \"%s\" is invalid", dbname), + errdetail("Use DROP DATABASE to drop invalid databases")); + } } /* diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 20089580100..e7328e48944 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -42,6 +42,7 @@ tests += { 't/034_create_database.pl', 't/035_standby_logical_decoding.pl', 't/036_truncated_dropped.pl', + 't/037_invalid_database.pl', ], }, } diff --git a/src/test/recovery/t/037_invalid_database.pl b/src/test/recovery/t/037_invalid_database.pl new file mode 100644 index 00000000000..af309601b3b --- /dev/null +++ b/src/test/recovery/t/037_invalid_database.pl @@ -0,0 +1,48 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group +# +# Test we handle interrupted DROP DATABASE correctly. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init; +$node->start; + + +# First verify that we can't connect to or ALTER an invalid database. Just +# mark the database as invalid ourselves, thats a lot easier than hitting the +# required race conditions. +# +# FIXME: The test always outputs stuff to STDERR, that needs to be fixed. Also +# likely worth ensuring that the expected errors are generated. + +$node->safe_psql("postgres", qq( +CREATE DATABASE invalid; +UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'invalid'; +)); + +ok($node->psql('invalid', '') == 2, + "can't connect to invalid database"); + +diag $node->psql('postgres', 'ALTER DATABASE invalid CONNECTION LIMIT 10'); +ok($node->psql('postgres', 'ALTER DATABASE invalid CONNECTION LIMIT 10') == 2, + "can't ALTER invalid database"); + +# But we need to be able to drop an invalid database. +ok($node->psql('postgres', 'DROP DATABASE invalid') == 0, + "can DROP invalid database"); + +# Ensure database is gone +diag $node->psql('postgres', 'DROP DATABASE invalid'); +ok($node->psql('postgres', 'DROP DATABASE invalid') == 3, + "can't drop already dropped database"); + + +# FIXME: It'd be good to test the actual interruption path. But it's not +# immediately obvious how. + +done_testing(); -- 2.38.0