Hello. Added -hackers. At Wed, 15 Apr 2020 12:14:25 +0200, "Peter J. Holzer" <hjp-pgsql@xxxxxx> wrote in > On 2020-04-15 12:01:46 +0200, Peter J. Holzer wrote: > > I'm trying to restore a backup on a different machine and it terminates > > with the not really helpful messages: > > > > pg_restore: [directory archiver] could not close data file: Success > > pg_restore: [parallel archiver] a worker process died unexpectedly > [...] > > My guess is that maybe one of the data files is damaged > > As is often the case the matter became obvious a few minutes after > writing the mail. > > There were indeed two file with length 0 in the dump. That happened > because the backup failed because it couldn't obtain a lock on a table. > > I nicer error message (something like "cannot decompress '13503.dat.gz': > Empty file") would have helped. Unfortunately, just emptying a .dat.gz file didn't work for me. Anyway, the message is emitted in the following way. pg_backup_directory.c: > if (cfclose(cfp) !=0) > fatal("could not close data file: %m"); %m doesn't work for some kinds of errors about compressed files, but cfclose conceals the true cause. I was surprised to find an old thread about the same issue. https://www.postgresql.org/message-id/20160307.174354.251049100.horiguchi.kyotaro%40lab.ntt.co.jp I don't think it's acceptable to use a fake errno for gzclose, but cfclose properly passes through the error code from gzclose, so it is enough for the caller to recognize the difference. Please find the attached patch. regards. -- Kyotaro Horiguchi NTT Open Source Software Center
diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c index 1417401086..3a8394d7f2 100644 --- a/src/bin/pg_dump/compress_io.c +++ b/src/bin/pg_dump/compress_io.c @@ -645,6 +645,13 @@ cfgets(cfp *fp, char *buf, int len) return fgets(buf, len, fp->uncompressedfp); } +/* + * cfclose close the stream + * + * Returns 0 if successfully closed the cfp. Most of errors are reported as -1 + * and errno is set. Otherwise the return value is the return value from + * gzclose and errno doesn't hold a meangful value. + */ int cfclose(cfp *fp) { @@ -665,6 +672,11 @@ cfclose(cfp *fp) #endif { result = fclose(fp->uncompressedfp); + + /* normalize error return, just in case EOF is not -1 */ + if (result != 0) + result = -1; + fp->uncompressedfp = NULL; } free_keep_errno(fp); diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index c9cce5ed8a..ecc6aa5fbb 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -108,6 +108,7 @@ void InitArchiveFmt_Directory(ArchiveHandle *AH) { lclContext *ctx; + int ret; /* Assuming static functions, this can be copied for each format. */ AH->ArchiveEntryPtr = _ArchiveEntry; @@ -218,8 +219,14 @@ InitArchiveFmt_Directory(ArchiveHandle *AH) ReadToc(AH); /* Nothing else in the file, so close it again... 
*/ - if (cfclose(tocFH) != 0) - fatal("could not close TOC file: %m"); + ret = cfclose(tocFH); + if (ret < 0) + { + if (ret == -1) + fatal("could not close TOC file: %m"); + else + fatal("could not close TOC file: zlib error (%d)", ret); + } ctx->dataFH = NULL; } } @@ -378,6 +385,7 @@ _PrintFileData(ArchiveHandle *AH, char *filename) char *buf; size_t buflen; cfp *cfp; + int ret; if (!filename) return; @@ -396,8 +404,15 @@ _PrintFileData(ArchiveHandle *AH, char *filename) } free(buf); - if (cfclose(cfp) !=0) - fatal("could not close data file: %m"); + + ret = cfclose(cfp); + if (ret < 0) + { + if (ret == -1) + fatal("could not close data file: %m"); + else + fatal("could not close data file: zlib error (%d)", ret); + } } /* @@ -429,6 +444,7 @@ _LoadBlobs(ArchiveHandle *AH) lclContext *ctx = (lclContext *) AH->formatData; char fname[MAXPGPATH]; char line[MAXPGPATH]; + int ret; StartRestoreBlobs(AH); @@ -460,9 +476,16 @@ _LoadBlobs(ArchiveHandle *AH) fatal("error reading large object TOC file \"%s\"", fname); - if (cfclose(ctx->blobsTocFH) != 0) - fatal("could not close large object TOC file \"%s\": %m", - fname); + ret = cfclose(ctx->blobsTocFH); + if (ret < 0) + { + if (ret == -1) + fatal("could not close large object TOC file \"%s\": %m", + fname); + else + fatal("could not close large object TOC file \"%s\": zlib error (%d)", + fname, ret); + } ctx->blobsTocFH = NULL; @@ -555,6 +578,7 @@ _CloseArchive(ArchiveHandle *AH) { cfp *tocFH; char fname[MAXPGPATH]; + int ret; setFilePath(AH, fname, "toc.dat"); @@ -576,8 +600,14 @@ _CloseArchive(ArchiveHandle *AH) WriteHead(AH); AH->format = archDirectory; WriteToc(AH); - if (cfclose(tocFH) != 0) - fatal("could not close TOC file: %m"); + ret = cfclose(tocFH); + if (ret < 0) + { + if (ret == -1) + fatal("could not close TOC file: %m"); + else + fatal("could not close TOC file: zlib error (%d)", ret); + } WriteDataChunks(AH, ctx->pstate); ParallelBackupEnd(AH, ctx->pstate);