On Monday 28 December 2009 23:54:51 Andres Freund wrote: > On Saturday 12 December 2009 21:38:41 Andres Freund wrote: > > On Saturday 12 December 2009 21:36:27 Michael Clemmons wrote: > > > If ppl think it's worth it I'll create a ticket > > > > Thanks, no need. I will post a patch tomorrow or so. > > Well. It was a long day... > > Anyway. > In this patch I delay the fsync done in copy_file and simply do a second > pass over the directory in copydir and fsync everything in that pass. > Including the directory - which was not done before and actually might be > necessary in some cases. > I added a posix_fadvise(..., POSIX_FADV_DONTNEED) to make it more likely that the > copied file reaches storage before the fsync. Without it the speed benefits > were quite a bit smaller and essentially random (which seems sensible). > > This speeds up CREATE DATABASE from ~9 seconds to something around 0.8s on > my laptop. Still slower than with fsync off (~0.25) but quite a worthy > improvement. > > The benefits are obviously bigger if the template database includes > anything added. Obviously the patch would be helpful. Andres
From bd80748883d1328a71607a447677b0bfb1f54ab0 Mon Sep 17 00:00:00 2001 From: Andres Freund <andres@xxxxxxxxxxx> Date: Mon, 28 Dec 2009 23:43:57 +0100 Subject: [PATCH] Delay fsyncing files during copying in CREATE DATABASE - this dramatically speeds up CREATE DATABASE on non battery backed rotational storage. Additionally fsync() the directory to ensure all metadata reaches storage. --- src/port/copydir.c | 58 +++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/port/copydir.c b/src/port/copydir.c index a70477e..cde3dc7 100644 *** a/src/port/copydir.c --- b/src/port/copydir.c *************** *** 37,42 **** --- 37,43 ---- static void copy_file(char *fromfile, char *tofile); + static void fsync_fname(char *fname); /* *************** copydir(char *fromdir, char *todir, bool *** 64,69 **** --- 65,73 ---- (errcode_for_file_access(), errmsg("could not open directory \"%s\": %m", fromdir))); + /* + * Copy all the files + */ while ((xlde = ReadDir(xldir, fromdir)) != NULL) { struct stat fst; *************** copydir(char *fromdir, char *todir, bool *** 89,96 **** else if (S_ISREG(fst.st_mode)) copy_file(fromfile, tofile); } - FreeDir(xldir); } /* --- 93,120 ---- else if (S_ISREG(fst.st_mode)) copy_file(fromfile, tofile); } FreeDir(xldir); + + /* + * Be paranoid here and fsync all files to ensure we catch problems. + */ + xldir = AllocateDir(fromdir); + if (xldir == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", fromdir))); + + while ((xlde = ReadDir(xldir, fromdir)) != NULL) + { + struct stat fst; + + if (strcmp(xlde->d_name, ".") == 0 || + strcmp(xlde->d_name, "..") == 0) + continue; + + snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name); + fsync_fname(tofile); + } } /* *************** copy_file(char *fromfile, char *tofile) *** 150,162 **** } /* ! * Be paranoid here to ensure we catch problems. */ ! if (pg_fsync(dstfd) != 0) ! ereport(ERROR, ! 
(errcode_for_file_access(), ! errmsg("could not fsync file \"%s\": %m", tofile))); ! if (close(dstfd)) ereport(ERROR, (errcode_for_file_access(), --- 174,185 ---- } /* ! * We tell the kernel here to write the data back in order to make ! * the later fsync cheaper. */ ! #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) ! posix_fadvise(dstfd, 0, 0, POSIX_FADV_DONTNEED); ! #endif if (close(dstfd)) ereport(ERROR, (errcode_for_file_access(), *************** copy_file(char *fromfile, char *tofile) *** 166,168 **** --- 189,212 ---- pfree(buffer); } + + /* + * fsync a file + */ + static void + fsync_fname(char *fname) + { + int fd = BasicOpenFile(fname, O_RDWR| PG_BINARY, + S_IRUSR | S_IWUSR); + + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", fname))); + + if (pg_fsync(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", fname))); + close(fd); + } -- 1.6.5.12.gd65df24
-- Sent via pgsql-performance mailing list (pgsql-performance@xxxxxxxxxxxxxx) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-performance