On Thu, Jul 14, 2011 at 08:24:33AM -0600, Eric Blake wrote: > Required for a coming patch where iohelper will operate on O_DIRECT > fds. There, the user-space memory must be aligned to file system > boundaries (at least page-aligned works best, and some file systems > prefer 64k). Made tougher by the fact that VIR_ALLOC won't work > on void *, but posix_memalign won't work on char * and isn't > available everywhere. > > This patch makes some simplifying assumptions - namely, output > to an O_DIRECT fd will only be attempted on an empty seekable > file (hence, no need to worry about preserving existing data > on a partial block, and ftruncate will work to undo the effects > of having to round up the size of the last block written). > > * configure.ac (AC_CHECK_FUNCS_ONCE): Check for posix_memalign. > * src/util/iohelper.c (runIO): Use aligned memory, and handle > quirks of O_DIRECT on last write. > --- > > This one took me the longest time to get right, so a careful > review is appreciated. > > configure.ac | 6 +++--- > src/util/iohelper.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 45 insertions(+), 6 deletions(-) > > diff --git a/configure.ac b/configure.ac > index e9d5be4..9e39f44 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -121,9 +121,9 @@ AC_CHECK_SIZEOF([long]) > > dnl Availability of various common functions (non-fatal if missing), > dnl and various less common threadsafe functions > -AC_CHECK_FUNCS_ONCE([cfmakeraw regexec sched_getaffinity getuid getgid \ > - geteuid initgroups posix_fallocate mmap kill \ > - getmntent_r getgrnam_r getpwuid_r]) > +AC_CHECK_FUNCS_ONCE([cfmakeraw geteuid getgid getgrnam_r getmntent_r \ > + getpwuid_r getuid initgroups kill mmap posix_fallocate posix_memalign \ > + regexec sched_getaffinity]) > > dnl Availability of pthread functions (if missing, win32 threading is > dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE. > diff --git a/src/util/iohelper.c b/src/util/iohelper.c > index 1185bde..82f62e1 100644 > --- a/src/util/iohelper.c > +++ b/src/util/iohelper.c > @@ -74,17 +74,32 @@ cleanup: > static int > runIO(const char *path, int fd, int oflags, unsigned long long length) > { > - char *buf = NULL; > + void *base = NULL; /* Location to be freed */ > + char *buf = NULL; /* Aligned location within base */ > size_t buflen = 1024*1024; > + intptr_t alignMask = 64*1024 - 1; > int ret = -1; > int fdin, fdout; > const char *fdinname, *fdoutname; > unsigned long long total = 0; > + bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0); > + bool shortRead = false; /* true if we hit a short read */ > + off_t end = 0; > > - if (VIR_ALLOC_N(buf, buflen) < 0) { > +#if HAVE_POSIX_MEMALIGN > + if (posix_memalign(&base, alignMask + 1, buflen)) { > virReportOOMError(); > goto cleanup; > } > + buf = base; > +#else > + if (VIR_ALLOC_N(buf, buflen + alignMask) < 0) { > + virReportOOMError(); > + goto cleanup; > + } > + base = buf; > + buf = (char *) (((intptr_t) base + alignMask) & alignMask); > +#endif > > switch (oflags & O_ACCMODE) { > case O_RDONLY: > @@ -98,6 +113,13 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) > fdinname = "stdin"; > fdout = fd; > fdoutname = path; > + /* To make the implementation simpler, we give up on any > + * attempt to use O_DIRECT in a non-trivial manner. */ > + if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) { > + virReportSystemError(end < 0 ? errno : EINVAL, "%s", > + _("O_DIRECT needs empty seekable file")); > + goto cleanup; > + } > break; > > case O_RDWR: > @@ -124,12 +146,29 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) > } > if (got == 0) > break; /* End of file before end of requested data */ > + if (got < buflen || (buflen & alignMask)) { > + /* O_DIRECT can handle at most one short read, at end of file */ > + if (direct && shortRead) { > + virReportSystemError(EINVAL, "%s", > + _("Too many short reads for O_DIRECT")); > + } > + shortRead = true; > + } > > total += got; > + if (fdout == fd && direct && shortRead) { > + end = total; > + memset(buf + got, 0, buflen - got); > + got = (got + alignMask) & ~alignMask; > + } > if (safewrite(fdout, buf, got) < 0) { > virReportSystemError(errno, _("Unable to write %s"), fdoutname); > goto cleanup; > } > + if (end && ftruncate(fd, end) < 0) { > + virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); > + goto cleanup; > + } > } > > ret = 0; > @@ -141,7 +180,7 @@ cleanup: > ret = -1; > } > > - VIR_FREE(buf); > + VIR_FREE(base); > return ret; > } ACK, I'm wondering if we should move the guts of iohelper out into a function in src/util which we can unit test. Or perhaps its easy enough to just unit test the fdstream code and thus iohelper Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list