O_DIRECT has stringent requirements. Rather than make lots of changes at each site that wants to use O_DIRECT, it is easier to offload the work through a helper process that mirrors the I/O between a pipe and the actual direct fd, so that the other end of the pipe no longer has to worry about constraints. Plus, if the kernel ever gains better posix_fadvise support, then we only have to touch a single file to let all callers benefit from a more efficient way to avoid file system caching. * src/util/virfile.h (virFileDirectFdFlag, virFileDirectFdNew) (virFileDirectFdClose, virFileDirectFdFree): New prototypes. * src/util/virdirect.c: Implement new wrapper object. * src/libvirt_private.syms (virfile.h): Export new symbols. * cfg.mk (useless_free_options): Add to list. * po/POTFILES.in: Add new translations. --- v2: move everything into virfile.c rather than creating a new file, with renames everywhere. Fail rather than silently ignore systems where O_DIRECT is 0. Add comments to take advantage of posix_fadvise rather than O_DIRECT if the Linux kernel ever gets fixed. Add virFileDirectFdFlag as a better probe of which cache-bypass mechanism is actually being used. Better documentation. cfg.mk | 1 + po/POTFILES.in | 1 + src/libvirt_private.syms | 4 + src/util/virfile.c | 175 +++++++++++++++++++++++++++++++++++++++++++++- src/util/virfile.h | 15 ++++ 5 files changed, 195 insertions(+), 1 deletions(-) diff --git a/cfg.mk b/cfg.mk index 0a624f1..773a3df 100644 --- a/cfg.mk +++ b/cfg.mk @@ -119,6 +119,7 @@ useless_free_options = \ --name=virDomainSoundDefFree \ --name=virDomainVideoDefFree \ --name=virDomainWatchdogDefFree \ + --name=virFileDirectFdFree \ --name=virHashFree \ --name=virInterfaceDefFree \ --name=virInterfaceIpDefFree \ diff --git a/po/POTFILES.in b/po/POTFILES.in index 5782cbf..7bb08b0 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -122,6 +122,7 @@ src/util/storage_file.c src/util/sysinfo.c src/util/util.c src/util/viraudit.c +src/util/virfile.c src/util/virterror.c src/util/xml.c src/vbox/vbox_MSCOMGlue.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d42e0a0..0bab64d 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1082,6 +1082,10 @@ virAuditSend; # virfile.h virFileClose; +virFileDirectFdClose; +virFileDirectFdFlag; +virFileDirectFdFree; +virFileDirectFdNew; virFileFclose; virFileFdopen; diff --git a/src/util/virfile.c b/src/util/virfile.c index 6576921..8b32518 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -23,10 +23,24 @@ */ #include <config.h> +#include "internal.h" +#include "virfile.h" + +#include <fcntl.h> +#include <sys/stat.h> #include <unistd.h> -#include "virfile.h" +#include "command.h" +#include "configmake.h" +#include "memory.h" +#include "virterror_internal.h" + +#define VIR_FROM_THIS VIR_FROM_NONE +#define virFileError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + int virFileClose(int *fdptr, bool preserve_errno) { @@ -78,3 +92,162 @@ FILE *virFileFdopen(int *fdptr, const char *mode) return file; } + + +/* Opaque type for managing a wrapper around an O_DIRECT fd. For now, + * read-write is not supported, just a single direction. */ +struct _virFileDirectFd { + virCommandPtr cmd; /* Child iohelper process to do the I/O. */ +}; + +/** + * virFileDirectFdFlag: + * + * Returns 0 if the kernel can avoid file system cache pollution + * without any additional flags, O_DIRECT if the original fd must be + * opened in direct mode, or -1 if there is no support for bypassing + * the file system cache. + */ +int +virFileDirectFdFlag(void) +{ + /* XXX For now, Linux posix_fadvise is not powerful enough to + * avoid O_DIRECT. */ + return O_DIRECT ? O_DIRECT : -1; +} + +/** + * virFileDirectFdNew: + * @fd: pointer to fd to wrap + * @name: name of fd, for diagnostics + * + * Update *FD (created with virFileDirectFdFlag() among the flags to + * open()) to ensure that all I/O to that file will bypass the system + * cache. This must be called after open() and optional fchown() or + * fchmod(), but before any seek or I/O, and only on seekable fd. The + * file must be O_RDONLY (to read the entire existing file) or + * O_WRONLY (to write to an empty file). In some cases, *FD is + * changed to a non-seekable pipe; in this case, the caller must not + * do anything further with the original fd. + * + * On success, the new wrapper object is returned, which must be later + * freed with virFileDirectFdFree(). On failure, *FD is unchanged, an + * error message is output, and NULL is returned. + */ +virFileDirectFdPtr +virFileDirectFdNew(int *fd, const char *name) +{ + virFileDirectFdPtr ret = NULL; + bool output = false; + int pipefd[2] = { -1, -1 }; + int mode = -1; + + /* XXX support posix_fadvise rather than spawning a child, if the + * kernel support for that is decent enough. */ + + if (!O_DIRECT) { + virFileError(VIR_ERR_INTERNAL_ERROR, "%s", + _("O_DIRECT unsupported on this platform")); + return NULL; + } + + if (VIR_ALLOC(ret) < 0) { + virReportOOMError(); + return NULL; + } + +#ifdef F_GETFL + /* Mingw lacks F_GETFL, but it also lacks O_DIRECT so didn't get + * here in the first place. All other platforms reach this + * line. */ + mode = fcntl(*fd, F_GETFL); +#endif + + if (mode < 0) { + virFileError(VIR_ERR_INTERNAL_ERROR, _("invalid fd %d for %s"), + *fd, name); + goto error; + } else if ((mode & O_ACCMODE) == O_WRONLY) { + output = true; + } else if ((mode & O_ACCMODE) != O_RDONLY) { + virFileError(VIR_ERR_INTERNAL_ERROR, _("unexpected mode %x for %s"), + mode & O_ACCMODE, name); + goto error; + } + + if (pipe2(pipefd, O_CLOEXEC) < 0) { + virFileError(VIR_ERR_INTERNAL_ERROR, + _("unable to create pipe for %s"), name); + goto error; + } + + ret->cmd = virCommandNewArgList(LIBEXECDIR "/libvirt_iohelper", + name, "0", NULL); + if (output) { + virCommandSetInputFD(ret->cmd, pipefd[0]); + virCommandSetOutputFD(ret->cmd, fd); + virCommandAddArg(ret->cmd, "1"); + } else { + virCommandSetInputFD(ret->cmd, *fd); + virCommandSetOutputFD(ret->cmd, &pipefd[1]); + virCommandAddArg(ret->cmd, "0"); + } + + if (virCommandRunAsync(ret->cmd, NULL) < 0) + goto error; + + if (VIR_CLOSE(pipefd[!output]) < 0) { + virFileError(VIR_ERR_INTERNAL_ERROR, "%s", _("unable to close pipe")); + goto error; + } + + VIR_FORCE_CLOSE(*fd); + *fd = pipefd[output]; + return ret; + +error: + VIR_FORCE_CLOSE(pipefd[0]); + VIR_FORCE_CLOSE(pipefd[1]); + virFileDirectFdFree(ret); + return NULL; +} + +/** + * virFileDirectFdClose: + * @dfd: direct fd wrapper, or NULL + * + * If DFD is valid, then ensure that I/O has completed, which may + * include reaping a child process. Return 0 if all data for the + * wrapped fd is complete, or -1 on failure with an error emitted. + * This function intentionally returns 0 when DFD is NULL, so that + * callers can conditionally create a virFileDirectFd wrapper but + * unconditionally call the cleanup code. To avoid deadlock, only + * call this after closing the fd resulting from virFileDirectFdNew(). + */ +int +virFileDirectFdClose(virFileDirectFdPtr dfd) +{ + if (!dfd) + return 0; + + return virCommandWait(dfd->cmd, NULL); +} + +/** + * virFileDirectFdFree: + * @dfd: direct fd wrapper, or NULL + * + * Free all remaining resources associated with DFD. If + * virFileDirectFdClose() was not previously called, then this may + * discard some previous I/O. To avoid deadlock, only call this after + * closing the fd resulting from virFileDirectFdNew(). + */ +void +virFileDirectFdFree(virFileDirectFdPtr dfd) +{ + if (!dfd) + return; + + virCommandFree(dfd->cmd); + VIR_FREE(dfd); +} diff --git a/src/util/virfile.h b/src/util/virfile.h index d11f902..0906568 100644 --- a/src/util/virfile.h +++ b/src/util/virfile.h @@ -50,4 +50,19 @@ FILE *virFileFdopen(int *fdptr, const char *mode) ATTRIBUTE_RETURN_CHECK; # define VIR_FORCE_CLOSE(FD) ignore_value(virFileClose(&(FD), true)) # define VIR_FORCE_FCLOSE(FILE) ignore_value(virFileFclose(&(FILE), true)) +/* Opaque type for managing a wrapper around an O_DIRECT fd. */ +struct _virFileDirectFd; + +typedef struct _virFileDirectFd virFileDirectFd; +typedef virFileDirectFd *virFileDirectFdPtr; + +int virFileDirectFdFlag(void); + +virFileDirectFdPtr virFileDirectFdNew(int *fd, const char *name) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; + +int virFileDirectFdClose(virFileDirectFdPtr dfd); + +void virFileDirectFdFree(virFileDirectFdPtr dfd); + #endif /* __VIR_FILES_H */ -- 1.7.4.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list