Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage. * src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver --- libvirt.spec.in | 11 + po/POTFILES.in | 1 + src/Makefile.am | 12 + src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ 5 files changed, 438 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c diff --git a/libvirt.spec.in b/libvirt.spec.in index e85f68f..73213ea 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -77,6 +77,7 @@ %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} %define with_referential 0%{!?_without_referential:1} +%define with_sanlock 0%{!?_without_sanlock:0} # Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -180,6 +181,7 @@ %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6 %define with_dtrace 1 +%define with_sanlock 1 %endif # Pull in cgroups config system @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel %if %{with_referential} BuildRequires: referential-devel %endif +%if %{with_sanlock} +BuildRequires: sanlock-devel +%endif %if %{with_storage_fs} # For mount/umount in FS driver @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/*.a rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a %if %{with_network} install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ @@ -1004,6 +1011,10 @@ fi %attr(0755, root, root) %{_libexecdir}/libvirt_lxc %endif +%if %{with_sanlock} +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so +%endif + %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper %attr(0755, root, root) %{_sbindir}/libvirtd diff --git a/po/POTFILES.in b/po/POTFILES.in index 9c3d287..c3b45f9 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c diff --git a/src/Makefile.am b/src/Makefile.am index 1e5a72e..edf017d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ locking/lock_driver_nop.h locking/lock_driver_nop.c \ locking/domain_lock.h locking/domain_lock.c +LOCK_DRIVER_SANLOCK_SOURCES = \ + locking/lock_driver_sanlock.c + # XML configuration format handling sources # Domain driver generic impl APIs @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE) + +lockdriverdir = $(libdir)/libvirt/lock-driver +lockdriver_LTLIBRARIES = sanlock.la + +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) +sanlock_la_CFLAGS = $(AM_CLFAGS) +sanlock_la_LDFLAGS = -module -avoid-version +sanlock_la_LIBADD = -lsanlock + libexec_PROGRAMS = if WITH_LIBVIRTD diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a3fe2f1..e61ea13 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -650,6 +650,7 @@ virVMOperationTypeToString; # memory.h virAlloc; virAllocN; +virAllocVar; virExpandN; virFree; virReallocN; diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c new file mode 100644 index 0000000..6a31fdf --- /dev/null +++ b/src/locking/lock_driver_sanlock.c @@ -0,0 +1,413 @@ +/* + * lock_driver_sanlock.c: A lock driver for Sanlock + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> + +#include <sanlock.h> +#include <sanlock_resource.h> + +#include "lock_driver.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; + +struct _virLockManagerSanlockPrivate { + char vm_name[SANLK_NAME_LEN]; + char vm_uuid[VIR_UUID_BUFLEN]; + unsigned int vm_id; + unsigned int vm_pid; + unsigned int flags; + bool hasRWDisks; + int res_count; + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; +}; + +/* + * sanlock plugin for the libvirt virLockManager API + */ + +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, + unsigned int flags) +{ + virCheckFlags(0, -1); + return 0; +} + +static int virLockManagerSanlockDeinit(void) +{ + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unloading sanlock plugin is forbidden")); + return -1; +} + +static int virLockManagerSanlockNew(virLockManagerPtr lock, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerParamPtr param; + virLockManagerSanlockPrivatePtr priv; + int i; + + virCheckFlags(0, -1); + + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unsupported object type %d"), type); + return -1; + } + + if (VIR_ALLOC(priv) < 0) { + virReportOOMError(); + return -1; + } + + priv->flags = flags; + + for (i = 0; i < nparams; i++) { + param = ¶ms[i]; + + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + } else if (STREQ(param->key, "name")) { + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + param->value.str, SANLK_NAME_LEN); + goto error; + } + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.ui; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } + } + + lock->privateData = priv; + return 0; + +error: + VIR_FREE(priv); + return -1; +} + +static void virLockManagerSanlockFree(virLockManagerPtr lock) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int i; + + if (!priv) + return; + + for (i = 0; i < priv->res_count; i++) + VIR_FREE(priv->res_args[i]); + VIR_FREE(priv); + lock->privateData = NULL; +} + +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_resource *res; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + + if (priv->res_count == SANLK_MAX_RESOURCES) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Too many resources %d for object"), + SANLK_MAX_RESOURCES); + return -1; + } + + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } + + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) + return 0; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Readonly leases are not supported")); + return -1; + } + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Sharable leases are not supported")); + return -1; + } + + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; + } + + res->num_disks = 1; + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource name '%s' exceeds %d characters"), + name, SANLK_NAME_LEN); + goto error; + } + + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "path")) { + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Lease path '%s' exceeds %d characters"), + params[i].value.str, SANLK_PATH_LEN); + goto error; + } + } else if (STREQ(params[i].key, "offset")) { + res->disks[0].offset = params[i].value.ul; + } else if (STREQ(params[i].key, "lockspace")) { + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource lockspace '%s' exceeds %d characters"), + params[i].value.str, SANLK_NAME_LEN); + goto error; + } + } + } + + priv->res_args[priv->res_count] = res; + priv->res_count++; + return 0; + +error: + VIR_FREE(res); + return -1; +} + +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_options *opt; + struct sanlk_resource **res_args; + int res_count; + bool res_free = false; + int sock = -1; + int rv; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); + + if (priv->res_count == 0 && + priv->hasRWDisks) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + if (VIR_ALLOC(opt) < 0) { + virReportOOMError(); + return -1; + } + + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + priv->vm_name, SANLK_NAME_LEN); + goto error; + } + + if (state && STRNEQ(state, "") && 0) { + if ((rv = sanlock_state_to_args((char *)state, + &res_count, + &res_args)) < 0) { + virReportSystemError(-rv, + _("Unable to parse lock state %s"), + state); + goto error; + } + res_free = true; + } else { + res_args = priv->res_args; + res_count = priv->res_count; + } + + VIR_DEBUG("Register sanlock %d", flags); + /* We only initialize 'sock' if we are in the real + * child process and we need it to be inherited + * + * If sock==-1, then sanlock auto-open/closes a + * temporary sock + */ + if (priv->vm_pid == getpid() && + (sock = sanlock_register()) < 0) { + virReportSystemError(-sock, "%s", + _("Failed to open socket to sanlock daemon")); + goto error; + } + + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + VIR_DEBUG("Acquiring object %u", priv->res_count); + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, + priv->res_count, priv->res_args, + opt)) < 0) { +#if 1 + virReportSystemError(-rv, "%s", + _("Failed to acquire lock")); +#else + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to acquire lock")); +#endif + goto error; + } + } + + VIR_FREE(opt); + + /* + * We are *intentionally* "leaking" sock file descriptor + * because we want it to be inherited by QEMU. When the + * sock FD finally closes upon QEMU exit (or crash) then + * sanlock will notice EOF and release the lock + */ + if (sock != -1 && + virSetInherit(sock, true) < 0) + goto error; + + VIR_DEBUG("Acquire completed fd=%d", sock); + + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + + return 0; + +error: + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + VIR_FREE(opt); + VIR_FORCE_CLOSE(sock); + return -1; +} + + +static int virLockManagerSanlockRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int res_count; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + return 0; +} + +static int virLockManagerSanlockInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int rv, res_count; + + virCheckFlags(0, -1); + + VIR_DEBUG("pid=%d", priv->vm_pid); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + return 0; +} + +virLockDriver virLockDriverImpl = +{ + .version = VIR_LOCK_MANAGER_VERSION, + + .flags = VIR_LOCK_MANAGER_USES_STATE, + + .drvInit = virLockManagerSanlockInit, + .drvDeinit = virLockManagerSanlockDeinit, + + .drvNew = virLockManagerSanlockNew, + .drvFree = virLockManagerSanlockFree, + + .drvAddResource = virLockManagerSanlockAddResource, + + .drvAcquire = virLockManagerSanlockAcquire, + .drvRelease = virLockManagerSanlockRelease, + .drvInquire = virLockManagerSanlockInquire, +}; -- 1.7.4.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list