Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage. * src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver * configure.ac: Check for sanlock * libvirt.spec.in: Add a libvirt-lock-sanlock RPM --- configure.ac | 55 +++++ libvirt.spec.in | 34 +++ po/POTFILES.in | 1 + src/Makefile.am | 16 ++ src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 434 +++++++++++++++++++++++++++++++++++++ 6 files changed, 541 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c diff --git a/configure.ac b/configure.ac index 7cffbf2..9ef98ad 100644 --- a/configure.ac +++ b/configure.ac @@ -929,6 +929,56 @@ AC_SUBST([YAJL_CFLAGS]) AC_SUBST([YAJL_LIBS]) +dnl SANLOCK https://fedorahosted.org/sanlock/ +AC_ARG_WITH([sanlock], + AC_HELP_STRING([--with-sanlock], [use SANLOCK for lock management @<:@default=check@:>@]), + [], + [with_sanlock=check]) + +SANLOCK_CFLAGS= +SANLOCK_LIBS= +if test "x$with_sanlock" != "xno"; then + if test "x$with_sanlock" != "xyes" && test "x$with_sanlock" != "xcheck"; then + SANLOCK_CFLAGS="-I$with_sanlock/include" + SANLOCK_LIBS="-L$with_sanlock/lib" + fi + fail=0 + old_cppflags="$CPPFLAGS" + old_libs="$LIBS" + CPPFLAGS="$CPPFLAGS $SANLOCK_CFLAGS" + LIBS="$LIBS $SANLOCK_LIBS" + AC_CHECK_HEADER([sanlock.h],[],[ + if test "x$with_sanlock" = "xcheck" ; then + with_sanlock=no + else + fail=1 + fi]) + if test "x$with_sanlock" != "xno" ; then + AC_CHECK_LIB([sanlock], [sanlock_acquire],[ + SANLOCK_LIBS="$SANLOCK_LIBS -lsanlock" + with_sanlock=yes + ],[ + if test "x$with_sanlock" = "xcheck" ; then + with_sanlock=no + else + fail=1 + fi + ]) + fi + test $fail = 1 && + AC_MSG_ERROR([You must install the SANLOCK development package in order to compile libvirt]) + CPPFLAGS="$old_cppflags" + LIBS="$old_libs" + if test "x$with_sanlock" = "xyes" ; then + AC_DEFINE_UNQUOTED([HAVE_SANLOCK], 1, + [whether SANLOCK is 
available for lock management]) + fi +fi +AM_CONDITIONAL([HAVE_SANLOCK], [test "x$with_sanlock" = "xyes"]) +AC_SUBST([SANLOCK_CFLAGS]) +AC_SUBST([SANLOCK_LIBS]) + + dnl PolicyKit library POLKIT_CFLAGS= POLKIT_LIBS= @@ -2440,6 +2490,11 @@ AC_MSG_NOTICE([ yajl: $YAJL_CFLAGS $YAJL_LIBS]) else AC_MSG_NOTICE([ yajl: no]) fi +if test "$with_sanlock" != "no" ; then +AC_MSG_NOTICE([ sanlock: $SANLOCK_CFLAGS $SANLOCK_LIBS]) +else +AC_MSG_NOTICE([ sanlock: no]) +fi if test "$with_avahi" = "yes" ; then AC_MSG_NOTICE([ avahi: $AVAHI_CFLAGS $AVAHI_LIBS]) else diff --git a/libvirt.spec.in b/libvirt.spec.in index c01b759..fc9659d 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -77,6 +77,7 @@ %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} %define with_referential 0%{!?_without_referential:1} +%define with_sanlock 0%{!?_without_sanlock:0} # Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -163,6 +164,11 @@ %define with_yajl 0%{!?_without_yajl:%{server_drivers}} %endif +# Enable sanlock library for lock management with QEMU +%if 0%{?fedora} >= 15 || 0%{?rhel} >= 6 +%define with_sanlock 0%{!?_without_sanlock:%{server_drivers}} +%endif + # Enable libpcap library %if %{with_qemu} %define with_nwfilter 0%{!?_without_nwfilter:%{server_drivers}} @@ -333,6 +339,9 @@ BuildRequires: libpciaccess-devel >= 0.10.9 %if %{with_yajl} BuildRequires: yajl-devel %endif +%if %{with_sanlock} +BuildRequires: sanlock-devel +%endif %if %{with_libpcap} BuildRequires: libpcap-devel %endif @@ -487,6 +496,18 @@ Requires: xen-devel Includes and documentations for the C library providing an API to use the virtualization capabilities of recent versions of Linux (and other OSes).
+%if %{with_sanlock} +%package lock-sanlock +Summary: Sanlock lock manager plugin for QEMU driver +Group: Development/Libraries +Requires: sanlock +Requires: %{name} = %{version}-%{release} + +%description lock-sanlock +Includes the Sanlock lock manager plugin for the QEMU +driver +%endif + %if %{with_python} %package python Summary: Python bindings for the libvirt library @@ -635,6 +656,10 @@ libvirt reference counting %define _without_yajl --without-yajl %endif +%if ! %{with_sanlock} +%define _without_sanlock --without-sanlock +%endif + %if ! %{with_libpcap} %define _without_libpcap --without-libpcap %endif @@ -692,6 +717,7 @@ libvirt reference counting %{?_without_hal} \ %{?_without_udev} \ %{?_without_yajl} \ + %{?_without_sanlock} \ %{?_without_libpcap} \ %{?_without_macvtap} \ %{?_without_audit} \ @@ -718,6 +744,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/*.a rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a %if %{with_network} install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ @@ -1013,6 +1041,12 @@ fi %doc docs/*.xml %endif +%if %{with_sanlock} +%files lock-sanlock +%defattr(-, root, root) +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so +%endif + %files client -f %{name}.lang %defattr(-, root, root) %doc AUTHORS ChangeLog.gz NEWS README COPYING.LIB TODO diff --git a/po/POTFILES.in b/po/POTFILES.in index 9c3d287..c3b45f9 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c diff --git a/src/Makefile.am b/src/Makefile.am index 1e5a72e..15d6ee7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ 
locking/lock_driver_nop.h locking/lock_driver_nop.c \ locking/domain_lock.h locking/domain_lock.c +LOCK_DRIVER_SANLOCK_SOURCES = \ + locking/lock_driver_sanlock.c + # XML configuration format handling sources # Domain driver generic impl APIs @@ -1159,6 +1162,19 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE) + +if HAVE_SANLOCK +lockdriverdir = $(libdir)/libvirt/lock-driver +lockdriver_LTLIBRARIES = sanlock.la + +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) +sanlock_la_CFLAGS = $(AM_CFLAGS) +sanlock_la_LDFLAGS = -module -avoid-version +sanlock_la_LIBADD = -lsanlock +else +EXTRA_DIST += $(LOCK_DRIVER_SANLOCK_SOURCES) +endif + libexec_PROGRAMS = if WITH_LIBVIRTD diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 780b090..136f887 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -652,6 +652,7 @@ virVMOperationTypeToString; # memory.h virAlloc; virAllocN; +virAllocVar; virExpandN; virFree; virReallocN; diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c new file mode 100644 index 0000000..7e0610d --- /dev/null +++ b/src/locking/lock_driver_sanlock.c @@ -0,0 +1,434 @@ +/* + * lock_driver_sanlock.c: A lock driver for Sanlock + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> + +#include <sanlock.h> +#include <sanlock_resource.h> + +#include "lock_driver.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; + +struct _virLockManagerSanlockPrivate { + char vm_name[SANLK_NAME_LEN]; + char vm_uuid[VIR_UUID_BUFLEN]; + unsigned int vm_id; + unsigned int vm_pid; + unsigned int flags; + bool hasRWDisks; + int res_count; + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; +}; + +/* + * sanlock plugin for the libvirt virLockManager API + */ + +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, + unsigned int flags) +{ + virCheckFlags(0, -1); + return 0; +} + +static int virLockManagerSanlockDeinit(void) +{ + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unloading sanlock plugin is forbidden")); + return -1; +} + +static int virLockManagerSanlockNew(virLockManagerPtr lock, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerParamPtr param; + virLockManagerSanlockPrivatePtr priv; + int i; + + virCheckFlags(0, -1); + + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unsupported object type %d"), type); + return -1; + } + + if 
(VIR_ALLOC(priv) < 0) { + virReportOOMError(); + return -1; + } + + priv->flags = flags; + + for (i = 0; i < nparams; i++) { + param = &params[i]; + + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + } else if (STREQ(param->key, "name")) { + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + param->value.str, SANLK_NAME_LEN); + goto error; + } + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.ui; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } + } + + lock->privateData = priv; + return 0; + +error: + VIR_FREE(priv); + return -1; +} + +static void virLockManagerSanlockFree(virLockManagerPtr lock) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int i; + + if (!priv) + return; + + for (i = 0; i < priv->res_count; i++) + VIR_FREE(priv->res_args[i]); + VIR_FREE(priv); + lock->privateData = NULL; +} + +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_resource *res; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + + if (priv->res_count == SANLK_MAX_RESOURCES) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Too many resources %d for object"), + SANLK_MAX_RESOURCES); + return -1; + } + + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } + + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) + return 0; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Readonly leases are not supported")); + return -1; + } + if (flags &
VIR_LOCK_MANAGER_RESOURCE_SHARED) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Sharable leases are not supported")); + return -1; + } + + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; + } + + res->num_disks = 1; + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource name '%s' exceeds %d characters"), + name, SANLK_NAME_LEN); + goto error; + } + + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "path")) { + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Lease path '%s' exceeds %d characters"), + params[i].value.str, SANLK_PATH_LEN); + goto error; + } + } else if (STREQ(params[i].key, "offset")) { + res->disks[0].offset = params[i].value.ul; + } else if (STREQ(params[i].key, "lockspace")) { + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource lockspace '%s' exceeds %d characters"), + params[i].value.str, SANLK_NAME_LEN); + goto error; + } + } + } + + priv->res_args[priv->res_count] = res; + priv->res_count++; + return 0; + +error: + VIR_FREE(res); + return -1; +} + +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_options *opt; + struct sanlk_resource **res_args; + int res_count; + bool res_free = false; + int sock = -1; + int rv; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); + + if (priv->res_count == 0 && + priv->hasRWDisks) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + if (VIR_ALLOC(opt) < 0) { + virReportOOMError(); + return -1; + } + + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { + 
virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + priv->vm_name, SANLK_NAME_LEN); + goto error; + } + + if (state && STRNEQ(state, "") && 0) { + if ((rv = sanlock_state_to_args((char *)state, + &res_count, + &res_args)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse lock state %s: error %d"), + state, rv); + else + virReportSystemError(-rv, + _("Unable to parse lock state %s"), + state); + goto error; + } + res_free = true; + } else { + res_args = priv->res_args; + res_count = priv->res_count; + } + + VIR_DEBUG("Register sanlock %d", flags); + /* We only initialize 'sock' if we are in the real + * child process and we need it to be inherited + * + * If sock==-1, then sanlock auto-open/closes a + * temporary sock + */ + if (priv->vm_pid == getpid() && + (sock = sanlock_register()) < 0) { + if (sock <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to open socket to sanlock daemon: error %d"), + sock); + else + virReportSystemError(-sock, "%s", + _("Failed to open socket to sanlock daemon")); + goto error; + } + + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + VIR_DEBUG("Acquiring object %u", priv->res_count); + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, + priv->res_count, priv->res_args, + opt)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to acquire lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to acquire lock")); + goto error; + } + } + + VIR_FREE(opt); + + /* + * We are *intentionally* "leaking" sock file descriptor + * because we want it to be inherited by QEMU. 
When the + * sock FD finally closes upon QEMU exit (or crash) then + * sanlock will notice EOF and release the lock + */ + if (sock != -1 && + virSetInherit(sock, true) < 0) + goto error; + + VIR_DEBUG("Acquire completed fd=%d", sock); + + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + + return 0; + +error: + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + VIR_FREE(opt); + VIR_FORCE_CLOSE(sock); + return -1; +} + + +static int virLockManagerSanlockRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int res_count; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to inquire lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to release lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + return 0; +} + +static int virLockManagerSanlockInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int rv, res_count; + + virCheckFlags(0, -1); + + VIR_DEBUG("pid=%d", priv->vm_pid); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to inquire lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + return 0; +} + +virLockDriver virLockDriverImpl = 
+{ + .version = VIR_LOCK_MANAGER_VERSION, + + .flags = VIR_LOCK_MANAGER_USES_STATE, + + .drvInit = virLockManagerSanlockInit, + .drvDeinit = virLockManagerSanlockDeinit, + + .drvNew = virLockManagerSanlockNew, + .drvFree = virLockManagerSanlockFree, + + .drvAddResource = virLockManagerSanlockAddResource, + + .drvAcquire = virLockManagerSanlockAcquire, + .drvRelease = virLockManagerSanlockRelease, + .drvInquire = virLockManagerSanlockInquire, +}; -- 1.7.4.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list