On Thu, May 19, 2011 at 07:24:25AM -0400, Daniel P. Berrange wrote: > Sanlock is a project that implements a disk-paxos locking > algorithm. This is suitable for cluster deployments with > shared storage. > > * src/Makefile.am: Add dlopen plugin for sanlock > * src/locking/lock_driver_sanlock.c: Sanlock driver > --- > libvirt.spec.in | 11 + > po/POTFILES.in | 1 + > src/Makefile.am | 12 + > src/libvirt_private.syms | 1 + > src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ > 5 files changed, 438 insertions(+), 0 deletions(-) > create mode 100644 src/locking/lock_driver_sanlock.c > > diff --git a/libvirt.spec.in b/libvirt.spec.in > index e85f68f..73213ea 100644 > --- a/libvirt.spec.in > +++ b/libvirt.spec.in > @@ -77,6 +77,7 @@ > %define with_dtrace 0%{!?_without_dtrace:0} > %define with_cgconfig 0%{!?_without_cgconfig:0} > %define with_referential 0%{!?_without_referential:1} > +%define with_sanlock 0%{!?_without_sanlock:0} > > # Non-server/HV driver defaults which are always enabled > %define with_python 0%{!?_without_python:1} > @@ -180,6 +181,7 @@ > > %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6 > %define with_dtrace 1 > +%define with_sanlock 1 > %endif > > # Pull in cgroups config system > @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel > %if %{with_referential} > BuildRequires: referential-devel > %endif > +%if %{with_sanlock} > +BuildRequires: sanlock-devel > +%endif Hum ... weird [root@paphio ~]# yum install sanlock-devel .. No package sanlock-devel available. Error: Nothing to do [root@paphio ~]# cat /etc/fedora-release Fedora release 14 (Laughlin) [root@paphio ~]# are you sure about the dep ? 
> %if %{with_storage_fs} > # For mount/umount in FS driver > @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la > rm -f $RPM_BUILD_ROOT%{_libdir}/*.a > rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la > rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a > +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la > +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a > > %if %{with_network} > install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ > @@ -1004,6 +1011,10 @@ fi > %attr(0755, root, root) %{_libexecdir}/libvirt_lxc > %endif > > +%if %{with_sanlock} > +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so > +%endif > + > %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper > %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper > %attr(0755, root, root) %{_sbindir}/libvirtd > diff --git a/po/POTFILES.in b/po/POTFILES.in > index 9c3d287..c3b45f9 100644 > --- a/po/POTFILES.in > +++ b/po/POTFILES.in > @@ -31,6 +31,7 @@ src/fdstream.c > src/interface/netcf_driver.c > src/internal.h > src/libvirt.c > +src/locking/lock_driver_sanlock.c > src/locking/lock_manager.c > src/lxc/lxc_container.c > src/lxc/lxc_conf.c > diff --git a/src/Makefile.am b/src/Makefile.am > index 1e5a72e..edf017d 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ > locking/lock_driver_nop.h locking/lock_driver_nop.c \ > locking/domain_lock.h locking/domain_lock.c > > +LOCK_DRIVER_SANLOCK_SOURCES = \ > + locking/lock_driver_sanlock.c > + > > # XML configuration format handling sources > # Domain driver generic impl APIs > @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) > libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) > EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE) > > + > +lockdriverdir = $(libdir)/libvirt/lock-driver > +lockdriver_LTLIBRARIES = sanlock.la > + > +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) > +sanlock_la_CFLAGS = $(AM_CLFAGS) > +sanlock_la_LDFLAGS = 
-module -avoid-version > +sanlock_la_LIBADD = -lsanlock > + > libexec_PROGRAMS = > > if WITH_LIBVIRTD > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index a3fe2f1..e61ea13 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -650,6 +650,7 @@ virVMOperationTypeToString; > # memory.h > virAlloc; > virAllocN; > +virAllocVar; > virExpandN; > virFree; > virReallocN; > diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c > new file mode 100644 > index 0000000..6a31fdf > --- /dev/null > +++ b/src/locking/lock_driver_sanlock.c > @@ -0,0 +1,413 @@ > +/* > + * lock_driver_sanlock.c: A lock driver for Sanlock > + * > + * Copyright (C) 2010-2011 Red Hat, Inc. > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * Author: Daniel P. 
Berrange <berrange@xxxxxxxxxx> > + */ > + > +#include <config.h> > + > +#include <stdlib.h> > +#include <stdint.h> > +#include <unistd.h> > +#include <string.h> > +#include <stdio.h> > +#include <errno.h> > +#include <sys/types.h> > + > +#include <sanlock.h> > +#include <sanlock_resource.h> > + > +#include "lock_driver.h" > +#include "logging.h" > +#include "virterror_internal.h" > +#include "memory.h" > +#include "util.h" > +#include "files.h" > + > +#define VIR_FROM_THIS VIR_FROM_LOCKING > + > +#define virLockError(code, ...) \ > + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ > + __FUNCTION__, __LINE__, __VA_ARGS__) > + > +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; > +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; > + > +struct _virLockManagerSanlockPrivate { > + char vm_name[SANLK_NAME_LEN]; > + char vm_uuid[VIR_UUID_BUFLEN]; > + unsigned int vm_id; > + unsigned int vm_pid; > + unsigned int flags; > + bool hasRWDisks; > + int res_count; > + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; > +}; > + > +/* > + * sanlock plugin for the libvirt virLockManager API > + */ > + > +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, > + unsigned int flags) > +{ > + virCheckFlags(0, -1); > + return 0; > +} > + > +static int virLockManagerSanlockDeinit(void) > +{ > + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Unloading sanlock plugin is forbidden")); > + return -1; > +} > + > +static int virLockManagerSanlockNew(virLockManagerPtr lock, > + unsigned int type, > + size_t nparams, > + virLockManagerParamPtr params, > + unsigned int flags) > +{ > + virLockManagerParamPtr param; > + virLockManagerSanlockPrivatePtr priv; > + int i; > + > + virCheckFlags(0, -1); > + > + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { > + virLockError(VIR_ERR_INTERNAL_ERROR, > + _("Unsupported object type %d"), type); > + return -1; > + } > + > + if (VIR_ALLOC(priv) < 0) { > + 
virReportOOMError(); > + return -1; > + } > + > + priv->flags = flags; > + > + for (i = 0; i < nparams; i++) { > + param = &params[i]; > + > + if (STREQ(param->key, "uuid")) { > + memcpy(priv->vm_uuid, param->value.uuid, 16); > + } else if (STREQ(param->key, "name")) { > + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { > + virLockError(VIR_ERR_INTERNAL_ERROR, > + _("Domain name '%s' exceeded %d characters"), > + param->value.str, SANLK_NAME_LEN); > + goto error; > + } > + } else if (STREQ(param->key, "pid")) { > + priv->vm_pid = param->value.ui; > + } else if (STREQ(param->key, "id")) { > + priv->vm_id = param->value.ui; > + } > + } > + > + lock->privateData = priv; > + return 0; > + > +error: > + VIR_FREE(priv); > + return -1; > +} > + > +static void virLockManagerSanlockFree(virLockManagerPtr lock) > +{ > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > + int i; > + > + if (!priv) > + return; > + > + for (i = 0; i < priv->res_count; i++) > + VIR_FREE(priv->res_args[i]); > + VIR_FREE(priv); > + lock->privateData = NULL; > +} > + > +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, > + unsigned int type, > + const char *name, > + size_t nparams, > + virLockManagerParamPtr params, > + unsigned int flags) > +{ > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > + struct sanlk_resource *res; > + int i; > + > + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | > + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); > + > + if (priv->res_count == SANLK_MAX_RESOURCES) { > + virLockError(VIR_ERR_INTERNAL_ERROR, > + _("Too many resources %d for object"), > + SANLK_MAX_RESOURCES); > + return -1; > + } > + > + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { > + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | > + VIR_LOCK_MANAGER_RESOURCE_READONLY))) > + priv->hasRWDisks = true; > + return 0; > + } > + > + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) > + return 0; > + > + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) 
{ > + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > + _("Readonly leases are not supported")); > + return -1; > + } > + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) { > + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > + _("Sharable leases are not supported")); > + return -1; > + } > + > + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { > + virReportOOMError(); > + return -1; > + } > + > + res->num_disks = 1; > + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { > + virLockError(VIR_ERR_INTERNAL_ERROR, > + _("Resource name '%s' exceeds %d characters"), > + name, SANLK_NAME_LEN); > + goto error; > + } > + > + for (i = 0; i < nparams; i++) { > + if (STREQ(params[i].key, "path")) { > + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { > + virLockError(VIR_ERR_INTERNAL_ERROR, > + _("Lease path '%s' exceeds %d characters"), > + params[i].value.str, SANLK_PATH_LEN); > + goto error; > + } > + } else if (STREQ(params[i].key, "offset")) { > + res->disks[0].offset = params[i].value.ul; > + } else if (STREQ(params[i].key, "lockspace")) { > + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { > + virLockError(VIR_ERR_INTERNAL_ERROR, > + _("Resource lockspace '%s' exceeds %d characters"), > + params[i].value.str, SANLK_NAME_LEN); > + goto error; > + } > + } > + } > + > + priv->res_args[priv->res_count] = res; > + priv->res_count++; > + return 0; > + > +error: > + VIR_FREE(res); > + return -1; > +} > + > +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, > + const char *state, > + unsigned int flags) > +{ > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > + struct sanlk_options *opt; > + struct sanlk_resource **res_args; > + int res_count; > + bool res_free = false; > + int sock = -1; > + int rv; > + int i; > + > + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); > + > + if (priv->res_count == 0 && > + priv->hasRWDisks) { > + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > + 
_("Read/write, exclusive access, disks were present, but no leases specified")); > + return -1; > + } > + > + if (VIR_ALLOC(opt) < 0) { > + virReportOOMError(); > + return -1; > + } > + > + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { > + virLockError(VIR_ERR_INTERNAL_ERROR, > + _("Domain name '%s' exceeded %d characters"), > + priv->vm_name, SANLK_NAME_LEN); > + goto error; > + } > + > + if (state && STRNEQ(state, "") && 0) { > + if ((rv = sanlock_state_to_args((char *)state, > + &res_count, > + &res_args)) < 0) { > + virReportSystemError(-rv, > + _("Unable to parse lock state %s"), > + state); > + goto error; > + } > + res_free = true; > + } else { > + res_args = priv->res_args; > + res_count = priv->res_count; > + } > + > + VIR_DEBUG("Register sanlock %d", flags); > + /* We only initialize 'sock' if we are in the real > + * child process and we need it to be inherited > + * > + * If sock==-1, then sanlock auto-open/closes a > + * temporary sock > + */ > + if (priv->vm_pid == getpid() && > + (sock = sanlock_register()) < 0) { > + virReportSystemError(-sock, "%s", > + _("Failed to open socket to sanlock daemon")); > + goto error; > + } > + > + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { > + VIR_DEBUG("Acquiring object %u", priv->res_count); > + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, > + priv->res_count, priv->res_args, > + opt)) < 0) { Hum ... > +#if 1 > + virReportSystemError(-rv, "%s", > + _("Failed to acquire lock")); > +#else > + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Failed to acquire lock")); > +#endif this probably is worth some kind of comment or cleaned up > + goto error; > + } > + } > + > + VIR_FREE(opt); > + > + /* > + * We are *intentionally* "leaking" sock file descriptor > + * because we want it to be inherited by QEMU. 
When the > + * sock FD finally closes upon QEMU exit (or crash) then > + * sanlock will notice EOF and release the lock > + */ > + if (sock != -1 && > + virSetInherit(sock, true) < 0) > + goto error; > + > + VIR_DEBUG("Acquire completed fd=%d", sock); > + > + if (res_free) { > + for (i = 0 ; i < res_count ; i++) { > + VIR_FREE(res_args[i]); > + } > + VIR_FREE(res_args); > + } > + > + return 0; > + > +error: > + if (res_free) { > + for (i = 0 ; i < res_count ; i++) { > + VIR_FREE(res_args[i]); > + } > + VIR_FREE(res_args); > + } > + VIR_FREE(opt); > + VIR_FORCE_CLOSE(sock); > + return -1; > +} > + > + > +static int virLockManagerSanlockRelease(virLockManagerPtr lock, > + char **state, > + unsigned int flags) > +{ > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > + int res_count; > + int rv; > + > + virCheckFlags(0, -1); > + > + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { > + virReportSystemError(-rv, "%s", > + _("Failed to release lock")); > + return -1; > + } > + > + if (STREQ(*state, "")) > + VIR_FREE(*state); > + > + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { > + virReportSystemError(-rv, "%s", > + _("Failed to release lock")); > + return -1; > + } > + > + return 0; > +} > + > +static int virLockManagerSanlockInquire(virLockManagerPtr lock, > + char **state, > + unsigned int flags) > +{ > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > + int rv, res_count; > + > + virCheckFlags(0, -1); > + > + VIR_DEBUG("pid=%d", priv->vm_pid); > + > + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { > + virReportSystemError(-rv, "%s", > + _("Failed to inquire lock")); > + return -1; > + } > + > + if (STREQ(*state, "")) > + VIR_FREE(*state); > + > + return 0; > +} > + > +virLockDriver virLockDriverImpl = > +{ > + .version = VIR_LOCK_MANAGER_VERSION, > + > + .flags = VIR_LOCK_MANAGER_USES_STATE, > + > + .drvInit = virLockManagerSanlockInit, > + .drvDeinit = 
virLockManagerSanlockDeinit, > + > + .drvNew = virLockManagerSanlockNew, > + .drvFree = virLockManagerSanlockFree, > + > + .drvAddResource = virLockManagerSanlockAddResource, > + > + .drvAcquire = virLockManagerSanlockAcquire, > + .drvRelease = virLockManagerSanlockRelease, > + .drvInquire = virLockManagerSanlockInquire, > +}; I'm a bit puzzled by the new dependency, and this might prevent me from building rc1 of 0.9.2 if pushed as-is, but ACK in principle. Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@xxxxxxxxxxxx | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/ -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list