On Fri, May 27, 2011 at 05:37:51PM +0800, Daniel Veillard wrote: > On Thu, May 19, 2011 at 07:24:25AM -0400, Daniel P. Berrange wrote: > > Sanlock is a project that implements a disk-paxos locking > > algorithm. This is suitable for cluster deployments with > > shared storage. > > > > * src/Makefile.am: Add dlopen plugin for sanlock > > * src/locking/lock_driver_sanlock.c: Sanlock driver > > --- > > libvirt.spec.in | 11 + > > po/POTFILES.in | 1 + > > src/Makefile.am | 12 + > > src/libvirt_private.syms | 1 + > > src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ > > 5 files changed, 438 insertions(+), 0 deletions(-) > > create mode 100644 src/locking/lock_driver_sanlock.c > > > > diff --git a/libvirt.spec.in b/libvirt.spec.in > > index e85f68f..73213ea 100644 > > --- a/libvirt.spec.in > > +++ b/libvirt.spec.in > > @@ -77,6 +77,7 @@ > > %define with_dtrace 0%{!?_without_dtrace:0} > > %define with_cgconfig 0%{!?_without_cgconfig:0} > > %define with_referential 0%{!?_without_referential:1} > > +%define with_sanlock 0%{!?_without_sanlock:0} > > > > # Non-server/HV driver defaults which are always enabled > > %define with_python 0%{!?_without_python:1} > > @@ -180,6 +181,7 @@ > > > > %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6 > > %define with_dtrace 1 > > +%define with_sanlock 1 > > %endif > > > > # Pull in cgroups config system > > @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel > > %if %{with_referential} > > BuildRequires: referential-devel > > %endif > > +%if %{with_sanlock} > > +BuildRequires: sanlock-devel > > +%endif > > Hum ... weird > > [root@paphio ~]# yum install sanlock-devel > .. > No package sanlock-devel available. > Error: Nothing to do > [root@paphio ~]# cat /etc/fedora-release > Fedora release 14 (Laughlin) > [root@paphio ~]# > > are you sure about the dep ? 
> > > %if %{with_storage_fs} > > # For mount/umount in FS driver > > @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la > > rm -f $RPM_BUILD_ROOT%{_libdir}/*.a > > rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la > > rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a > > +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la > > +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a > > > > %if %{with_network} > > install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ > > @@ -1004,6 +1011,10 @@ fi > > %attr(0755, root, root) %{_libexecdir}/libvirt_lxc > > %endif > > > > +%if %{with_sanlock} > > +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so > > +%endif > > + > > %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper > > %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper > > %attr(0755, root, root) %{_sbindir}/libvirtd > > diff --git a/po/POTFILES.in b/po/POTFILES.in > > index 9c3d287..c3b45f9 100644 > > --- a/po/POTFILES.in > > +++ b/po/POTFILES.in > > @@ -31,6 +31,7 @@ src/fdstream.c > > src/interface/netcf_driver.c > > src/internal.h > > src/libvirt.c > > +src/locking/lock_driver_sanlock.c > > src/locking/lock_manager.c > > src/lxc/lxc_container.c > > src/lxc/lxc_conf.c > > diff --git a/src/Makefile.am b/src/Makefile.am > > index 1e5a72e..edf017d 100644 > > --- a/src/Makefile.am > > +++ b/src/Makefile.am > > @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ > > locking/lock_driver_nop.h locking/lock_driver_nop.c \ > > locking/domain_lock.h locking/domain_lock.c > > > > +LOCK_DRIVER_SANLOCK_SOURCES = \ > > + locking/lock_driver_sanlock.c > > + > > > > # XML configuration format handling sources > > # Domain driver generic impl APIs > > @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) > > libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) > > EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE) > > > > + > > +lockdriverdir = $(libdir)/libvirt/lock-driver > > +lockdriver_LTLIBRARIES = sanlock.la > > + > 
> +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) > > +sanlock_la_CFLAGS = $(AM_CLFAGS) > > +sanlock_la_LDFLAGS = -module -avoid-version > > +sanlock_la_LIBADD = -lsanlock > > + > > libexec_PROGRAMS = > > > > if WITH_LIBVIRTD > > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > > index a3fe2f1..e61ea13 100644 > > --- a/src/libvirt_private.syms > > +++ b/src/libvirt_private.syms > > @@ -650,6 +650,7 @@ virVMOperationTypeToString; > > # memory.h > > virAlloc; > > virAllocN; > > +virAllocVar; > > virExpandN; > > virFree; > > virReallocN; > > diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c > > new file mode 100644 > > index 0000000..6a31fdf > > --- /dev/null > > +++ b/src/locking/lock_driver_sanlock.c > > @@ -0,0 +1,413 @@ > > +/* > > + * lock_driver_sanlock.c: A lock driver for Sanlock > > + * > > + * Copyright (C) 2010-2011 Red Hat, Inc. > > + * > > + * This library is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * This library is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with this library; if not, write to the Free Software > > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > > > * Author: Daniel P. 
Berrange <berrange@xxxxxxxxxx> > > > + */ > > + > > +#include <config.h> > > + > > +#include <stdlib.h> > > +#include <stdint.h> > > +#include <unistd.h> > > +#include <string.h> > > +#include <stdio.h> > > +#include <errno.h> > > +#include <sys/types.h> > > + > > +#include <sanlock.h> > > +#include <sanlock_resource.h> > > + > > +#include "lock_driver.h" > > +#include "logging.h" > > +#include "virterror_internal.h" > > +#include "memory.h" > > +#include "util.h" > > +#include "files.h" > > + > > +#define VIR_FROM_THIS VIR_FROM_LOCKING > > + > > +#define virLockError(code, ...) \ > > + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ > > + __FUNCTION__, __LINE__, __VA_ARGS__) > > + > > +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; > > +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; > > + > > +struct _virLockManagerSanlockPrivate { > > + char vm_name[SANLK_NAME_LEN]; > > + char vm_uuid[VIR_UUID_BUFLEN]; > > + unsigned int vm_id; > > + unsigned int vm_pid; > > + unsigned int flags; > > + bool hasRWDisks; > > + int res_count; > > + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; > > +}; > > + > > +/* > > + * sanlock plugin for the libvirt virLockManager API > > + */ > > + > > +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, > > + unsigned int flags) > > +{ > > + virCheckFlags(0, -1); > > + return 0; > > +} > > + > > +static int virLockManagerSanlockDeinit(void) > > +{ > > + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", > > + _("Unloading sanlock plugin is forbidden")); > > + return -1; > > +} > > + > > +static int virLockManagerSanlockNew(virLockManagerPtr lock, > > + unsigned int type, > > + size_t nparams, > > + virLockManagerParamPtr params, > > + unsigned int flags) > > +{ > > + virLockManagerParamPtr param; > > + virLockManagerSanlockPrivatePtr priv; > > + int i; > > + > > + virCheckFlags(0, -1); > > + > > + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { > > + 
virLockError(VIR_ERR_INTERNAL_ERROR, > > + _("Unsupported object type %d"), type); > > + return -1; > > + } > > + > > + if (VIR_ALLOC(priv) < 0) { > > + virReportOOMError(); > > + return -1; > > + } > > + > > + priv->flags = flags; > > + > > + for (i = 0; i < nparams; i++) { > > + param = &params[i]; > > + > > + if (STREQ(param->key, "uuid")) { > > + memcpy(priv->vm_uuid, param->value.uuid, 16); > > + } else if (STREQ(param->key, "name")) { > > + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { > > + virLockError(VIR_ERR_INTERNAL_ERROR, > > + _("Domain name '%s' exceeded %d characters"), > > + param->value.str, SANLK_NAME_LEN); > > + goto error; > > + } > > + } else if (STREQ(param->key, "pid")) { > > + priv->vm_pid = param->value.ui; > > + } else if (STREQ(param->key, "id")) { > > + priv->vm_id = param->value.ui; > > + } > > + } > > + > > + lock->privateData = priv; > > + return 0; > > + > > +error: > > + VIR_FREE(priv); > > + return -1; > > +} > > + > > +static void virLockManagerSanlockFree(virLockManagerPtr lock) > > +{ > > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > > + int i; > > + > > + if (!priv) > > + return; > > + > > + for (i = 0; i < priv->res_count; i++) > > + VIR_FREE(priv->res_args[i]); > > + VIR_FREE(priv); > > + lock->privateData = NULL; > > +} > > + > > +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, > > + unsigned int type, > > + const char *name, > > + size_t nparams, > > + virLockManagerParamPtr params, > > + unsigned int flags) > > +{ > > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > > + struct sanlk_resource *res; > > + int i; > > + > > + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | > > + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); > > + > > + if (priv->res_count == SANLK_MAX_RESOURCES) { > > + virLockError(VIR_ERR_INTERNAL_ERROR, > > + _("Too many resources %d for object"), > > + SANLK_MAX_RESOURCES); > > + return -1; > > + } > > + > > + if (type == 
VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { > > + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | > > + VIR_LOCK_MANAGER_RESOURCE_READONLY))) > > + priv->hasRWDisks = true; > > + return 0; > > + } > > + > > + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) > > + return 0; > > + > > + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { > > + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > > + _("Readonly leases are not supported")); > > + return -1; > > + } > > + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) { > > + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > > + _("Sharable leases are not supported")); > > + return -1; > > + } > > + > > + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { > > + virReportOOMError(); > > + return -1; > > + } > > + > > + res->num_disks = 1; > > + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { > > + virLockError(VIR_ERR_INTERNAL_ERROR, > > + _("Resource name '%s' exceeds %d characters"), > > + name, SANLK_NAME_LEN); > > + goto error; > > + } > > + > > + for (i = 0; i < nparams; i++) { > > + if (STREQ(params[i].key, "path")) { > > + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { > > + virLockError(VIR_ERR_INTERNAL_ERROR, > > + _("Lease path '%s' exceeds %d characters"), > > + params[i].value.str, SANLK_PATH_LEN); > > + goto error; > > + } > > + } else if (STREQ(params[i].key, "offset")) { > > + res->disks[0].offset = params[i].value.ul; > > + } else if (STREQ(params[i].key, "lockspace")) { > > + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { > > + virLockError(VIR_ERR_INTERNAL_ERROR, > > + _("Resource lockspace '%s' exceeds %d characters"), > > + params[i].value.str, SANLK_NAME_LEN); > > + goto error; > > + } > > + } > > + } > > + > > + priv->res_args[priv->res_count] = res; > > + priv->res_count++; > > + return 0; > > + > > +error: > > + VIR_FREE(res); > > + return -1; > > +} > > + > > +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, > > + const char 
*state, > > + unsigned int flags) > > +{ > > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > > + struct sanlk_options *opt; > > + struct sanlk_resource **res_args; > > + int res_count; > > + bool res_free = false; > > + int sock = -1; > > + int rv; > > + int i; > > + > > + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); > > + > > + if (priv->res_count == 0 && > > + priv->hasRWDisks) { > > + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > > + _("Read/write, exclusive access, disks were present, but no leases specified")); > > + return -1; > > + } > > + > > + if (VIR_ALLOC(opt) < 0) { > > + virReportOOMError(); > > + return -1; > > + } > > + > > + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { > > + virLockError(VIR_ERR_INTERNAL_ERROR, > > + _("Domain name '%s' exceeded %d characters"), > > + priv->vm_name, SANLK_NAME_LEN); > > + goto error; > > + } > > + > > + if (state && STRNEQ(state, "") && 0) { > > + if ((rv = sanlock_state_to_args((char *)state, > > + &res_count, > > + &res_args)) < 0) { > > + virReportSystemError(-rv, > > + _("Unable to parse lock state %s"), > > + state); > > + goto error; > > + } > > + res_free = true; > > + } else { > > + res_args = priv->res_args; > > + res_count = priv->res_count; > > + } > > + > > + VIR_DEBUG("Register sanlock %d", flags); > > + /* We only initialize 'sock' if we are in the real > > + * child process and we need it to be inherited > > + * > > + * If sock==-1, then sanlock auto-open/closes a > > + * temporary sock > > + */ > > + if (priv->vm_pid == getpid() && > > + (sock = sanlock_register()) < 0) { > > + virReportSystemError(-sock, "%s", > > + _("Failed to open socket to sanlock daemon")); > > + goto error; > > + } > > + > > + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { > > + VIR_DEBUG("Acquiring object %u", priv->res_count); > > + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, > > + priv->res_count, priv->res_args, > > + opt)) < 0) { > > Hum ... 
> > > +#if 1 > > + virReportSystemError(-rv, "%s", > > + _("Failed to acquire lock")); > > +#else > > + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", > > + _("Failed to acquire lock")); > > +#endif > > this probably is worth some kind of comment or cleaned up > > > + goto error; > > + } > > + } > > + > > + VIR_FREE(opt); > > + > > + /* > > + * We are *intentionally* "leaking" sock file descriptor > > + * because we want it to be inherited by QEMU. When the > > + * sock FD finally closes upon QEMU exit (or crash) then > > + * sanlock will notice EOF and release the lock > > + */ > > + if (sock != -1 && > > + virSetInherit(sock, true) < 0) > > + goto error; > > + > > + VIR_DEBUG("Acquire completed fd=%d", sock); > > + > > + if (res_free) { > > + for (i = 0 ; i < res_count ; i++) { > > + VIR_FREE(res_args[i]); > > + } > > + VIR_FREE(res_args); > > + } > > + > > + return 0; > > + > > +error: > > + if (res_free) { > > + for (i = 0 ; i < res_count ; i++) { > > + VIR_FREE(res_args[i]); > > + } > > + VIR_FREE(res_args); > > + } > > + VIR_FREE(opt); > > + VIR_FORCE_CLOSE(sock); > > + return -1; > > +} > > + > > + > > +static int virLockManagerSanlockRelease(virLockManagerPtr lock, > > + char **state, > > + unsigned int flags) > > +{ > > + virLockManagerSanlockPrivatePtr priv = lock->privateData; > > + int res_count; > > + int rv; > > + > > + virCheckFlags(0, -1); > > + > > + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { > > + virReportSystemError(-rv, "%s", > > + _("Failed to release lock")); > > + return -1; > > + } > > + > > + if (STREQ(*state, "")) > > + VIR_FREE(*state); > > + > > + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { > > + virReportSystemError(-rv, "%s", > > + _("Failed to release lock")); > > + return -1; > > + } > > + > > + return 0; > > +} > > + > > +static int virLockManagerSanlockInquire(virLockManagerPtr lock, > > + char **state, > > + unsigned int flags) > > +{ > > + virLockManagerSanlockPrivatePtr 
priv = lock->privateData; > > + int rv, res_count; > > + > > + virCheckFlags(0, -1); > > + > > + VIR_DEBUG("pid=%d", priv->vm_pid); > > + > > + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { > > + virReportSystemError(-rv, "%s", > > + _("Failed to inquire lock")); > > + return -1; > > + } > > + > > + if (STREQ(*state, "")) > > + VIR_FREE(*state); > > + > > + return 0; > > +} > > + > > +virLockDriver virLockDriverImpl = > > +{ > > + .version = VIR_LOCK_MANAGER_VERSION, > > + > > + .flags = VIR_LOCK_MANAGER_USES_STATE, > > + > > + .drvInit = virLockManagerSanlockInit, > > + .drvDeinit = virLockManagerSanlockDeinit, > > + > > + .drvNew = virLockManagerSanlockNew, > > + .drvFree = virLockManagerSanlockFree, > > + > > + .drvAddResource = virLockManagerSanlockAddResource, > > + > > + .drvAcquire = virLockManagerSanlockAcquire, > > + .drvRelease = virLockManagerSanlockRelease, > > + .drvInquire = virLockManagerSanlockInquire, > > +}; > > I'm a bit puzzled by the new dependency, and this might prevent me > from building rc1 of 0.9.2 if pushed as-is, > > but ACK in principle. This is last week's v4 posting. There is a v5 I posted this week which addresses the things you mention here. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list