On 11/09/2018 03:59 AM, John Ferlan wrote: > > > On 10/17/18 5:06 AM, Michal Privoznik wrote: >> Trying to use virlockd to lock metadata turns out to be too big >> gun. Since we will always spawn a separate process for relabeling >> we are safe to use thread unsafe POSIX locks and take out >> virtlockd completely out of the picture. >> >> Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> >> --- >> src/security/security_dac.c | 10 +- >> src/security/security_manager.c | 225 +++++++++++++++++--------------- >> src/security/security_manager.h | 17 ++- >> src/security/security_selinux.c | 9 +- >> 4 files changed, 139 insertions(+), 122 deletions(-) >> >> diff --git a/src/security/security_dac.c b/src/security/security_dac.c >> index 580d800bb1..ad2561a241 100644 >> --- a/src/security/security_dac.c >> +++ b/src/security/security_dac.c >> @@ -204,6 +204,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, >> void *opaque) >> { >> virSecurityDACChownListPtr list = opaque; >> + virSecurityManagerMetadataLockStatePtr state; >> const char **paths = NULL; >> size_t npaths = 0; >> size_t i; >> @@ -216,14 +217,10 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, >> for (i = 0; i < list->nItems; i++) { >> const char *p = list->items[i]->path; >> >> - if (!p || >> - virFileIsDir(p)) >> - continue; >> - >> VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p); >> } >> >> - if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0) >> + if (!(state = virSecurityManagerMetadataLock(list->manager, paths, npaths))) >> goto cleanup; >> >> for (i = 0; i < list->nItems; i++) { >> @@ -246,8 +243,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, >> break; >> } >> >> - if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0) >> - goto cleanup; >> + virSecurityManagerMetadataUnlock(list->manager, &state); >> >> if (rv < 0) >> goto cleanup; >> diff --git a/src/security/security_manager.c b/src/security/security_manager.c >> index c6c80e6165..b675f8ab46 100644 >> --- a/src/security/security_manager.c >> +++ b/src/security/security_manager.c >> @@ -21,6 +21,10 @@ >> */ >> #include <config.h> >> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <fcntl.h> >> + >> #include "security_driver.h" >> #include "security_stack.h" >> #include "security_dac.h" >> @@ -30,14 +34,11 @@ >> #include "virlog.h" >> #include "locking/lock_manager.h" >> #include "virfile.h" >> -#include "virtime.h" >> >> #define VIR_FROM_THIS VIR_FROM_SECURITY >> >> VIR_LOG_INIT("security.security_manager"); >> >> -virMutex lockManagerMutex = VIR_MUTEX_INITIALIZER; >> - >> struct _virSecurityManager { >> virObjectLockable parent; >> >> @@ -47,10 +48,6 @@ struct _virSecurityManager { >> void *privateData; >> >> virLockManagerPluginPtr lockPlugin; >> - /* This is a FD that represents a connection to virtlockd so >> - * that connection is kept open in between MetdataLock() and >> - * MetadataUnlock() calls. */ >> - int clientfd; >> }; >> >> static virClassPtr virSecurityManagerClass; >> @@ -66,7 +63,6 @@ void virSecurityManagerDispose(void *obj) >> mgr->drv->close(mgr); >> >> virObjectUnref(mgr->lockPlugin); >> - VIR_FORCE_CLOSE(mgr->clientfd); >> >> VIR_FREE(mgr->privateData); >> } >> @@ -119,7 +115,6 @@ virSecurityManagerNewDriver(virSecurityDriverPtr drv, >> mgr->flags = flags; >> mgr->virtDriver = virtDriver; >> VIR_STEAL_PTR(mgr->privateData, privateData); >> - mgr->clientfd = -1; >> >> if (drv->open(mgr) < 0) >> goto error; >> @@ -1276,129 +1271,153 @@ virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr, >> } >> >> >> -static virLockManagerPtr >> -virSecurityManagerNewLockManager(virSecurityManagerPtr mgr, >> - const char * const *paths, >> - size_t npaths) >> +struct _virSecurityManagerMetadataLockState { > > LockPrivate? > > When I first saw State I had a different thought. > > I like this better than the previous model... Keeping track of fds is > like other models used. How do these locks handle restarts? Are the > locks free'd if the daemon dies? Yes they are. Since this runs in a child (also the child body is very short) then when the parent (libvirtd) is killed the children are killed too. The POSIX locks I'm using here are tied to [PID,inode] pair. Therefore, if PID dies the lock is released. > >> + size_t nfds; >> + int *fds; >> +}; >> + >> + >> +static int >> +cmpstringp(const void *p1, const void *p2) >> { >> - virLockManagerPtr lock; >> - virLockManagerParam params[] = { >> - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, >> - .key = "uuid", >> - }, >> - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, >> - .key = "name", >> - .value = { .cstr = "libvirtd-sec" }, >> - }, >> - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, >> - .key = "pid", >> - .value = { .iv = getpid() }, >> - }, >> - }; >> - const unsigned int flags = 0; >> - size_t i; >> + const char *s1 = *(char * const *) p1; >> + const char *s2 = *(char * const *) p2; >> >> - if (virGetHostUUID(params[0].value.uuid) < 0) >> - return NULL; >> + if (!s1 && !s2) >> + return 0; >> >> - if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(mgr->lockPlugin), >> - VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON, >> - ARRAY_CARDINALITY(params), >> - params, >> - flags))) >> - return NULL; >> + if (!s1 || !s2) >> + return s2 ? -1 : 1; >> >> - for (i = 0; i < npaths; i++) { >> - if (virLockManagerAddResource(lock, >> - VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA, >> - paths[i], 0, NULL, 0) < 0) >> - goto error; >> - } >> - >> - return lock; >> - error: >> - virLockManagerFree(lock); >> - return NULL; >> + /* from man 3 qsort */ >> + return strcmp(s1, s2); >> } >> >> +#define METADATA_OFFSET 1 >> +#define METADATA_LEN 1 >> >> -/* How many seconds should we try to acquire the lock before >> - * giving up. */ >> -#define LOCK_ACQUIRE_TIMEOUT 60 >> - >> -int >> -virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, >> - const char * const *paths, >> +/** >> + * virSecurityManagerMetadataLock: >> + * @mgr: security manager object >> + * @paths: paths to lock >> + * @npaths: number of items in @paths array >> + * >> + * Lock passed @paths for metadata change. The returned state >> + * should be passed to virSecurityManagerMetadataUnlock. >> + * >> + * NOTE: this function is not thread safe (because of usage of >> + * POSIX locks). >> + * >> + * Returns: state on success, >> + * NULL on failure. >> + */ >> +virSecurityManagerMetadataLockStatePtr >> +virSecurityManagerMetadataLock(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, >> + const char **paths, >> size_t npaths) >> { >> - virLockManagerPtr lock; >> - virTimeBackOffVar timebackoff; >> - int fd = -1; >> - int rv = -1; >> - int ret = -1; >> + size_t i = 0; >> + size_t nfds = 0; >> + int *fds = NULL; >> + virSecurityManagerMetadataLockStatePtr ret = NULL; >> >> - virMutexLock(&lockManagerMutex); >> + if (VIR_ALLOC_N(fds, npaths) < 0) >> + return NULL; >> >> - if (!(lock = virSecurityManagerNewLockManager(mgr, paths, npaths))) >> - goto cleanup; >> + /* Sort paths to lock in order to avoid deadlocks. */ >> + qsort(paths, npaths, sizeof(*paths), cmpstringp); > > Shouldn't this be the job of the caller to know or set order to avoid > deadlocks? IOW: Why is it important to sort here? It's not clear > how/why it avoids deadlocks. Imagine you have two domains which are starting up. One has disks A,B and the other has B,A. Two processes are forked to start relabeling over the disks. The first child locks A, the second locks B. Then they want to lock the second disk, but this won't succeed. However, if the paths are sorted firstly then there won't be any deadlock: both children will lock A first and B second (regardless of disk position in domain XML). > >> >> - if (virTimeBackOffStart(&timebackoff, 1, LOCK_ACQUIRE_TIMEOUT * 1000) < 0) >> - goto cleanup; >> - while (virTimeBackOffWait(&timebackoff)) { >> - rv = virLockManagerAcquire(lock, NULL, >> - VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK, >> - VIR_DOMAIN_LOCK_FAILURE_DEFAULT, &fd); >> + for (i = 0; i < npaths; i++) { >> + const char *p = paths[i]; >> + struct stat sb; >> + int retries = 10 * 1000; >> + int fd; >> + >> + if (!p || stat(p, &sb) < 0) >> + continue; >> + >> + if (S_ISDIR(sb.st_mode)) { >> + /* Directories can't be locked */ >> + continue; >> + } >> + >> + if ((fd = open(p, O_RDWR)) < 0) { > > Is RDWR required for locking? Good question, I'll investigate. It shouldn't be necessary. > >> + if (S_ISSOCK(sb.st_mode)) { >> + /* Sockets can be opened only if there exists the >> + * other side that listens. */ >> + continue; >> + } >> + >> + virReportSystemError(errno, >> + _("unable to open %s"), >> + p); >> + goto cleanup; >> + } >> + >> + do { >> + if (virFileLock(fd, false, >> + METADATA_OFFSET, METADATA_LEN, false) < 0) { > > If lockd dies somewhere in the middle of this "transaction" - all those > fd's close and unlock, right? Mr doom and gloom thinking what could go > wrong. Yes. closing a file if it is locked results in it being unlocked. Even if it is a different FD. For instance: fd = open(path); virFileLock(fd, ...); close(fd); /* releases the lock */ but also: fd1 = open(path); fd2 = open(path); virFileLock(fd1, ...); close(fd2); /* also releases the lock */ This is what makes POSIX locks impossible to use from a multithreaded app (imagine those two open()s happening from different threads). This is the whole reason we need to undergo fork() torture. > >> + if (retries && (errno == EACCES || errno == EAGAIN)) { >> + /* File is locked. Try again. */ >> + retries--; >> + usleep(1000); >> + continue; >> + } else { >> + virReportSystemError(errno, >> + _("unable to lock %s for metadata change"), >> + p); >> + VIR_FORCE_CLOSE(fd); >> + goto cleanup; >> + } >> + } > > I dunno, I liked the virTimeBackOffStart better... This 1 second wait > just seems like it could be a perf. bottleneck someday. I've looked at > the code for a while and probably just need more thinking time on it. Okay, I'll leave it in. Michal -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list