Hi, everyone. Just as a reminder: someone thought that it would be desirable to have iwhd start the necessary background services without the need to edit any configuration files (or ideally, without reading any manuals). My attempts to implement this mandate quickly showed that the need to confirm which of CLD and chunkd is actually working favours locating the actual code in the tabled tree, with iwhd only invoking that. Hence, tabled-autostart in this patch. This patch is tested and works, so I'm throwing it here as an RFC. Next I'm going to comment on it and explain why I think it may be a bad idea in general (or maybe not). -- Pete diff --git a/server/.gitignore b/server/.gitignore index fafb1ca..67313e2 100644 --- a/server/.gitignore +++ b/server/.gitignore @@ -1,4 +1,5 @@ tabled +tabled-autostart tdbadm diff --git a/server/Makefile.am b/server/Makefile.am index 5b53a0a..3b2dc57 100644 --- a/server/Makefile.am +++ b/server/Makefile.am @@ -1,7 +1,7 @@ INCLUDES = -I$(top_srcdir)/include @GLIB_CFLAGS@ @HAIL_CFLAGS@ -sbin_PROGRAMS = tabled tdbadm +sbin_PROGRAMS = tabled tabled-autostart tdbadm tabled_SOURCES = tabled.h \ bucket.c cldu.c config.c metarep.c object.c replica.c \ @@ -13,3 +13,5 @@ tabled_LDADD = ../lib/libtdb.a \ tdbadm_SOURCES = tdbadm.c tdbadm_LDADD = ../lib/libtdb.a @GLIB_LIBS@ @DB4_LIBS@ +tabled_autostart_SOURCES = tabled-autostart.c +tabled_autostart_LDADD = @HAIL_LIBS@ @GLIB_LIBS@ diff --git a/server/server.c b/server/server.c index 044ff51..55bff7d 100644 --- a/server/server.c +++ b/server/server.c @@ -68,6 +68,8 @@ struct server_socket { }; static struct argp_option options[] = { + { "autostart", 'a', NULL, 0, + "Launch minimally necessary services for testing" }, { "config", 'C', "/etc/tabled.conf", 0, "Configuration file" }, { "debug", 'D', "LEVEL", 0, diff --git a/server/tabled-autostart.c b/server/tabled-autostart.c new file mode 100644 index 0000000..0ece35c --- /dev/null +++ b/server/tabled-autostart.c @@ -0,0 +1,665 @@ +/* + * 
tabled-autostart + * + * This is a little tool that starts a self-contained single-node group + * on the local machine, in the current directory. + * + * This should have been written in Python. But neither tabled nor iwhd + * have Python components, so that would be a bother. + */ +#include <tabled-config.h> + +#include <errno.h> +#include <fcntl.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/wait.h> + +#include <glib.h> +#include <ncld.h> +#include <chunkc.h> + +#include "tabled.h" + +#define TAG "tabled-autostart" + +/* + * The "leading underscore in current directory" is a convention initiated + * by iwhd's filesystem back-end where all of it cohabitates with buckets. + */ +#define AUTO_HOST "localhost" +#define AUTO_DIR_CLD "_cld" +#define AUTO_DIR_CHUNK "_chunkd" +#define AUTO_DIR_TDB "_tabled" +#define AUTO_PID_CLD "_cld.pid" +#define AUTO_PID_CHUNK "_chunkd.pid" +#define AUTO_PID_TABLED "_tabled.pid" +#define AUTO_ACC_CLD "_cld.port" +#define AUTO_ACC_CHUNK "_chunkd.port" +#define AUTO_ACC_TABLED "_tabled.acc" +#define AUTO_CONF_CHUNK "_chunkd.conf" +#define AUTO_CONF_TDB "_tabled.conf" +#define AUTO_BIN_CLD "/usr/sbin/cld" +#define AUTO_BIN_CHUNK "/usr/sbin/chunkd" +#define AUTO_BIN_TABLED "/usr/sbin/tabled" +#define AUTO_CLD_USER "tabled" +#define AUTO_CLD_PASS "tabled" +#define AUTO_CHUNK_USER "tabled" +#define AUTO_CHUNK_PASS "tabled" + +static bool verbose; +static int auto_cld_port; +static int auto_chunkd_port; +enum { AUTO_TPORT_SZ = 10 }; +static char auto_tabled_port[AUTO_TPORT_SZ]; + +static char *auto_arg_cld[] = { + "cld", + "-d", AUTO_DIR_CLD, + "-P", AUTO_PID_CLD, + "-p", "auto", + "--port-file=" AUTO_ACC_CLD, + "-E", + NULL +}; + +static char *auto_arg_chunkd[] = { + "chunkd", + "-C", AUTO_CONF_CHUNK, + "-E", + NULL +}; + +static char *auto_arg_tabled[] = { 
+ "tabled", + "-C", AUTO_CONF_TDB, + "-E", + NULL +}; + +static char auto_conf_chunkd[] = + "<Listen>" + " <Port>auto</Port>" + " <PortFile>" AUTO_ACC_CHUNK "</PortFile>" + "</Listen>\n" + "<PID>" AUTO_PID_CHUNK "</PID>\n" + "<Path>" AUTO_DIR_CHUNK "</Path>\n" + "<InfoPath>/chunk-default/19690720</InfoPath>\n" + "<NID>19690720</NID>\n" + "<CLD>" + " <PortFile>" AUTO_ACC_CLD "</PortFile>" + " <Host>" AUTO_HOST "</Host>" + "</CLD>\n"; + +static char auto_conf_tabled[] = + "<PID>" AUTO_PID_TABLED "</PID>\n" + "<Listen>" + " <Port>auto</Port>" + " <PortFile>" AUTO_ACC_TABLED "</PortFile>" + "</Listen>\n" + "<TDB>" AUTO_DIR_TDB "</TDB>\n" + "<TDBRepPort>auto</TDBRepPort>\n" + "<ChunkUser>" AUTO_CHUNK_USER "</ChunkUser>\n" + "<ChunkKey>" AUTO_CHUNK_PASS "</ChunkKey>\n" + "<CLD>" + " <PortFile>" AUTO_ACC_CLD "</PortFile>" + " <Host>" AUTO_HOST "</Host>" + "</CLD>\n"; + +/* + * The server_node and node_resolve are taken verbatim from wait-for-listen. + * Is this time to factor? + * + * We have ADDRSIZE in tabled.h. 
+ */ + +struct server_node { + unsigned alen; + union { + struct sockaddr addr; + unsigned char x[ADDRSIZE]; + } a; +}; + +static int node_resolve(struct server_node *sn, + const char *hostname, const char *portstr) +{ + struct addrinfo hints; + struct addrinfo *res, *res0; + int rc; + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + + rc = getaddrinfo(hostname, portstr, &hints, &res0); + if (rc) { + fprintf(stderr, "getaddrinfo(%s:%s) failed: %s\n", + hostname, portstr, gai_strerror(rc)); + exit(1); + } + + for (res = res0; res; res = res->ai_next) { + if (res->ai_family != AF_INET && res->ai_family != AF_INET6) + continue; + + if (res->ai_addrlen > ADDRSIZE) /* should not happen */ + continue; + + memcpy(&sn->a.addr, res->ai_addr, res->ai_addrlen); + sn->alen = res->ai_addrlen; + + freeaddrinfo(res0); + return 0; + } + + freeaddrinfo(res0); + return -1; +} + +/* + * Read a port number from a port file, fill buffer. + * Unlike cld_readport, host is included as well, and we use strings. 
+ */ +static int tb_readport(const char *fname, char *buf, size_t len) +{ + int fd; + char *s; + int rc; + + if (len < 3) + return -EDOM; + if ((fd = open(fname, O_RDONLY)) == -1) + return -errno; + rc = read(fd, buf, len-1); + close(fd); + if (rc < 0) + return -errno; + if (rc == 0) + return -EPIPE; + buf[rc] = 0; + + s = strchr(buf, '\n'); + if (s) { + *s = 0; + rc = s - buf; + } + + return rc; +} + +static int auto_mkdir(const char *name) +{ + struct stat statb; + + if (mkdir(name, 0777) < 0) { + if (errno == EEXIST) { + if (stat(name, &statb) < 0) { + fprintf(stderr, TAG ": stat(%s) failed: %s\n", + name, strerror(errno)); + return -1; + } + if (!S_ISDIR(statb.st_mode)) { + fprintf(stderr, TAG + ": path `%s' is not a directory\n", + name); + return -1; + } + return 0; + } + fprintf(stderr, TAG ": cannot create directory `%s'\n", name); + return -1; + } + return 0; +} + +static int auto_prepare_area(void) +{ + + if (auto_mkdir(AUTO_DIR_CLD) < 0) { + return -1; + } + if (auto_mkdir(AUTO_DIR_CHUNK) < 0) { + return -1; + } + if (auto_mkdir(AUTO_DIR_TDB) < 0) { + return -1; + } + return 0; +} + +/* + * An almost exact copy of cld_readport, actually. Only the value range + * is different (some Linux boxes offer large PID values). 
+ */ +static int auto_read_pid(const char *fname) +{ + enum { buflen = 20 }; + char buf[buflen]; + int fd; + int rc; + long pid; + + if ((fd = open(fname, O_RDONLY)) == -1) + return -errno; + rc = read(fd, buf, buflen-1); + close(fd); + if (rc < 0) + return -errno; + if (rc == 0) + return -EPIPE; + buf[rc] = 0; + + pid = strtol(buf, NULL, 10); + if (pid <= 0 || pid >= INT_MAX) + return -EDOM; + + return (int)pid; +} + +static int auto_write_conf(const char *file, const char *conf) +{ + ssize_t len, rc; + int fd; + + len = strlen(conf); + + fd = open(file, O_WRONLY|O_CREAT|O_TRUNC, 0644); + if (fd < 0) { + fprintf(stderr, TAG ": cannot create %s: %s\n", + file, strerror(errno)); + return -1; + } + + rc = write(fd, conf, len); + if (rc < 0) { + fprintf(stderr, TAG ": cannot write %s: %s\n", + file, strerror(errno)); + close(fd); + return -1; + } + if (rc < len) { + fprintf(stderr, TAG ": short write to %s\n", file); + close(fd); + return -1; + } + + close(fd); + return 0; +} + +static int auto_spawn(const char *prog, char *argv[]) +{ + struct stat statb; + pid_t pid; + + /* + * The stat check is purely so that common errors, such as ENOENT + * if the program is not available, were printed before the fork. + * This serves no security purpose but only makes stderr more tidy. 
+ */ + if (stat(prog, &statb) < 0) { + fprintf(stderr, TAG ": stat(%s) failed: %s\n", + prog, strerror(errno)); + return -1; + } + if (!S_ISREG(statb.st_mode)) { + fprintf(stderr, TAG ": path `%s' is not a regular file\n", + prog); + return -1; + } + + pid = fork(); + if (pid < 0) { + fprintf(stderr, TAG ": fork failed: %s\n", strerror(errno)); + return -1; + } + + if (pid == 0) { + execvp(prog, argv); + fprintf(stderr, TAG ": failed to run command `%s': %s\n", + prog, strerror(errno)); + exit(3); + } + + if (verbose) + fprintf(stderr, "waiting for child...\n"); + if (waitpid(pid,NULL,0) < 0) { + fprintf(stderr, TAG ": waitpid failed: %s\n", strerror(errno)); + } + /* TBD: check identity/status from waitpid */ + if (verbose) + fprintf(stderr, "...child exited\n"); + + return 0; +} + +/* + * Looks like we never return -1 in practice, but the theory is: + * 1 = service is not running + * 0 = service is running ok + * -1 = some error, we're unable to determine if it's running + */ +static int auto_test_cld(int *gport) +{ + int pid; + int port; + int err; + struct ncld_sess *nsess; + + /* + * Test if a pidfile exists and a process is present. + * If not, do not attempt to establish a CLD session, + * because ncld_sess_open takes one minute to time out. + */ + pid = auto_read_pid(AUTO_PID_CLD); + if (pid < 0) + return 1; + + if (kill(pid, 0) < 0) { + /* + * Return "not running" on EPERM too (probably a roll-over). + */ + return 1; + } + + /* + * With PID checks done, read the accessor file and contact the daemon. 
+ */ + port = cld_readport(AUTO_ACC_CLD); + if (port < 0) + return 1; + + if (verbose) + fprintf(stderr, "trying to connect to cld (port %u) ...\n", + port); + nsess = ncld_sess_open(AUTO_HOST, port, &err, NULL, NULL, + AUTO_CLD_USER, AUTO_CLD_PASS, NULL); + if (!nsess) + return 1; + + ncld_sess_close(nsess); + + *gport = port; + return 0; +} + +static int auto_test_chunkd(int *gport) +{ + int port; + struct st_client *stc; + + /* + * Using cld_readport to read all sorts of portfiles is a tradition. + */ + port = cld_readport(AUTO_ACC_CHUNK); + if (port < 0) + return 1; + + if (verbose) + fprintf(stderr, "trying to connect to chunkd (port %u) ...\n", + port); + stc = stc_new(AUTO_HOST, port, AUTO_CHUNK_USER, AUTO_CHUNK_PASS, false); + if (!stc) + return 1; + stc_free(stc); + + *gport = port; + return 0; +} + +static int auto_test_tabled(char *gport, int gpsize) +{ + struct server_node snode, *sn = &snode; + const char *accname = AUTO_ACC_TABLED; + char accbuf[80]; + char *s; + int sfd; + int rc; + + rc = tb_readport(accname, accbuf, sizeof(accbuf)); + if (rc < 0) + return 1; + + s = strchr(accbuf, ':'); + if (!s) + s = "80"; + else + *s++ = 0; + + memset(sn, 0, sizeof(struct server_node)); + if (node_resolve(sn, accbuf, s) != 0) { + /* + * Not sure if this is ever possible. To be honest, if the DNS + * goes belly up, tabled can work around with <ForceHost>. + * But most likely nothing else would work anyway. So abend. 
+ */ + fprintf(stderr, TAG + ": unable to resolve host %s port %s\n", accbuf, s); + return -1; + } + + if (verbose) + fprintf(stderr, + "trying to connect to tabled (host %s port %s) ...\n", + accbuf, s); + + sfd = socket(sn->a.addr.sa_family, SOCK_STREAM, 0); + if (sfd < 0) { + fprintf(stderr, TAG ": socket: %s\n", strerror(errno)); + return -1; + } + + rc = connect(sfd, &sn->a.addr, sn->alen); + if (rc != 0) { + // if (errno != ECONNREFUSED) { + fprintf(stderr, TAG ": connect: %s\n", strerror(errno)); + // } + close(sfd); + return 1; + } + + close(sfd); + + strncpy(gport, s, gpsize-1); + gport[gpsize-1] = 0; + return 0; +} + +static int auto_wait_cld(void) +{ + struct timespec ts; + int cnt; + int rc; + + cnt = 0; + for (;;) { + rc = auto_test_cld(&auto_cld_port); + if (rc == 0) + break; + if (++cnt >= 5) { /* should not take long */ + fprintf(stderr, TAG ": failed to verify cld" + " using portfile `%s'\n", + AUTO_ACC_CLD); + return -1; + } + + ts.tv_sec = 1; + ts.tv_nsec = 0; + nanosleep(&ts, NULL); + } + return 0; +} + +static int auto_wait_chunkd(void) +{ + struct timespec ts; + int cnt; + int rc; + + cnt = 0; + for (;;) { + rc = auto_test_chunkd(&auto_chunkd_port); + if (rc == 0) + break; + if (++cnt >= 10) { + fprintf(stderr, TAG ": failed to verify chunkd" + " using portfile `%s'\n", + AUTO_ACC_CHUNK); + return -1; + } + + ts.tv_sec = 1; + ts.tv_nsec = 0; + nanosleep(&ts, NULL); + } + + return 0; +} + +static int auto_wait_tabled(void) +{ + struct timespec ts; + time_t start_time; + int rc; + + start_time = time(NULL); + for (;;) { + rc = auto_test_tabled(auto_tabled_port, AUTO_TPORT_SZ); + if (rc < 0) + return -1; + if (rc == 0) + break; + /* + * Vote in DB4 replication takes about 12-13s. + * In addition we may have retries when tabled polls for + * Chunk daemons to come up. On busy boxes we may miss 20s. 
+ */ + if (time(NULL) >= start_time + 25) { + fprintf(stderr, TAG ": failed to verify tabled" + " using portfile `%s'\n", + AUTO_ACC_TABLED); + return -1; + } + + ts.tv_sec = 1; + ts.tv_nsec = 0; + nanosleep(&ts, NULL); + } + + if (verbose) + fprintf(stderr, "tabled went up after %ld s\n", + (long)time(NULL) - start_time); + return 0; +} + +static int auto_start(void) +{ + int rc; + + if (auto_prepare_area() < 0) + return -1; + + rc = auto_test_cld(&auto_cld_port); + if (rc < 0) + return -1; + if (rc) { + if (verbose) + fprintf(stderr, "auto-starting cld\n"); + + /* + * N.B. There's a time window when PID file is written, but + * port is not. On a restart it's easy to end using a stale + * port, with the resulting minute-long delay. Since we're + * about to cld, it's safe to remove the old accessor here. + */ + unlink(AUTO_ACC_CLD); + + if (auto_spawn(AUTO_BIN_CLD, auto_arg_cld) < 0) + return -1; + if (auto_wait_cld() < 0) + return -1; + } + + if (verbose) + fprintf(stderr, "cld listens on port %u\n", auto_cld_port); + + rc = auto_test_chunkd(&auto_chunkd_port); + if (rc < 0) + return -1; + if (rc) { + if (verbose) + fprintf(stderr, "auto-starting chunkd\n"); + unlink(AUTO_ACC_CHUNK); + if (auto_write_conf(AUTO_CONF_CHUNK, auto_conf_chunkd) < 0) + return -1; + if (auto_spawn(AUTO_BIN_CHUNK, auto_arg_chunkd) < 0) + return -1; + if (auto_wait_chunkd() < 0) + return -1; + } + if (verbose) + fprintf(stderr, "chunkd listens on port %u\n", + auto_chunkd_port); + + rc = auto_test_tabled(auto_tabled_port, AUTO_TPORT_SZ); + if (rc < 0) + return -1; + if (rc) { + if (verbose) + fprintf(stderr, "auto-starting tabled\n"); + unlink(AUTO_ACC_TABLED); + if (auto_write_conf(AUTO_CONF_TDB, auto_conf_tabled) < 0) + return -1; + if (auto_spawn(AUTO_BIN_TABLED, auto_arg_tabled) < 0) + return -1; + if (auto_wait_tabled() < 0) + return -1; + } + if (verbose) + fprintf(stderr, "tabled listens on port %s\n", + auto_tabled_port); + + return 0; +} + +static void Usage(void) +{ + 
fprintf(stderr, "Usage: tabled-autostart [-v]\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + char *arg; + + argv++; + while ((arg = *argv++) != NULL) { + if (*arg == '-') { + switch (arg[1]) { + case 'v': + verbose = true; + break; + default: + Usage(); + } + } else { + Usage(); + } + } + + g_thread_init(NULL); /* ncld needs this, sadly */ + + if (auto_start() != 0) + exit(2); /* Part of API */ + + return 0; +} diff --git a/test/Makefile.am b/test/Makefile.am index cc4e6fe..59fb28d 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -7,6 +7,7 @@ EXTRA_DIST = \ users.data \ chunkd-test.conf \ tabled-test.conf \ + tabled-test-bis.conf \ prep-db \ start-daemon \ pid-exists \ @@ -27,6 +28,7 @@ TESTS = \ hdr-content-type \ hdr-meta \ list-keys \ + metadata-rep \ stop-daemon \ clean-db -- To unsubscribe from this list: send the line "unsubscribe hail-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html