As it turned out, event_loopbreak() does not awaken the thread that exectutes event_dispatch(), but our code expected that it would. One easily noticeable effect was that there was a noticeable delay between the state transition to DB master and listening on sockets. I knew the mysterious delay existed for a while, but never got around to investigate. For ncld API, I moved the processing of CLD packets to its own thread and suddenly everything else froze. Apparently the existing code only works because of extra packets of CLD protocol. For a fix, dispose of event_loopbreak and use a loopback pipe. Also gone is state_tdb_new. That thing was just disgusting. Notice that we still have one event_loopbreak remaining, because it works correctly thanks to UNIX signal awakening the polling thread. Signed-Off-By: Pete Zaitcev <zaitcev@xxxxxxxxxx> --- server/server.c | 90 +++++++++++++++++++++++++++++++++------------- server/tabled.h | 4 +- 2 files changed, 68 insertions(+), 26 deletions(-) commit fb1aaf280f14e020e6a0110b828f4879b5eaa11e Author: Master <zaitcev@xxxxxxxxxxxxxxxxxx> Date: Wed Feb 3 19:48:42 2010 -0700 Replace event_loopbreak with ev_pipe. diff --git a/server/server.c b/server/server.c index e5580c5..b205340 100644 --- a/server/server.c +++ b/server/server.c @@ -89,7 +89,6 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state); static const struct argp argp = { options, parse_opt, NULL, doc }; static bool server_running = true; -static bool dump_stats; static bool use_syslog = true; int debugging = 0; @@ -99,6 +98,12 @@ struct server tabled_srv = { struct tabledb tdb; +enum { + TT_CMD_DUMP, + TT_CMD_TDBST_MASTER, + TT_CMD_TDBST_SLAVE +}; + struct compiled_pat patterns[] = { [pat_auth] = { "^AWS (\\w+):(\\S+)", 0, }, @@ -361,8 +366,8 @@ static void term_signal(int signo) static void stats_signal(int signo) { - dump_stats = true; - event_loopbreak(); + static const unsigned char cmd = TT_CMD_DUMP; + write(tabled_srv.ev_pipe[1], &cmd, 1); } #define X(stat) \ @@ -1353,6 +1358,7 @@ static void tdb_checkpoint(int fd, short events, void *userdata) static void tdb_state_cb(enum db_event event) { + unsigned char cmd; switch (event) { case TDB_EV_ELECTED: @@ -1369,25 +1375,20 @@ static void tdb_state_cb(enum db_event event) * This callback runs on the context of the replication * manager thread, and calling any of our functions thus * turns our program into a multi-threaded one. Instead - * we do a loopbreak and postpone the processing. + * we signal them main thread to do the processing. */ if (tabled_srv.state_tdb != ST_TDB_INIT && tabled_srv.state_tdb != ST_TDB_OPEN) { if (event == TDB_EV_MASTER) - tabled_srv.state_tdb_new = ST_TDB_MASTER; + cmd = TT_CMD_TDBST_MASTER; else - tabled_srv.state_tdb_new = ST_TDB_SLAVE; - if (debugging) { - applog(LOG_DEBUG, "TDB state > %s", - state_name_tdb[tabled_srv.state_tdb_new]); - } - event_loopbreak(); + cmd = TT_CMD_TDBST_SLAVE; + write(tabled_srv.ev_pipe[1], &cmd, 1); } break; default: applog(LOG_WARNING, "API confusion with TDB, event 0x%x", event); tabled_srv.state_tdb = ST_TDB_OPEN; /* wrong, stub for now */ - tabled_srv.state_tdb_new = ST_TDB_INIT; } } @@ -1727,6 +1728,8 @@ static void tdb_state_process(enum st_tdb new_state) { unsigned int db_flags; + if (debugging) + applog(LOG_DEBUG, "TDB state > %s", state_name_tdb[new_state]); if ((new_state == ST_TDB_MASTER || new_state == ST_TDB_SLAVE) && tabled_srv.state_tdb == ST_TDB_ACTIVE) { @@ -1744,6 +1747,38 @@ static void tdb_state_process(enum st_tdb new_state) } } +static void internal_event(int fd, short events, void *userdata) +{ + unsigned char cmd; + ssize_t rrc; + + rrc = read(tabled_srv.ev_pipe[0], &cmd, 1); + if (rrc < 0) { + applog(LOG_WARNING, "pipe read error: %s", strerror(errno)); + abort(); + } + if (rrc < 1) { + applog(LOG_WARNING, "pipe short read"); + abort(); + } + + if (cmd == TT_CMD_DUMP) { + stats_dump(); + } else if (cmd == TT_CMD_TDBST_MASTER) { + if (tabled_srv.state_tdb != ST_TDB_MASTER) { + tdb_state_process(ST_TDB_MASTER); + tabled_srv.state_tdb = ST_TDB_MASTER; + } + } else if (cmd == TT_CMD_TDBST_SLAVE) { + if (tabled_srv.state_tdb != ST_TDB_SLAVE) { + tdb_state_process(ST_TDB_SLAVE); + tabled_srv.state_tdb = ST_TDB_SLAVE; + } + } else { + applog(LOG_WARNING, "internal error: command 0x%x", cmd); + } +} + int main (int argc, char *argv[]) { error_t aprc; @@ -1820,9 +1855,22 @@ int main (int argc, char *argv[]) signal(SIGTERM, term_signal); signal(SIGUSR1, stats_signal); + /* + * Prepare the libevent paraphernalia + */ tabled_srv.evbase_main = event_init(); event_base_rep = event_base_new(); evtimer_set(&tabled_srv.chkpt_timer, tdb_checkpoint, NULL); + if (pipe(tabled_srv.ev_pipe) < 0) { + applogerr("pipe"); + goto err_evpipe; + } + event_set(&tabled_srv.pevt, tabled_srv.ev_pipe[0], EV_READ | EV_PERSIST, + internal_event, NULL); + if (event_add(&tabled_srv.pevt, NULL) < 0) { + applog(LOG_WARNING, "pevt event_add"); + goto err_pevt; + } /* set up server networking */ rc = net_open(); @@ -1839,21 +1887,9 @@ int main (int argc, char *argv[]) applog(LOG_INFO, "initialized (%s)", (tabled_srv.flags & SFL_FOREGROUND)? "fg": "bg"); - while (server_running) { + while (server_running) event_dispatch(); - if (dump_stats) { - dump_stats = false; - stats_dump(); - } - - if (tabled_srv.state_tdb_new != ST_TDB_INIT && - tabled_srv.state_tdb_new != tabled_srv.state_tdb) { - tdb_state_process(tabled_srv.state_tdb_new); - tabled_srv.state_tdb = tabled_srv.state_tdb_new; - } - } - applog(LOG_INFO, "shutting down"); rc = 0; @@ -1871,6 +1907,10 @@ err_out_net: tdb_fini(&tdb); } /* err_tdb_init: */ +err_pevt: + close(tabled_srv.ev_pipe[0]); + close(tabled_srv.ev_pipe[1]); +err_evpipe: unlink(tabled_srv.pid_file); close(tabled_srv.pid_fd); err_out: diff --git a/server/tabled.h b/server/tabled.h index a2ffd8b..91cccae 100644 --- a/server/tabled.h +++ b/server/tabled.h @@ -226,6 +226,8 @@ struct server { int pid_fd; /* fd of pid_file */ GMutex *bigmutex; struct event_base *evbase_main; + int ev_pipe[2]; + struct event pevt; char *config; /* config file (static) */ @@ -248,7 +250,7 @@ struct server { int num_stor; /* number of storage_node's */ uint64_t object_count; - enum st_tdb state_tdb, state_tdb_new; + enum st_tdb state_tdb; enum st_net state_net; struct event chkpt_timer; /* db4 checkpoint timer */ -- To unsubscribe from this list: send the line "unsubscribe hail-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html