At certain network and disk speeds, tabled can blow its stack by filling it with (essentially) endless recursion: #2 0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp= 0x7bb910, done=<value optimized out>) at server.c:397 #3 0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525 #4 0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561 #5 0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039 #6 0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp= 0x7bb8d0, done=<value optimized out>) at server.c:397 #7 0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525 #8 0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561 #9 0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039 #10 0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp= 0x7bb890, done=<value optimized out>) at server.c:397 The fix is to deliver callbacks only from the top level. Callbacks must be delivered every time a send is completed, which amounts to every call to is_writeable(). Since there is a large number of callers to it, we found it advantageous to run callbacks from every source of events. In other words, every function that is passed to event_set must invoke cli_write_run_compl. Mind that storage.c contains calls to event_set. Signed-off-by: Pete Zaitcev <zaitcev@xxxxxxxxxx> --- server/object.c | 4 +++ server/server.c | 52 +++++++++++++++++++++++++++++++++++----------- server/tabled.h | 6 +++++ 3 files changed, 50 insertions(+), 12 deletions(-) commit 88959fb8c4cc8232f92805618c5bcaeee9099390 Author: Pete Zaitcev <zaitcev@xxxxxxxxx> Date: Thu Apr 1 19:04:13 2010 -0600 Fix the endless recursion when reading long objects. diff --git a/server/object.c b/server/object.c index d61a446..6d32316 100644 --- a/server/object.c +++ b/server/object.c @@ -994,6 +994,9 @@ static bool object_get_poke(struct client *cli) char *buf; ssize_t bytes; + /* XXX flow control - what treshold? */ + /* if (cli_wqueued(cli) >= 1000000000) return false; */ + buf = malloc(CLI_DATA_BUF_SZ); if (!buf) return false; @@ -1052,6 +1055,7 @@ static bool object_get_more(struct client *cli, void *cb_data, bool done) static void object_get_event(struct open_chunk *ochunk) { object_get_poke(ochunk->cli); + cli_write_run_compl(ochunk->cli); } bool object_get_body(struct client *cli, const char *user, const char *bucket, diff --git a/server/server.c b/server/server.c index 72db151..600c8de 100644 --- a/server/server.c +++ b/server/server.c @@ -380,6 +380,8 @@ static void stats_dump(void) tabled_srv.stats.event, tabled_srv.stats.tcp_accept, tabled_srv.stats.opt_write); + applog(LOG_INFO, "DEBUG: max_write_buf %lu", + tabled_srv.stats.max_write_buf); stor_stats(); rep_stats(); } @@ -405,16 +407,24 @@ bool stat_status(struct client *cli, GList *content) content = g_list_append(content, str); if (asprintf(&str, "<p>Stats: " - "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n", + "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n" + "<p>Debug: max_write_buf %lu</p>\r\n", tabled_srv.stats.poll, tabled_srv.stats.event, tabled_srv.stats.tcp_accept, - tabled_srv.stats.opt_write) < 0) + tabled_srv.stats.opt_write, + tabled_srv.stats.max_write_buf) < 0) return false; content = g_list_append(content, str); return true; } +static void cli_write_complete(struct client *cli, struct client_write *tmp) +{ + list_del(&tmp->node); + list_add_tail(&tmp->node, &cli->write_compl_q); +} + static bool cli_write_free(struct client *cli, struct client_write *tmp, bool done) { @@ -433,11 +443,28 @@ static void cli_write_free_all(struct client *cli) { struct client_write *wr, *tmp; + list_for_each_entry_safe(wr, tmp, &cli->write_compl_q, node) { + cli_write_free(cli, wr, true); + } list_for_each_entry_safe(wr, tmp, &cli->write_q, node) { cli_write_free(cli, wr, false); } } +bool cli_write_run_compl(struct client *cli) +{ + struct client_write *wr; + bool do_loop; + + do_loop = false; + while (!list_empty(&cli->write_compl_q)) { + wr = list_entry(cli->write_compl_q.next, struct client_write, + node); + do_loop |= cli_write_free(cli, wr, true); + } + return do_loop; +} + static void cli_free(struct client *cli) { cli_write_free_all(cli); @@ -455,6 +482,9 @@ static void cli_free(struct client *cli) req_free(&cli->req); + if (cli->write_cnt_max > tabled_srv.stats.max_write_buf) + tabled_srv.stats.max_write_buf = cli->write_cnt_max; + if (debugging) applog(LOG_INFO, "client %s ended", cli->addr_host); @@ -505,10 +535,6 @@ static void cli_writable(struct client *cli) struct client_write *tmp; ssize_t rc; struct iovec iov[CLI_MAX_WR_IOV]; - bool more_work; - -restart: - more_work = false; /* accumulate pending writes into iovec */ n_iov = 0; @@ -548,16 +574,13 @@ do_write: rc -= sz; /* if tmp->len reaches zero, write is complete, - * call callback and clean up + * so schedule it for clean up (cannot call callback + * right away or an endless recursion will result) */ if (tmp->togo == 0) - if (cli_write_free(cli, tmp, true)) - more_work = true; + cli_write_complete(cli, tmp); } - if (more_work) - goto restart; - /* if we emptied the queue, clear write notification */ if (list_empty(&cli->write_q)) { cli->writing = false; @@ -622,6 +645,8 @@ int cli_writeq(struct client *cli, const void *buf, unsigned int buflen, wr->cb_data = cb_data; list_add_tail(&wr->node, &cli->write_q); cli->write_cnt += buflen; + if (cli->write_cnt > cli->write_cnt_max) + cli->write_cnt_max = cli->write_cnt; return 0; } @@ -1275,6 +1300,7 @@ static struct client *cli_alloc(bool is_status) cli->state = evt_read_req; cli->evt_table = is_status? evt_funcs_status: evt_funcs_server; INIT_LIST_HEAD(&cli->write_q); + INIT_LIST_HEAD(&cli->write_compl_q); INIT_LIST_HEAD(&cli->out_ch); cli->req_ptr = cli->req_buf; memset(&cli->req, 0, sizeof(cli->req) - sizeof(cli->req.hdr)); @@ -1287,6 +1313,7 @@ static void tcp_cli_wr_event(int fd, short events, void *userdata) struct client *cli = userdata; cli_writable(cli); + cli_write_run_compl(cli); } static void tcp_cli_event(int fd, short events, void *userdata) @@ -1296,6 +1323,7 @@ static void tcp_cli_event(int fd, short events, void *userdata) do { loop = cli->evt_table[cli->state](cli, events); + loop |= cli_write_run_compl(cli); } while (loop); } diff --git a/server/tabled.h b/server/tabled.h index b4f51ed..3ef4a49 100644 --- a/server/tabled.h +++ b/server/tabled.h @@ -164,8 +164,11 @@ struct client { struct event write_ev; struct list_head write_q; /* list of async writes */ + struct list_head write_compl_q; /* list of done writes */ size_t write_cnt; /* water level */ bool writing; + /* some debugging stats */ + size_t write_cnt_max; unsigned int req_used; /* amount of req_buf in use */ char *req_ptr; /* start of unexamined data */ @@ -212,6 +215,8 @@ struct server_stats { unsigned long event; /* events dispatched */ unsigned long tcp_accept; /* TCP accepted cxns */ unsigned long opt_write; /* optimistic writes */ + + unsigned long max_write_buf; }; struct listen_cfg { @@ -325,6 +330,7 @@ extern int cli_writeq(struct client *cli, const void *buf, unsigned int buflen, extern size_t cli_wqueued(struct client *cli); extern bool cli_cb_free(struct client *cli, void *cb_data, bool done); extern bool cli_write_start(struct client *cli); +extern bool cli_write_run_compl(struct client *cli); extern int cli_req_avail(struct client *cli); extern void applog(int prio, const char *fmt, ...); extern int stor_update_cb(void); -- To unsubscribe from this list: send the line "unsubscribe hail-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html