[Patch 2/7] tabled: fix the endless recusion when reading long objects

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



At certain network and disk speeds, tabled can blow its stack by
filling it with (essentially) endless recursion:

#2  0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb910, done=<value optimized out>) at server.c:397
#3  0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525
#4  0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561
#5  0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039
#6  0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb8d0, done=<value optimized out>) at server.c:397
#7  0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525
#8  0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561
#9  0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039
#10 0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb890, done=<value optimized out>) at server.c:397

The fix is to deliver callbacks only from the top level.

Callbacks must be delivered every time a send is completed,
which amounts to every call to is_writeable(). Since there
is a large number of callers to it, we found it advantageous
to run callbacks from every source of events. In other words,
every function that is passed to event_set must invoke
cli_write_run_compl. Mind that storage.c contains calls to
event_set.

Signed-off-by: Pete Zaitcev <zaitcev@xxxxxxxxxx>

---
 server/object.c |    4 +++
 server/server.c |   52 +++++++++++++++++++++++++++++++++++-----------
 server/tabled.h |    6 +++++
 3 files changed, 50 insertions(+), 12 deletions(-)

commit 88959fb8c4cc8232f92805618c5bcaeee9099390
Author: Pete Zaitcev <zaitcev@xxxxxxxxx>
Date:   Thu Apr 1 19:04:13 2010 -0600

    Fix the endless recursion when reading long objects.

diff --git a/server/object.c b/server/object.c
index d61a446..6d32316 100644
--- a/server/object.c
+++ b/server/object.c
@@ -994,6 +994,9 @@ static bool object_get_poke(struct client *cli)
 	char *buf;
 	ssize_t bytes;
 
+	/* XXX flow control - what treshold? */
+	/*  if (cli_wqueued(cli) >= 1000000000) return false; */
+
 	buf = malloc(CLI_DATA_BUF_SZ);
 	if (!buf)
 		return false;
@@ -1052,6 +1055,7 @@ static bool object_get_more(struct client *cli, void *cb_data, bool done)
 static void object_get_event(struct open_chunk *ochunk)
 {
 	object_get_poke(ochunk->cli);
+	cli_write_run_compl(ochunk->cli);
 }
 
 bool object_get_body(struct client *cli, const char *user, const char *bucket,
diff --git a/server/server.c b/server/server.c
index 72db151..600c8de 100644
--- a/server/server.c
+++ b/server/server.c
@@ -380,6 +380,8 @@ static void stats_dump(void)
 	       tabled_srv.stats.event,
 	       tabled_srv.stats.tcp_accept,
 	       tabled_srv.stats.opt_write);
+	applog(LOG_INFO, "DEBUG: max_write_buf %lu",
+	       tabled_srv.stats.max_write_buf);
 	stor_stats();
 	rep_stats();
 }
@@ -405,16 +407,24 @@ bool stat_status(struct client *cli, GList *content)
 	content = g_list_append(content, str);
 	if (asprintf(&str,
 		     "<p>Stats: "
-		     "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n",
+		     "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n"
+		     "<p>Debug: max_write_buf %lu</p>\r\n",
 		     tabled_srv.stats.poll,
 		     tabled_srv.stats.event,
 		     tabled_srv.stats.tcp_accept,
-		     tabled_srv.stats.opt_write) < 0)
+		     tabled_srv.stats.opt_write,
+		     tabled_srv.stats.max_write_buf) < 0)
 		return false;
 	content = g_list_append(content, str);
 	return true;
 }
 
+static void cli_write_complete(struct client *cli, struct client_write *tmp)
+{
+	list_del(&tmp->node);
+	list_add_tail(&tmp->node, &cli->write_compl_q);
+}
+
 static bool cli_write_free(struct client *cli, struct client_write *tmp,
 			   bool done)
 {
@@ -433,11 +443,28 @@ static void cli_write_free_all(struct client *cli)
 {
 	struct client_write *wr, *tmp;
 
+	list_for_each_entry_safe(wr, tmp, &cli->write_compl_q, node) {
+		cli_write_free(cli, wr, true);
+	}
 	list_for_each_entry_safe(wr, tmp, &cli->write_q, node) {
 		cli_write_free(cli, wr, false);
 	}
 }
 
+bool cli_write_run_compl(struct client *cli)
+{
+	struct client_write *wr;
+	bool do_loop;
+
+	do_loop = false;
+	while (!list_empty(&cli->write_compl_q)) {
+		wr = list_entry(cli->write_compl_q.next, struct client_write,
+				node);
+		do_loop |= cli_write_free(cli, wr, true);
+	}
+	return do_loop;
+}
+
 static void cli_free(struct client *cli)
 {
 	cli_write_free_all(cli);
@@ -455,6 +482,9 @@ static void cli_free(struct client *cli)
 
 	req_free(&cli->req);
 
+	if (cli->write_cnt_max > tabled_srv.stats.max_write_buf)
+		tabled_srv.stats.max_write_buf = cli->write_cnt_max;
+
 	if (debugging)
 		applog(LOG_INFO, "client %s ended", cli->addr_host);
 
@@ -505,10 +535,6 @@ static void cli_writable(struct client *cli)
 	struct client_write *tmp;
 	ssize_t rc;
 	struct iovec iov[CLI_MAX_WR_IOV];
-	bool more_work;
-
-restart:
-	more_work = false;
 
 	/* accumulate pending writes into iovec */
 	n_iov = 0;
@@ -548,16 +574,13 @@ do_write:
 		rc -= sz;
 
 		/* if tmp->len reaches zero, write is complete,
-		 * call callback and clean up
+		 * so schedule it for clean up (cannot call callback
+		 * right away or an endless recursion will result)
 		 */
 		if (tmp->togo == 0)
-			if (cli_write_free(cli, tmp, true))
-				more_work = true;
+			cli_write_complete(cli, tmp);
 	}
 
-	if (more_work)
-		goto restart;
-
 	/* if we emptied the queue, clear write notification */
 	if (list_empty(&cli->write_q)) {
 		cli->writing = false;
@@ -622,6 +645,8 @@ int cli_writeq(struct client *cli, const void *buf, unsigned int buflen,
 	wr->cb_data = cb_data;
 	list_add_tail(&wr->node, &cli->write_q);
 	cli->write_cnt += buflen;
+	if (cli->write_cnt > cli->write_cnt_max)
+		cli->write_cnt_max = cli->write_cnt;
 
 	return 0;
 }
@@ -1275,6 +1300,7 @@ static struct client *cli_alloc(bool is_status)
 	cli->state = evt_read_req;
 	cli->evt_table = is_status? evt_funcs_status: evt_funcs_server;
 	INIT_LIST_HEAD(&cli->write_q);
+	INIT_LIST_HEAD(&cli->write_compl_q);
 	INIT_LIST_HEAD(&cli->out_ch);
 	cli->req_ptr = cli->req_buf;
 	memset(&cli->req, 0, sizeof(cli->req) - sizeof(cli->req.hdr));
@@ -1287,6 +1313,7 @@ static void tcp_cli_wr_event(int fd, short events, void *userdata)
 	struct client *cli = userdata;
 
 	cli_writable(cli);
+	cli_write_run_compl(cli);
 }
 
 static void tcp_cli_event(int fd, short events, void *userdata)
@@ -1296,6 +1323,7 @@ static void tcp_cli_event(int fd, short events, void *userdata)
 
 	do {
 		loop = cli->evt_table[cli->state](cli, events);
+		loop |= cli_write_run_compl(cli);
 	} while (loop);
 }
 
diff --git a/server/tabled.h b/server/tabled.h
index b4f51ed..3ef4a49 100644
--- a/server/tabled.h
+++ b/server/tabled.h
@@ -164,8 +164,11 @@ struct client {
 	struct event		write_ev;
 
 	struct list_head	write_q;	/* list of async writes */
+	struct list_head	write_compl_q;	/* list of done writes */
 	size_t			write_cnt;	/* water level */
 	bool			writing;
+	/* some debugging stats */
+	size_t			write_cnt_max;
 
 	unsigned int		req_used;	/* amount of req_buf in use */
 	char			*req_ptr;	/* start of unexamined data */
@@ -212,6 +215,8 @@ struct server_stats {
 	unsigned long		event;		/* events dispatched */
 	unsigned long		tcp_accept;	/* TCP accepted cxns */
 	unsigned long		opt_write;	/* optimistic writes */
+
+	unsigned long		max_write_buf;
 };
 
 struct listen_cfg {
@@ -325,6 +330,7 @@ extern int cli_writeq(struct client *cli, const void *buf, unsigned int buflen,
 extern size_t cli_wqueued(struct client *cli);
 extern bool cli_cb_free(struct client *cli, void *cb_data, bool done);
 extern bool cli_write_start(struct client *cli);
+extern bool cli_write_run_compl(struct client *cli);
 extern int cli_req_avail(struct client *cli);
 extern void applog(int prio, const char *fmt, ...);
 extern int stor_update_cb(void);
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Fedora Clound]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux