[tabled patch 2/3] Fix the flood of "read obj(N)"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sometimes during operation, tabled starts flooding the log with the message "read obj".

I thought it had something to do with the replication thread fetching
incomplete objects, but it is actually caused by attempts to read from a
closed object. Whether it occurs is dictated purely by load levels.

This fix closes the obvious avenue, but leaves any "potential" ones open.
I want to see if they are ever hit.

Also, fix up the log messages a bit. In a redundant environment it is
necessary to know the NIDs so that a broken Chunk can be isolated, so add
the NID to each message.

Signed-off-by: Pete Zaitcev <zaitcev@xxxxxxxxxx>

---
 server/object.c |   32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

commit 146b9efd8628343b7ecbfe879ce2c1b09e797bba
Author: Pete Zaitcev <zaitcev@xxxxxxxxx>
Date:   Thu Aug 5 20:40:29 2010 -0600

    Fix floods of "read obj(4FF1)".

diff --git a/server/object.c b/server/object.c
index d495d6e..f8e7b12 100644
--- a/server/object.c
+++ b/server/object.c
@@ -356,10 +356,14 @@ static bool object_put_end(struct client *cli)
 			if (debugging) {
 				/* FIXME how do we test for inline objects here? */
 				if (!stor_obj_test(ochunk, cli->out_objid))
-					applog(LOG_ERR, "Stat (%llX) failed",
+					applog(LOG_ERR,
+					       "Stat failed nid %u oid %llX",
+					       ochunk->node->id,
 					       (unsigned long long) cli->out_objid);
 				else
-					applog(LOG_DEBUG, "STORED %llX, size -",
+					applog(LOG_DEBUG,
+					       "STORED nid %u oid %llX, size -",
+					       ochunk->node->id,
 					       (unsigned long long) cli->out_objid);
 			}
 			obj_addr.nidv[nok] = GUINT32_TO_LE(ochunk->node->id);
@@ -705,8 +709,9 @@ static struct open_chunk *open_chunk1(struct storage_node *stnode,
 
 	rc = stor_put_start(ochunk, object_put_event, objid, content_len);
 	if (rc != 0) {
-		applog(LOG_WARNING, "Cannot start putting for %llX (%d)",
-		       (unsigned long long) objid, rc);
+		applog(LOG_WARNING,
+		       "Cannot start putting for nid %u oid %llX (%d)",
+		       ochunk->node->id, (unsigned long long) objid, rc);
 		goto err_start;
 	}
 
@@ -982,6 +987,7 @@ void cli_in_end(struct client *cli)
 		return;
 
 	stor_close(&cli->in_ce);
+	cli->in_len = 0;
 }
 
 static bool object_get_more(struct client *cli, void *cb_data, bool done);
@@ -995,6 +1001,13 @@ static bool object_get_poke(struct client *cli)
 	char *buf;
 	ssize_t bytes;
 
+	/* The checks for in_len in caller should protect us, but let's see. */
+	if (!cli->in_ce.stc) {
+		applog(LOG_ERR, "read on closed chunk, in_len %ld",
+		       (long) cli->in_len);
+		return false;
+	}
+
 	/* XXX flow control - what treshold? */
 	/*  if (cli_wqueued(cli) >= 1000000000) return false; */
 
@@ -1005,8 +1018,9 @@ static bool object_get_poke(struct client *cli)
 	bytes = stor_get_buf(&cli->in_ce, buf,
 			     MIN(cli->in_len, CLI_DATA_BUF_SZ));
 	if (bytes < 0) {
-		applog(LOG_ERR, "read obj(%llX) failed",
-		       (unsigned long long) cli->in_objid);
+		applog(LOG_ERR, "read failed nid %u oid %llX (%d)",
+		       cli->in_ce.node->id, (unsigned long long) cli->in_objid,
+		       (int) bytes);
 		goto err_out;
 	}
 	if (bytes == 0) {
@@ -1048,6 +1062,8 @@ static bool object_get_more(struct client *cli, void *cb_data, bool done)
 	/* do not queue more, if !completion or fd was closed early */
 	if (!done)	/* FIXME We used to test for input errors here. */
 		return false;
+	if (!cli->in_len)
+		return false;
 
 	return object_get_poke(cli);		/* won't hurt to try */
 }
@@ -1323,8 +1339,8 @@ static bool object_get_body(struct client *cli, const char *user,
 
 	bytes = stor_get_buf(&cli->in_ce, buf, MIN(cli->in_len, sizeof(buf)));
 	if (bytes < 0) {
-		applog(LOG_ERR, "read obj(%llX) failed",
-		       (unsigned long long) cli->in_objid);
+		applog(LOG_ERR, "read failed nid %u oid %llX",
+		       cli->in_ce.node->id, (unsigned long long) cli->in_objid);
 		goto err_out_in_end;
 	}
 	if (bytes == 0) {
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Fedora Cloud]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux