Re: v2.3.15 IMAP connection hangs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Sep 15, 2009 at 02:35:48PM -0400, Rosenbaum, Larry M. wrote:
> > From: Bron Gondwana [mailto:brong@xxxxxxxxxxx]
> > 
> >...
> > That's kind of pointless, sorry.  I really need a trace of the imapd
> > process, since that's what's locking up.
> 
> Here's another trace of an imapd process:

Sorry I haven't replied before - I'm on holiday over the other side of
Australia for a week - so we were flying most of yesterday to get here!

> 23237:  poll(0xFFBFB860, 1, 1800000)                    = 1
> 23237:  time()                                          = 1253039218
> 23237:  time()                                          = 1253039218
> 23237:  poll(0xFFBFC858, 1, 1788000)                    = 1
> 23237:  time()                                          = 1253039218
> 23237:  time()                                          = 1253039218
> 23237:  read(0, " 0   l o g o u t\r\n", 4096)           = 10
> 23237:  time()                                          = 1253039218
> 23237:  poll(0xFFBFC858, 1, 1800000)    (sleeping...)
> 
> (At this point the trace paused, and the Telnet session got no response.
> I aborted the Telnet session and the trace continued as follows:)

This is the same path at the same point as last time.  Clearly something
going on!

The pain is, I can't see it - unless it's happening deep inside lib/prot.c
somewhere.  And the strace doesn't tell us anything - it will read up to
4096 characters straight off, and it's getting 10 of them, which should
be enough!

I'm wondering if something in the COMPRESS changes has broken an assuption.

I don't suppose you're able to gdb the process and get a backtrace from
that poll are you?  Sorry to keep asking for more stuff, I just can't
figure out any more than that from the truss output!
 
> 23237:  poll(0xFFBFC858, 1, 1800000)                    = 1

Yeah, so disconnect caused the poll to finish!

And a signal handler had fired, so it knows to close down...

> 23237:  close(11)                                       = 0
> 23237:  unlink("/usr/local/cyrus/data/proc/23237")      = 0
> 23237:  open("/dev/null", O_RDWR)                       = 11
> 23237:  shutdown(0, 0, 1)                               = 0
> 
> So what was it waiting for?

That's a really good question!  It feels to me that it had gone back into
the loop to get some more data.

Hmm - question: do you have zlib support compiled in?

[brong@utility1 ~]$ ldd /usr/cyrus/bin/imapd | grep libz
	libz.so.1 => /usr/lib/libz.so.1 (0x55990000)

Maybe there's a bug in the codepaths for COMPRESS when zlib doesn't
exist (because they're all #ifdef HAVE_ZLIB - maybe things don't work
nicely without it!)

Long shot - but you might want to try it with the attached patch applied.
It rewrites the zlib handling logic quite a lot, though it wasn't originally
designed to deal with your problem!  I'm going to push it to CVS soon.

Bron.


diff --git a/lib/prot.c b/lib/prot.c
index 6eb285d..1ae690d 100644
--- a/lib/prot.c
+++ b/lib/prot.c
@@ -93,7 +93,6 @@ int write;
     newstream = (struct protstream *) xzmalloc(sizeof(struct protstream));
     newstream->buf = (unsigned char *) 
 	xmalloc(sizeof(char) * (PROT_BUFSIZE));
-    newstream->buf_size = PROT_BUFSIZE;
     newstream->ptr = newstream->buf;
     newstream->maxplain = PROT_BUFSIZE;
     newstream->fd = fd;
@@ -103,6 +102,11 @@ int write;
     if(write)
 	newstream->cnt = PROT_BUFSIZE;
 
+#ifdef HAVE_ZLIB
+    newstream->zbuf = 0;
+    newstream->zbuf_size = 0;
+#endif
+
     return newstream;
 }
 
@@ -515,6 +519,34 @@ int prot_fill(struct protstream *s)
     if (s->eof || s->error) return EOF;
 
     do {
+#ifdef HAVE_ZLIB
+	/* check if there's anything in the zlib buffer already */
+	if (s->zstrm && s->zstrm->avail_in) {
+	    int zr;
+	    if (!s->zbuf) {
+		s->zbuf = (unsigned char *)xmalloc(sizeof(unsigned char) * (PROT_BUFSIZE));
+		s->zbuf_size = PROT_BUFSIZE;
+	    }
+	    s->zstrm->next_out = s->zbuf;
+	    s->zstrm->avail_out = s->zbuf_size;
+	    zr = inflate(s->zstrm, Z_SYNC_FLUSH);
+	    if (zr != Z_OK) {
+		/* Error decompressing */
+		syslog(LOG_ERR, "zlib error: %d %s", zr, s->zstrm->msg);
+		s->error = xstrdup("Error decompressing data");
+		return EOF;
+	    }
+
+	    if (s->zstrm->avail_out < PROT_BUFSIZE) {
+		/* inflated some data */
+		s->ptr = s->zbuf;
+		s->cnt = PROT_BUFSIZE - s->zstrm->avail_out;
+		syslog(LOG_DEBUG, "   => decompressed to %d bytes", s->cnt);
+		break;
+	    }
+	}
+#endif
+
 	/* wait until get input */
 	haveinput = 0;
 
@@ -633,74 +665,41 @@ int prot_fill(struct protstream *s)
 	}
 
 #ifdef HAVE_ZLIB
-	if (s->zstrm && s->cnt > 0) {
-	    /* Decompress the data */
-	    int zr = Z_OK;
+	if (s->zstrm) {
+	    /* transfer the stream to the input of the z_stream and loop */
 
 	    s->zstrm->next_in = s->ptr;
 	    s->zstrm->avail_in = s->cnt;
-	    s->zstrm->next_out = s->zbuf;
-	    s->zstrm->avail_out = s->zbuf_size;
-
-	    syslog(LOG_DEBUG, "inflate(%d bytes)", s->cnt);
+	    s->cnt = 0;
+	}
+#endif /* HAVE_ZLIB */
+    } while (!s->cnt);
 
-	    do {
-		if (!s->zstrm->avail_out) {
-		    /* Need more space to decompress */
-		    syslog(LOG_DEBUG,
-			   "growing decompress buffer from %d to %d bytes",
-			   s->zbuf_size, s->zbuf_size + PROT_BUFSIZE);
-
-		    s->zbuf = (unsigned char *)
-			xrealloc(s->zbuf, s->zbuf_size + PROT_BUFSIZE);
-		    s->zstrm->next_out = s->zbuf + s->zbuf_size;
-		    s->zstrm->avail_out = PROT_BUFSIZE;
-		    s->zbuf_size += PROT_BUFSIZE;
-		}
+    if (s->logfd != -1) {
+	time_t newtime;
+	char timebuf[20];
 
-		zr = inflate(s->zstrm, Z_SYNC_FLUSH);
-	    } while (zr == Z_OK && !s->zstrm->avail_out);
+	time(&newtime);
+	snprintf(timebuf, sizeof(timebuf), "<%ld<", newtime);
+	write(s->logfd, timebuf, strlen(timebuf));
 
-	    if (zr != Z_OK || s->zstrm->avail_in) {
-		/* Error decompressing */
-		s->error = xstrdup("Error decompressing data");
-		return EOF;
+	left = s->cnt;
+	ptr = s->ptr;
+	do {
+	    n = write(s->logfd, ptr, left);
+	    if (n == -1 && (errno != EINTR)) {
+		break;
 	    }
 
-	    s->ptr = s->zbuf;
-	    s->cnt = s->zbuf_size - s->zstrm->avail_out;
-
-	    syslog(LOG_DEBUG, "   => decompressed to %d bytes", s->cnt);
-	}
-#endif /* HAVE_ZLIB */
-	
-	if (s->cnt > 0) {
-	    if (s->logfd != -1) {
-		time_t newtime;
-		char timebuf[20];
-
-		time(&newtime);
-		snprintf(timebuf, sizeof(timebuf), "<%ld<", newtime);
-		write(s->logfd, timebuf, strlen(timebuf));
-
-		left = s->cnt;
-		ptr = s->ptr;
-		do {
-		    n = write(s->logfd, ptr, left);
-		    if (n == -1 && errno != EINTR) {
-			break;
-		    }
-		    if (n > 0) {
-			ptr += n;
-			left -= n;
-		    }
-		} while (left);
+	    if (n > 0) {
+		ptr += n;
+		left -= n;
 	    }
+	} while (left);
+    }
 
-	    s->cnt--;		/* we return the first char */
-	    return *s->ptr++;
-	}
-    } while (1);
+    s->cnt--;		/* we return the first char */
+    return *s->ptr++;
 }
 
 /*
@@ -751,20 +750,12 @@ static int prot_flush_encode(struct protstream *s,
 #ifdef HAVE_ZLIB
     if (s->zstrm) {
 	/* Compress the data */
-	unsigned long def_size = deflateBound(s->zstrm, left);
 	int zflush = s->boundary ? Z_FULL_FLUSH : Z_SYNC_FLUSH;
 	int zr = Z_OK;
 
-	if (def_size > s->zbuf_size) {
-	    /* Make sure buffer is large enough to hold compressed data.
-	     * Oversize the buffer, so we (hopefully) eliminate
-	     * multiple small incremental reallocations.
-	     */
-	    syslog(LOG_DEBUG, "growing compress buffer from %u to %lu bytes",
-		   s->zbuf_size, def_size + PROT_BUFSIZE);
-
-	    s->zbuf_size = def_size + PROT_BUFSIZE;
-	    s->zbuf = (unsigned char *) xrealloc(s->zbuf, s->zbuf_size);
+	if (!s->zbuf) {
+	    s->zbuf = (unsigned char *)xmalloc(sizeof(unsigned char) * (PROT_BUFSIZE));
+	    s->zbuf_size = PROT_BUFSIZE;
 	}
 
 	s->zstrm->next_in = ptr;
@@ -779,14 +770,34 @@ static int prot_flush_encode(struct protstream *s,
 	if (s->boundary) {
 	    /* Set (new) compression level */
 	    zr = deflateParams(s->zstrm, s->zlevel, Z_DEFAULT_STRATEGY);
+	    if (zr != Z_OK) {
+	    	s->error = xstrdup("Error setting compression level");
+	    	return EOF;
+	    }
 	}
-	if (zr == Z_OK) zr = deflate(s->zstrm, zflush);
 
-	if (zr != Z_OK || s->zstrm->avail_in) {
-	    /* Error compressing */
-	    s->error = xstrdup("Error compressing data");
-	    return EOF;
-	}
+	do {
+	    if (!s->zstrm->avail_out) {
+		syslog(LOG_DEBUG, "growing compress buffer from %u to %u bytes",
+		       s->zbuf_size, s->zbuf_size + PROT_BUFSIZE);
+
+		s->zbuf_size += PROT_BUFSIZE;
+		s->zbuf = (unsigned char *) xrealloc(s->zbuf, s->zbuf_size);
+	        s->zstrm->avail_out = PROT_BUFSIZE;
+	    }
+	    zr = deflate(s->zstrm, zflush);
+	    if (!(zr == Z_OK || zr == Z_STREAM_END || zr == Z_BUF_ERROR)) {
+		/* something went wrong */
+		s->error = xstrdup("Error compressing stream");
+		return EOF;
+	    }
+	} while (!s->zstrm->avail_out);
+	/* http://www.zlib.net/manual.html says:
+         * If deflate returns with avail_out == 0, this function must be 
+         * called again with the same value of the flush parameter and
+         * more output space (updated avail_out), until the flush is 
+         * complete (deflate returns with non-zero avail_out). 
+         */
 
 	ptr = s->zbuf;
 	left = s->zbuf_size - s->zstrm->avail_out;
diff --git a/lib/prot.h b/lib/prot.h
index 3b8ad75..9df0976 100644
--- a/lib/prot.h
+++ b/lib/prot.h
@@ -73,7 +73,6 @@ typedef void prot_readcallback_t(struct protstream *s, void *rock);
 struct protstream {
     /* The Buffer */
     unsigned char *buf;
-    unsigned buf_size;
     unsigned char *ptr; /* The end of data in the buffer */
     unsigned cnt; /* Space Remaining in buffer */
 
----
Cyrus Home Page: http://cyrusimap.web.cmu.edu/
Cyrus Wiki/FAQ: http://cyrusimap.web.cmu.edu/twiki
List Archives/Info: http://asg.web.cmu.edu/cyrus/mailing-list.html

[Index of Archives]     [Cyrus SASL]     [Squirrel Mail]     [Asterisk PBX]     [Video For Linux]     [Photo]     [Yosemite News]     [gtk]     [KDE]     [Gimp on Windows]     [Steve's Art]

  Powered by Linux