On Tue, Sep 15, 2009 at 02:35:48PM -0400, Rosenbaum, Larry M. wrote: > > From: Bron Gondwana [mailto:brong@xxxxxxxxxxx] > > > >... > > That's kind of pointless, sorry. I really need a trace of the imapd > > process, since that's what's locking up. > > Here's another trace of an imapd process: Sorry I haven't replied before - I'm on holiday over the other side of Australia for a week - so we were flying most of yesterday to get here! > 23237: poll(0xFFBFB860, 1, 1800000) = 1 > 23237: time() = 1253039218 > 23237: time() = 1253039218 > 23237: poll(0xFFBFC858, 1, 1788000) = 1 > 23237: time() = 1253039218 > 23237: time() = 1253039218 > 23237: read(0, " 0 l o g o u t\r\n", 4096) = 10 > 23237: time() = 1253039218 > 23237: poll(0xFFBFC858, 1, 1800000) (sleeping...) > > (At this point the trace paused, and the Telnet session got no response. > I aborted the Telnet session and the trace continued as follows:) This is the same path at the same point as last time. Clearly something going on! The pain is, I can't see it - unless it's happening deep inside lib/prot.c somewhere. And the strace doesn't tell us anything - it will read up to 4096 characters straight off, and it's getting 10 of them, which should be enough! I'm wondering if something in the COMPRESS changes has broken an assuption. I don't suppose you're able to gdb the process and get a backtrace from that poll are you? Sorry to keep asking for more stuff, I just can't figure out any more than that from the truss output! > 23237: poll(0xFFBFC858, 1, 1800000) = 1 Yeah, so disconnect caused the poll to finish! And a signal handler had fired, so it knows to close down... > 23237: close(11) = 0 > 23237: unlink("/usr/local/cyrus/data/proc/23237") = 0 > 23237: open("/dev/null", O_RDWR) = 11 > 23237: shutdown(0, 0, 1) = 0 > > So what was it waiting for? That's a really good question! It feels to me that it had gone back into the loop to get some more data. Hmm - question: do you have zlib support compiled in? [brong@utility1 ~]$ ldd /usr/cyrus/bin/imapd | grep libz libz.so.1 => /usr/lib/libz.so.1 (0x55990000) Maybe there's a bug in the codepaths for COMPRESS when zlib doesn't exist (because they're all #ifdef HAVE_ZLIB - maybe things don't work nicely without it!) Long shot - but you might want to try it with the attached patch applied. It rewrites the zlib handling logic quite a lot, though it wasn't originally designed to deal with your problem! I'm going to push it to CVS soon. Bron.
diff --git a/lib/prot.c b/lib/prot.c index 6eb285d..1ae690d 100644 --- a/lib/prot.c +++ b/lib/prot.c @@ -93,7 +93,6 @@ int write; newstream = (struct protstream *) xzmalloc(sizeof(struct protstream)); newstream->buf = (unsigned char *) xmalloc(sizeof(char) * (PROT_BUFSIZE)); - newstream->buf_size = PROT_BUFSIZE; newstream->ptr = newstream->buf; newstream->maxplain = PROT_BUFSIZE; newstream->fd = fd; @@ -103,6 +102,11 @@ int write; if(write) newstream->cnt = PROT_BUFSIZE; +#ifdef HAVE_ZLIB + newstream->zbuf = 0; + newstream->zbuf_size = 0; +#endif + return newstream; } @@ -515,6 +519,34 @@ int prot_fill(struct protstream *s) if (s->eof || s->error) return EOF; do { +#ifdef HAVE_ZLIB + /* check if there's anything in the zlib buffer already */ + if (s->zstrm && s->zstrm->avail_in) { + int zr; + if (!s->zbuf) { + s->zbuf = (unsigned char *)xmalloc(sizeof(unsigned char) * (PROT_BUFSIZE)); + s->zbuf_size = PROT_BUFSIZE; + } + s->zstrm->next_out = s->zbuf; + s->zstrm->avail_out = s->zbuf_size; + zr = inflate(s->zstrm, Z_SYNC_FLUSH); + if (zr != Z_OK) { + /* Error decompressing */ + syslog(LOG_ERR, "zlib error: %d %s", zr, s->zstrm->msg); + s->error = xstrdup("Error decompressing data"); + return EOF; + } + + if (s->zstrm->avail_out < PROT_BUFSIZE) { + /* inflated some data */ + s->ptr = s->zbuf; + s->cnt = PROT_BUFSIZE - s->zstrm->avail_out; + syslog(LOG_DEBUG, " => decompressed to %d bytes", s->cnt); + break; + } + } +#endif + /* wait until get input */ haveinput = 0; @@ -633,74 +665,41 @@ int prot_fill(struct protstream *s) } #ifdef HAVE_ZLIB - if (s->zstrm && s->cnt > 0) { - /* Decompress the data */ - int zr = Z_OK; + if (s->zstrm) { + /* transfer the stream to the input of the z_stream and loop */ s->zstrm->next_in = s->ptr; s->zstrm->avail_in = s->cnt; - s->zstrm->next_out = s->zbuf; - s->zstrm->avail_out = s->zbuf_size; - - syslog(LOG_DEBUG, "inflate(%d bytes)", s->cnt); + s->cnt = 0; + } +#endif /* HAVE_ZLIB */ + } while (!s->cnt); - do { - if (!s->zstrm->avail_out) { - /* Need more space to decompress */ - syslog(LOG_DEBUG, - "growing decompress buffer from %d to %d bytes", - s->zbuf_size, s->zbuf_size + PROT_BUFSIZE); - - s->zbuf = (unsigned char *) - xrealloc(s->zbuf, s->zbuf_size + PROT_BUFSIZE); - s->zstrm->next_out = s->zbuf + s->zbuf_size; - s->zstrm->avail_out = PROT_BUFSIZE; - s->zbuf_size += PROT_BUFSIZE; - } + if (s->logfd != -1) { + time_t newtime; + char timebuf[20]; - zr = inflate(s->zstrm, Z_SYNC_FLUSH); - } while (zr == Z_OK && !s->zstrm->avail_out); + time(&newtime); + snprintf(timebuf, sizeof(timebuf), "<%ld<", newtime); + write(s->logfd, timebuf, strlen(timebuf)); - if (zr != Z_OK || s->zstrm->avail_in) { - /* Error decompressing */ - s->error = xstrdup("Error decompressing data"); - return EOF; + left = s->cnt; + ptr = s->ptr; + do { + n = write(s->logfd, ptr, left); + if (n == -1 && (errno != EINTR)) { + break; } - s->ptr = s->zbuf; - s->cnt = s->zbuf_size - s->zstrm->avail_out; - - syslog(LOG_DEBUG, " => decompressed to %d bytes", s->cnt); - } -#endif /* HAVE_ZLIB */ - - if (s->cnt > 0) { - if (s->logfd != -1) { - time_t newtime; - char timebuf[20]; - - time(&newtime); - snprintf(timebuf, sizeof(timebuf), "<%ld<", newtime); - write(s->logfd, timebuf, strlen(timebuf)); - - left = s->cnt; - ptr = s->ptr; - do { - n = write(s->logfd, ptr, left); - if (n == -1 && errno != EINTR) { - break; - } - if (n > 0) { - ptr += n; - left -= n; - } - } while (left); + if (n > 0) { + ptr += n; + left -= n; } + } while (left); + } - s->cnt--; /* we return the first char */ - return *s->ptr++; - } - } while (1); + s->cnt--; /* we return the first char */ + return *s->ptr++; } /* @@ -751,20 +750,12 @@ static int prot_flush_encode(struct protstream *s, #ifdef HAVE_ZLIB if (s->zstrm) { /* Compress the data */ - unsigned long def_size = deflateBound(s->zstrm, left); int zflush = s->boundary ? Z_FULL_FLUSH : Z_SYNC_FLUSH; int zr = Z_OK; - if (def_size > s->zbuf_size) { - /* Make sure buffer is large enough to hold compressed data. - * Oversize the buffer, so we (hopefully) eliminate - * multiple small incremental reallocations. - */ - syslog(LOG_DEBUG, "growing compress buffer from %u to %lu bytes", - s->zbuf_size, def_size + PROT_BUFSIZE); - - s->zbuf_size = def_size + PROT_BUFSIZE; - s->zbuf = (unsigned char *) xrealloc(s->zbuf, s->zbuf_size); + if (!s->zbuf) { + s->zbuf = (unsigned char *)xmalloc(sizeof(unsigned char) * (PROT_BUFSIZE)); + s->zbuf_size = PROT_BUFSIZE; } s->zstrm->next_in = ptr; @@ -779,14 +770,34 @@ static int prot_flush_encode(struct protstream *s, if (s->boundary) { /* Set (new) compression level */ zr = deflateParams(s->zstrm, s->zlevel, Z_DEFAULT_STRATEGY); + if (zr != Z_OK) { + s->error = xstrdup("Error setting compression level"); + return EOF; + } } - if (zr == Z_OK) zr = deflate(s->zstrm, zflush); - if (zr != Z_OK || s->zstrm->avail_in) { - /* Error compressing */ - s->error = xstrdup("Error compressing data"); - return EOF; - } + do { + if (!s->zstrm->avail_out) { + syslog(LOG_DEBUG, "growing compress buffer from %u to %u bytes", + s->zbuf_size, s->zbuf_size + PROT_BUFSIZE); + + s->zbuf_size += PROT_BUFSIZE; + s->zbuf = (unsigned char *) xrealloc(s->zbuf, s->zbuf_size); + s->zstrm->avail_out = PROT_BUFSIZE; + } + zr = deflate(s->zstrm, zflush); + if (!(zr == Z_OK || zr == Z_STREAM_END || zr == Z_BUF_ERROR)) { + /* something went wrong */ + s->error = xstrdup("Error compressing stream"); + return EOF; + } + } while (!s->zstrm->avail_out); + /* http://www.zlib.net/manual.html says: + * If deflate returns with avail_out == 0, this function must be + * called again with the same value of the flush parameter and + * more output space (updated avail_out), until the flush is + * complete (deflate returns with non-zero avail_out). + */ ptr = s->zbuf; left = s->zbuf_size - s->zstrm->avail_out; diff --git a/lib/prot.h b/lib/prot.h index 3b8ad75..9df0976 100644 --- a/lib/prot.h +++ b/lib/prot.h @@ -73,7 +73,6 @@ typedef void prot_readcallback_t(struct protstream *s, void *rock); struct protstream { /* The Buffer */ unsigned char *buf; - unsigned buf_size; unsigned char *ptr; /* The end of data in the buffer */ unsigned cnt; /* Space Remaining in buffer */
---- Cyrus Home Page: http://cyrusimap.web.cmu.edu/ Cyrus Wiki/FAQ: http://cyrusimap.web.cmu.edu/twiki List Archives/Info: http://asg.web.cmu.edu/cyrus/mailing-list.html