Re: [PATCH 3/4] qemu: monitor: Produce better errors on monitor hangup

Ján Tomko <jtomko@xxxxxxxxxx> · Tue, 24 Sep 2013 14:19:44 +0200

On 09/19/2013 11:23 AM, Peter Krempa wrote:
> Change the monitor error code to add the ability to access the qemu log
> file using a file descriptor so that we can dig in it for a more useful
> error message. The error is now logged on monitor hangups and overwrites
> a possible lesser error. A hangup on the monitor usualy means that qemu
> has crashed and there's a significant chance it produced a useful error
> message.
> 
> The functionality will be latent until the next patch.
> ---
>  src/qemu/qemu_monitor.c | 70 ++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 64 insertions(+), 6 deletions(-)
> 
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index c3701fe..5369975 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -32,6 +32,8 @@
>  #include "qemu_monitor.h"
>  #include "qemu_monitor_text.h"
>  #include "qemu_monitor_json.h"
> +#include "qemu_domain.h"
> +#include "qemu_process.h"
>  #include "virerror.h"
>  #include "viralloc.h"
>  #include "virlog.h"
> @@ -331,6 +333,37 @@ qemuMonitorOpenPty(const char *monitor)
>  }
> 
> 
> +/* Get a possible error from qemu's log. This function closes the
> + * corresponding log fd */
> +static char *
> +qemuMonitorGetErrorFromLog(qemuMonitorPtr mon)
> +{
> +    int len;
> +    char *logbuf = NULL;
> +    int orig_errno = errno;
> +
> +    if (mon->logfd < 0)
> +        return NULL;
> +
> +    if (VIR_ALLOC_N(logbuf, 4096) < 0)
> +        goto error;
> +
> +    if ((len = qemuProcessReadLog(mon->logfd, logbuf, 4096 - 1, 0, true)) <= 0)
> +        goto error;
> +
> +    errno = orig_errno;
> +    VIR_FORCE_CLOSE(mon->logfd);
> +    return logbuf;
> +
> +error:
> +    virResetLastError();

What error is this resetting? qemuProcessReadLog doesn't set any and you can
use VIR_ALLOC_N_QUIET for the other one.

> +    VIR_FREE(logbuf);
> +    VIR_FORCE_CLOSE(mon->logfd);
> +    errno = orig_errno;
> +    return NULL;
> +}
> +
> +
>  /* This method processes data that has been received
>   * from the monitor. Looking for async events and
>   * replies/errors.
> @@ -564,6 +597,7 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
>      qemuMonitorPtr mon = opaque;
>      bool error = false;
>      bool eof = false;
> +    bool hangup = false;
> 
>      virObjectRef(mon);
> 
> @@ -614,12 +648,14 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
>              }
>          }
> 
> -        if (!error &&
> -            events & VIR_EVENT_HANDLE_HANGUP) {
> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                           _("End of file from monitor"));
> -            eof = true;
> -            events &= ~VIR_EVENT_HANDLE_HANGUP;
> +        if (events & VIR_EVENT_HANDLE_HANGUP) {
> +            hangup = true;
> +            if (!error) {
> +                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                               _("End of file from monitor"));
> +                eof = true;
> +                events &= ~VIR_EVENT_HANDLE_HANGUP;
> +            }
>          }
> 
>          if (!error && !eof &&
> @@ -638,6 +674,28 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
>      }
> 
>      if (error || eof) {
> +        if (hangup) {
> +            /* Check if an error message from qemu is available and if so, use
> +             * it to overwrite the actual message. It's done only on early

*in

> +             * startup phases where the message from qemu is certailny more

*certainly

> +             * interresting than a "connection reset by peer" message.

*interesting

> +             */
> +            char *qemuMessage;
> +
> +            if ((qemuMessage = qemuMonitorGetErrorFromLog(mon)) &&
> +                strlen(qemuMessage) > 0) {

qemuMonitorGetErrorFromLog returns NULL when nothing was read, strlen should
not be necessary.

> +
> +                virReportError(VIR_ERR_INTERNAL_ERROR,
> +                               _("early end of file from monitor: "
> +                                 "possible problem:\n%s"),
> +                               qemuMessage);
> +                virCopyLastError(&mon->lastError);
> +                virResetLastError();
> +            }
> +
> +            VIR_FREE(qemuMessage);
> +        }
> +
>          if (mon->lastError.code != VIR_ERR_OK) {
>              /* Already have an error, so clear any new error */
>              virResetLastError();
> 

Jan

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list