On Wed, Jun 14, 2023 at 12:59:46AM +0200, Mathias Krause wrote: > This patch causes VM boot hangs for us. It took a while to identify as > the boot hangs were only ~1 out of 30 but it's clearly it. Reverting > the commit got me 100 boots in a row without any issue. FWIW I have quite a nice test program for catching these sorts of boot hangs, see attached. You need to change the VMLINUX define to point to your vmlinux or vmlinuz file. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-builder quickly builds VMs from scratch http://libguestfs.org/virt-builder.1.html
#include <stdio.h> #include <stdlib.h> #include <errno.h> #include <error.h> #include <unistd.h> #include <sys/wait.h> #include <pthread.h> #define NR_ITERATIONS 10000 #define MAX_THREADS 8 #define MAX_TIME 60 /* max time to wait for qemu to complete */ //#define VMLINUX "/home/rjones/d/linux/vmlinux" #define VMLINUX "vmlinux" //#define QEMU "/home/rjones/d/qemu/build/qemu-system-x86_64" #define QEMU "qemu-system-x86_64" #define QEMU_COMMAND \ QEMU " -no-user-config -nodefaults -display none " \ "-machine accel=kvm:tcg,graphics=off -cpu max,la57=off -m 1280 " \ "-no-reboot " \ "-rtc driftfix=slew -no-hpet -global kvm-pit.lost_tick_policy=discard " \ "-kernel " VMLINUX " " \ "-serial stdio " \ "-append \"panic=1 console=ttyS0 edd=off udevtimeout=6000 udev.event-timeout=6000 no_timer_check printk.time=1 cgroup_disable=memory usbcore.nousb cryptomgr.notests tsc=reliable 8250.nr_uarts=1 selinux=0 TERM=xterm-256color\"" static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; static unsigned iterations; static void *start_thread (void *); int main (int argc, char *argv[]) { long n, i; int r; pthread_t thread[MAX_THREADS]; n = sysconf (_SC_NPROCESSORS_ONLN); if (n == -1) error (EXIT_FAILURE, errno, "sysconf"); if (n > MAX_THREADS) n = MAX_THREADS; for (i = 0; i < n; ++i) { r = pthread_create (&thread[i], NULL, start_thread, NULL); if (r != 0) error (EXIT_FAILURE, r, "pthread_create"); } for (i = 0; i < n; ++i) { r = pthread_join (thread[i], NULL); if (r != 0) error (EXIT_FAILURE, r, "pthread_join"); } printf ("\n"); printf ("\n"); printf ("test ok\n"); exit (EXIT_SUCCESS); } static void * start_thread (void *vp) { pid_t pid; char tmp[] = "/tmp/kernel.out.XXXXXX"; char cmd[1024]; int i, r, status; if (mkstemp (tmp) == -1) error (EXIT_FAILURE, errno, "mkstemp: %s", tmp); snprintf (cmd, sizeof cmd, QEMU_COMMAND " >& %s", tmp); /* This basically runs a loop starting qemu. */ for (;;) { pthread_mutex_lock (&lock); if (iterations >= NR_ITERATIONS) { pthread_mutex_unlock (&lock); return NULL; } if (iterations <= MAX_THREADS) { // stagger the start times pthread_mutex_unlock (&lock); usleep (rand () % 3000000); pthread_mutex_lock (&lock); } iterations++; printf ("%d... ", iterations); fflush (stdout); pthread_mutex_unlock (&lock); pid = fork (); if (pid == -1) error (EXIT_FAILURE, errno, "fork"); if (pid == 0) { /* Child process, run qemu and wait. */ if (system (cmd) != 0) _exit (EXIT_FAILURE); else _exit (EXIT_SUCCESS); } /* In the parent wait up to MAX_TIME seconds. */ for (i = 0; i < MAX_TIME; ++i) { r = waitpid (pid, &status, WNOHANG); if (r == -1) error (EXIT_FAILURE, errno, "waitpid"); if (r > 0) break; sleep (1); } if (i == MAX_TIME || status != 0) { /* Something failed in qemu (or it didn't * exit), dump the whole log and exit with * error. */ printf ("\n"); printf ("\n"); snprintf (cmd, sizeof cmd, "tail -20 %s", tmp); system (cmd); fprintf (stderr, "*** ERROR OR HANG ***\n"); exit (EXIT_FAILURE); } } unlink (tmp); }