Hi Meelis,
On sun4c, the only solution I ever managed to get close to working was a
prom-based polling loop of the keyboard at the end of panic(). This only
works when the processor is still running.
'soft lockups' (normally caused by recursion issues when I have
encountered them) always ended in power-up reset because they stopped the
processor. I ended up changing the soft lockup code to do a panic before
things got too out of hand (ie always panic). This reduced the number of
times I had to power cycle my aging hardware.
It was still an issue for me last time I did any kernel testing (2.6.23
over a year ago) and if you are having the same issues I had (sparc
processor hardware lockup) a power up reset each time, until you can find
and prevent the recursion issue, is the only option.
Please find attached my sun4c Panic handling patch. A bit old but may
still be useful (Not in text as my email client makes a mess of in text
patches).
Regards
Mark
On Thu, 5 Feb 2009, Meelis Roos wrote:
Hello,
I started to debug my latest problem with Quad HME. First there is an
OF mapping problem, then on quad also an IRQ problem; these I did not
touch. Then came a printk recursion problem that caused a panic; this I did
not touch yet. The panic message said I could return to PROM with Stop-A but I
couldn't, and this was the problem I tried to solve.
First, why tell the user to press Stop-A or break at all? Is it because
we can extract information from Break-T, Break-P etc? Otherwise the
panic code could drop to prom itself?
I tried the following patch and that did not work - probably because of
the same reason that Break did not work (interrupts off?). Seems the
local_irq_enable() is not enough - but what could help here? Is
prom_halt() the right thing? ... Had a look at what Stop-A does, tried
also the second patch... but if it had worked, Stop-A would
probably also have.
This patch is of course not even close to merging, it's just a hack to
try it out.
diff --git a/kernel/panic.c b/kernel/panic.c
index 2a2ff36..ffe6f2a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -22,6 +22,9 @@
#include <linux/random.h>
#include <linux/kallsyms.h>
#include <linux/dmi.h>
+#ifdef __sparc__
+#include <asm/oplib.h>
+#endif
int panic_on_oops;
static unsigned long tainted_mask;
@@ -120,13 +123,16 @@ NORET_TYPE void panic(const char * fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
- printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
+ printk(KERN_EMERG "Returning to the boot prom\n");
}
#endif
#if defined(CONFIG_S390)
disabled_wait(caller);
#endif
local_irq_enable();
+#ifdef __sparc__
+ prom_halt();
+#endif
for (i = 0;;) {
touch_softlockup_watchdog();
i += panic_blink(i);
And the other one:
diff --git a/kernel/panic.c b/kernel/panic.c
index 2a2ff36..df46a5c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -22,6 +22,9 @@
#include <linux/random.h>
#include <linux/kallsyms.h>
#include <linux/dmi.h>
+#ifdef __sparc__
+#include <asm/oplib.h>
+#endif
int panic_on_oops;
static unsigned long tainted_mask;
@@ -120,13 +123,19 @@ NORET_TYPE void panic(const char * fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
- printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
+ printk(KERN_EMERG "Returning to the boot prom\n");
}
#endif
#if defined(CONFIG_S390)
disabled_wait(caller);
#endif
local_irq_enable();
+#ifdef __sparc__
+ prom_printf("\n");
+ flush_user_windows();
+
+ prom_cmdline();
+#endif
for (i = 0;;) {
touch_softlockup_watchdog();
i += panic_blink(i);
--
Meelis Roos (mroos@xxxxxxxx)
--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
diff -ruNpd linux-2.6.20.9/kernel/panic.c linux-test/kernel/panic.c
--- linux-2.6.20.9/kernel/panic.c 2007-04-28 15:02:21.000000000 +0100
+++ linux-test/kernel/panic.c 2007-04-28 04:09:28.000000000 +0100
@@ -106,6 +106,7 @@ NORET_TYPE void panic(const char * fmt,
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked..
*/
+ printk("\n");
printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
for (i = 0; i < panic_timeout*1000; ) {
touch_nmi_watchdog();
@@ -124,6 +125,7 @@ NORET_TYPE void panic(const char * fmt,
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
+ printk ("\n");
printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
}
#endif
@@ -136,6 +138,33 @@ NORET_TYPE void panic(const char * fmt,
i += panic_blink(i);
mdelay(1);
i++;
+#ifdef __sparc__
+ {
+ /*
+ * L1-A processing only works if KBD stuff OK.
+ * So to debug before KBD up you require this.
+ * Maybe make this part of the early console stuf.
+ */
+ static int stop_l1;
+ int ch;
+ extern int prom_nbgetchar(void);
+
+ if ((ch = prom_nbgetchar()) != -1)
+ {
+ if ((ch == 0x7F) ||
+ (ch == 0xFE) || (ch == 0xFF))
+ stop_l1 = 0;
+ if ((ch & 0x7F) == 0x01)
+ stop_l1 = 1;
+ if (stop_l1 && ((ch & 0x7F) == 77))
+ {
+ extern void sun_do_break(void);
+
+ sun_do_break();
+ }
+ }
+ }
+#endif /* __sparc__ */
}
}
diff -ru -x '.git*' -x .mailmap linux-2.6/drivers/serial/sunzilog.c linux-test/drivers/serial/sunzilog.c
--- linux-2.6/drivers/serial/sunzilog.c 2007-06-15 22:08:45.000000000 +0100
+++ linux-test/drivers/serial/sunzilog.c 2007-06-03 00:35:03.000000000 +0100
@@ -429,6 +429,7 @@
/* Wait for BREAK to deassert to avoid potentially
* confusing the PROM.
*/
+ sun_do_break(); /* On Sun4c, the while loop hangs */
while (1) {
status = readb(&channel->control);
ZSDELAY();
diff -ru -x '.git*' -x .mailmap linux-2.6/kernel/softlockup.c linux-test/kernel/softlockup.c
--- linux-2.6/kernel/softlockup.c 2007-06-15 22:08:46.000000000 +0100
+++ linux-test/kernel/softlockup.c 2007-06-03 00:31:00.000000000 +0100
@@ -105,6 +105,7 @@
this_cpu);
dump_stack();
spin_unlock(&print_lock);
+ panic ("Sun4c can't cope with soft lockups\n");
}
}