From: David Daney <ddaney@xxxxxxxxxxxxxxxxxx> Signed-off-by: Tomaso Paoletti <tpaoletti@xxxxxxxxxxxxxxxxxx> Signed-off-by: Paul Gortmaker <Paul.Gortmaker@xxxxxxxxxxxxx> Signed-off-by: David Daney <ddaney@xxxxxxxxxxxxxxxxxx> --- arch/mips/kernel/irq-octeon.c | 464 ++++++++++++++++++++++++++++++++++ arch/mips/kernel/octeon_switch.S | 506 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 970 insertions(+), 0 deletions(-) create mode 100644 arch/mips/kernel/irq-octeon.c create mode 100644 arch/mips/kernel/octeon_switch.S diff --git a/arch/mips/kernel/irq-octeon.c b/arch/mips/kernel/irq-octeon.c new file mode 100644 index 0000000..738f3c6 --- /dev/null +++ b/arch/mips/kernel/irq-octeon.c @@ -0,0 +1,464 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2007 Cavium Networks + */ +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/pci.h> + +#include <asm/irq_cpu.h> +#include <asm/mipsregs.h> +#include <asm/system.h> + +#include "../cavium-octeon/hal.h" + +DEFINE_RWLOCK(octeon_irq_ciu0_rwlock); +DEFINE_RWLOCK(octeon_irq_ciu1_rwlock); +DEFINE_SPINLOCK(octeon_irq_msi_lock); + +static void octeon_irq_core_ack(unsigned int irq) +{ + /* We don't need to disable IRQs to make these atomic since they are + already disabled earlier in the low level interrupt code */ + clear_c0_status(0x100 << irq); + /* The two user interrupts must be cleared manually */ + if (irq < 2) + clear_c0_cause(0x100 << irq); +} + +static void octeon_irq_core_eoi(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + /* If an IRQ is being processed while we are disabling it the handler + will attempt to unmask the interrupt after it has been disabled */ + if (desc->status & IRQ_DISABLED) + return; + /* We don't need to disable IRQs to make these atomic since they are + already disabled earlier in the low level interrupt code */ + set_c0_status(0x100 << irq); +} + +static void octeon_irq_core_enable(unsigned int irq) +{ + /* We need to disable interrupts to make sure our updates are atomic */ + unsigned long flags; + local_irq_save(flags); + set_c0_status(0x100 << irq); + local_irq_restore(flags); +} + +static void octeon_irq_core_disable_local(unsigned int irq) +{ + /* We need to disable interrupts to make sure our updates are atomic */ + unsigned long flags; + local_irq_save(flags); + clear_c0_status(0x100 << irq); + local_irq_restore(flags); +} + +static void octeon_irq_core_disable(unsigned int irq) +{ +#ifdef CONFIG_SMP + on_each_cpu((void (*)(void *)) octeon_irq_core_disable_local, + (void *) (long) irq, 1); +#else + octeon_irq_core_disable_local(irq); +#endif +} + +struct irq_chip octeon_irq_chip_core = { + .name = "Core", + .enable = octeon_irq_core_enable, + .disable = octeon_irq_core_disable, + .ack = octeon_irq_core_ack, + .eoi = octeon_irq_core_eoi, +}; + + +static void octeon_irq_ciu0_ack(unsigned int irq) +{ + /* In order to avoid any locking accessing the CIU, we acknowledge CIU + interrupts by disabling all of them. This way we can use a per core + register and avoid any out of core locking requirements. This has + the side affect that CIU interrupts can't be processed recursively */ + /* We don't need to disable IRQs to make these atomic since they are + already disabled earlier in the low level interrupt code */ + clear_c0_status(0x100 << 2); +} + +static void octeon_irq_ciu0_eoi(unsigned int irq) +{ + /* Enable all CIU interrupts again */ + /* We don't need to disable IRQs to make these atomic since they are + already disabled earlier in the low level interrupt code */ + set_c0_status(0x100 << 2); +} + +static void octeon_irq_ciu0_enable(unsigned int irq) +{ + int coreid = cvmx_get_core_num(); + unsigned long flags; + uint64_t en0; + int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ + + /* A read lock is used here to make sure only one core is ever updating + the CIU enable bits at a time. During an enable the cores don't + interfere with each other. During a disable the write lock stops any + enables that might cause a problem */ + read_lock_irqsave(&octeon_irq_ciu0_rwlock, flags); + en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + en0 |= 1ull << bit; + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + read_unlock_irqrestore(&octeon_irq_ciu0_rwlock, flags); +} + +static void octeon_irq_ciu0_disable(unsigned int irq) +{ + int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ + unsigned long flags; + uint64_t en0; +#ifdef CONFIG_SMP + int cpu; + write_lock_irqsave(&octeon_irq_ciu0_rwlock, flags); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (cpu_present(cpu)) { + int coreid = cpu_logical_map(cpu); + en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + en0 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + } + } + /* We need to do a read after the last update to make sure all of them + are done */ + cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); + write_unlock_irqrestore(&octeon_irq_ciu0_rwlock, flags); +#else + int coreid = cvmx_get_core_num(); + local_irq_save(flags); + en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + en0 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + local_irq_restore(flags); +#endif +} + +#ifdef CONFIG_SMP +static void octeon_irq_ciu0_set_affinity(unsigned int irq, cpumask_t dest) +{ + int cpu; + int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ + + write_lock(&octeon_irq_ciu0_rwlock); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (cpu_present(cpu)) { + int coreid = cpu_logical_map(cpu); + uint64_t en0 = + cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + if (cpu_isset(cpu, dest)) + en0 |= 1ull << bit; + else + en0 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + } + } + /* We need to do a read after the last update to make sure all of them + are done */ + cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); + write_unlock(&octeon_irq_ciu0_rwlock); +} +#endif + +struct irq_chip octeon_irq_chip_ciu0 = { + .name = "CIU0", + .enable = octeon_irq_ciu0_enable, + .disable = octeon_irq_ciu0_disable, + .ack = octeon_irq_ciu0_ack, + .eoi = octeon_irq_ciu0_eoi, +#ifdef CONFIG_SMP + .set_affinity = octeon_irq_ciu0_set_affinity, +#endif +}; + + +static void octeon_irq_ciu1_ack(unsigned int irq) +{ + /* In order to avoid any locking accessing the CIU, we acknowledge CIU + interrupts by disabling all of them. This way we can use a per core + register and avoid any out of core locking requirements. This has + the side affect that CIU interrupts can't be processed recursively */ + /* We don't need to disable IRQs to make these atomic since they are + already disabled earlier in the low level interrupt code */ + clear_c0_status(0x100 << 3); +} + +static void octeon_irq_ciu1_eoi(unsigned int irq) +{ + /* Enable all CIU interrupts again */ + /* We don't need to disable IRQs to make these atomic since they are + already disabled earlier in the low level interrupt code */ + set_c0_status(0x100 << 3); +} + +static void octeon_irq_ciu1_enable(unsigned int irq) +{ + int coreid = cvmx_get_core_num(); + unsigned long flags; + uint64_t en1; + int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ + + /* A read lock is used here to make sure only one core is ever updating + the CIU enable bits at a time. During an enable the cores don't + interfere with each other. During a disable the write lock stops any + enables that might cause a problem */ + read_lock_irqsave(&octeon_irq_ciu1_rwlock, flags); + en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + en1 |= 1ull << bit; + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + read_unlock_irqrestore(&octeon_irq_ciu1_rwlock, flags); +} + +static void octeon_irq_ciu1_disable(unsigned int irq) +{ + int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ + unsigned long flags; + uint64_t en1; +#ifdef CONFIG_SMP + int cpu; + write_lock_irqsave(&octeon_irq_ciu1_rwlock, flags); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (cpu_present(cpu)) { + int coreid = cpu_logical_map(cpu); + en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + en1 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + } + } + /* We need to do a read after the last update to make sure all of them + are done */ + cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); + write_unlock_irqrestore(&octeon_irq_ciu1_rwlock, flags); +#else + int coreid = cvmx_get_core_num(); + local_irq_save(flags); + en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + en1 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + local_irq_restore(flags); +#endif +} + +#ifdef CONFIG_SMP +static void octeon_irq_ciu1_set_affinity(unsigned int irq, cpumask_t dest) +{ + int cpu; + int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ + + write_lock(&octeon_irq_ciu1_rwlock); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (cpu_present(cpu)) { + int coreid = cpu_logical_map(cpu); + uint64_t en1 = + cvmx_read_csr(CVMX_CIU_INTX_EN1 + (coreid * 2 + 1)); + if (cpu_isset(cpu, dest)) + en1 |= 1ull << bit; + else + en1 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + } + } + /* We need to do a read after the last update to make sure all of them + are done */ + cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); + write_unlock(&octeon_irq_ciu1_rwlock); +} +#endif + +struct irq_chip octeon_irq_chip_ciu1 = { + .name = "CIU1", + .enable = octeon_irq_ciu1_enable, + .disable = octeon_irq_ciu1_disable, + .ack = octeon_irq_ciu1_ack, + .eoi = octeon_irq_ciu1_eoi, +#ifdef CONFIG_SMP + .set_affinity = octeon_irq_ciu1_set_affinity, +#endif +}; + + +static void octeon_irq_i8289_master_unmask(unsigned int irq) +{ + unsigned long flags; + local_irq_save(flags); + outb(inb(0x21) & ~(1 << (irq - OCTEON_IRQ_I8259M0)), 0x21); + local_irq_restore(flags); +} + +static void octeon_irq_i8289_master_mask(unsigned int irq) +{ + unsigned long flags; + local_irq_save(flags); + outb(inb(0x21) | (1 << (irq - OCTEON_IRQ_I8259M0)), 0x21); + local_irq_restore(flags); +} + +struct irq_chip octeon_irq_chip_i8259_master = { + .name = "i8259M", + .mask = octeon_irq_i8289_master_mask, + .mask_ack = octeon_irq_i8289_master_mask, + .unmask = octeon_irq_i8289_master_unmask, + .eoi = octeon_irq_i8289_master_unmask, +}; + + +static void octeon_irq_i8289_slave_unmask(unsigned int irq) +{ + outb(inb(0xa1) & ~(1 << (irq - OCTEON_IRQ_I8259S0)), 0xa1); +} + +static void octeon_irq_i8289_slave_mask(unsigned int irq) +{ + outb(inb(0xa1) | (1 << (irq - OCTEON_IRQ_I8259S0)), 0xa1); +} + +struct irq_chip octeon_irq_chip_i8259_slave = { + .name = "i8259S", + .mask = octeon_irq_i8289_slave_mask, + .mask_ack = octeon_irq_i8289_slave_mask, + .unmask = octeon_irq_i8289_slave_unmask, + .eoi = octeon_irq_i8289_slave_unmask, +}; + +#ifdef CONFIG_PCI_MSI + +static void octeon_irq_msi_ack(unsigned int irq) +{ + if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) { + /* These chips have PCI */ + cvmx_write_csr(CVMX_NPI_NPI_MSI_RCV, + 1ull << (irq - OCTEON_IRQ_MSI_BIT0)); + } else { + /* These chips have PCIe. Thankfully the ACK doesn't need any + locking */ + cvmx_write_csr(CVMX_PEXP_NPEI_MSI_RCV0, + 1ull << (irq - OCTEON_IRQ_MSI_BIT0)); + } +} + +static void octeon_irq_msi_eoi(unsigned int irq) +{ + /* Nothing needed */ +} + +static void octeon_irq_msi_enable(unsigned int irq) +{ + if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) { + /* Octeon PCI doesn't have the ability to mask/unmask MSI + interrupts individually. Instead of masking/unmasking them + in groups of 16, we simple assume MSI devices are well + behaved. MSI interrupts are always enable and the ACK is + assumed to be enough */ + } else { + /* These chips have PCIe. Note that we only support the first + 64 MSI interrupts. Unfortunately all the MSI enables are in + the same register. We use MSI0's lock to control access to + them all. */ + uint64_t en; + unsigned long flags; + spin_lock_irqsave(&octeon_irq_msi_lock, flags); + en = cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + en |= 1ull << (irq - OCTEON_IRQ_MSI_BIT0); + cvmx_write_csr(CVMX_PEXP_NPEI_MSI_ENB0, en); + cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + spin_unlock_irqrestore(&octeon_irq_msi_lock, flags); + } +} + +static void octeon_irq_msi_disable(unsigned int irq) +{ + if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) { + /* See comment in enable */ + } else { + /* These chips have PCIe. Note that we only support the first + 64 MSI interrupts. Unfortunately all the MSI enables are in + the same register. We use MSI0's lock to control access to + them all. */ + uint64_t en; + unsigned long flags; + spin_lock_irqsave(&octeon_irq_msi_lock, flags); + en = cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + en &= ~(1ull << (irq - OCTEON_IRQ_MSI_BIT0)); + cvmx_write_csr(CVMX_PEXP_NPEI_MSI_ENB0, en); + cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + spin_unlock_irqrestore(&octeon_irq_msi_lock, flags); + } +} + +struct irq_chip octeon_irq_chip_msi = { + .name = "MSI", + .enable = octeon_irq_msi_enable, + .disable = octeon_irq_msi_disable, + .ack = octeon_irq_msi_ack, + .eoi = octeon_irq_msi_eoi, +}; +#endif + +void __init arch_init_irq(void) +{ + int irq; + + if (NR_IRQS < OCTEON_IRQ_LAST) + pr_err("octeon_irq_init: NR_IRQS is set too low\n"); + + /* 0-7 Mips internal */ + for (irq = OCTEON_IRQ_SW0; irq <= OCTEON_IRQ_TIMER; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_core, + handle_percpu_irq); + } + + /* 8-71 CIU_INT_SUM0 */ + for (irq = OCTEON_IRQ_WORKQ0; irq <= OCTEON_IRQ_BOOTDMA; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_ciu0, + handle_percpu_irq); + } + + /* 72-135 CIU_INT_SUM1 */ + for (irq = OCTEON_IRQ_WDOG0; irq <= OCTEON_IRQ_RESERVED135; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_ciu1, + handle_percpu_irq); + } + + /* 136 - 143 are reserved to align the i8259 in a multiple of 16. This + alignment is necessary since old style ISA interrupts hanging off + the i8259 have internal alignment assumptions */ + + /* 144-151 i8259 master controller */ + for (irq = OCTEON_IRQ_I8259M0; irq <= OCTEON_IRQ_I8259M7; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_i8259_master, + handle_level_irq); + } + + /* 152-159 i8259 slave controller */ + for (irq = OCTEON_IRQ_I8259S0; irq <= OCTEON_IRQ_I8259S7; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_i8259_slave, + handle_level_irq); + } + +#ifdef CONFIG_PCI_MSI + /* 160-223 PCI/PCIe MSI interrupts */ + for (irq = OCTEON_IRQ_MSI_BIT0; irq <= OCTEON_IRQ_MSI_BIT63; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_msi, + handle_percpu_irq); + } +#endif + + set_c0_status(0x300 << 2); +} diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S new file mode 100644 index 0000000..d523896 --- /dev/null +++ b/arch/mips/kernel/octeon_switch.S @@ -0,0 +1,506 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994, 1995, 1996, 1998, 1999, 2002, 2003 Ralf Baechle + * Copyright (C) 1996 David S. Miller (dm@xxxxxxxxxxxx) + * Copyright (C) 1994, 1995, 1996, by Andreas Busse + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 2000 MIPS Technologies, Inc. + * written by Carsten Langgaard, carstenl@xxxxxxxx + */ +#include <asm/asm.h> +#include <asm/cachectl.h> +#include <asm/fpregdef.h> +#include <asm/mipsregs.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/pgtable-bits.h> +#include <asm/regdef.h> +#include <asm/stackframe.h> +#include <asm/thread_info.h> + +#include <asm/asmmacro.h> + +/* + * Offset to the current process status flags, the first 32 bytes of the + * stack are not used. + */ +#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS) + +/* + * task_struct *resume(task_struct *prev, task_struct *next, + * struct thread_info *next_ti) + */ + .align 7 + LEAF(resume) + .set arch=octeon +#ifndef CONFIG_CPU_HAS_LLSC + sw zero, ll_bit +#endif + mfc0 t1, CP0_STATUS + LONG_S t1, THREAD_STATUS(a0) + cpu_save_nonscratch a0 + LONG_S ra, THREAD_REG31(a0) + + /* check if we need to save COP2 registers */ + PTR_L t2, TASK_THREAD_INFO(a0) + LONG_L t0, ST_OFF(t2) + bbit0 t0, 30, 1f + + /* Disable COP2 in the stored process state */ + li t1, ST0_CU2 + xor t0, t1 + LONG_S t0, ST_OFF(t2) + + /* Enable COP2 so we can save it */ + mfc0 t0, CP0_STATUS + or t0, t1 + mtc0 t0, CP0_STATUS + + /* Save COP2 */ + daddu a0, THREAD_CP2 + jal octeon_cop2_save + dsubu a0, THREAD_CP2 + + /* Disable COP2 now that we are done */ + mfc0 t0, CP0_STATUS + li t1, ST0_CU2 + xor t0, t1 + mtc0 t0, CP0_STATUS + +1: +#if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 + /* Check if we need to store CVMSEG state */ + mfc0 t0, $11,7 /* CvmMemCtl */ + bbit0 t0, 6, 3f /* Is user access enabled? */ + + /* Store the CVMSEG state */ + /* Extract the size of CVMSEG */ + andi t0, 0x3f + /* Multiply * (cache line size/sizeof(long)/2) */ + sll t0, 7-LONGLOG-1 + li t1, -32768 /* Base address of CVMSEG */ + LONG_ADDI t2, a0, THREAD_CVMSEG /* Where to store CVMSEG to */ + synciobdma +2: + .set noreorder + LONG_L t8, 0(t1) /* Load from CVMSEG */ + subu t0, 1 /* Decrement loop var */ + LONG_L t9, LONGSIZE(t1)/* Load from CVMSEG */ + LONG_ADDU t1, LONGSIZE*2 /* Increment loc in CVMSEG */ + LONG_S t8, 0(t2) /* Store CVMSEG to thread storage */ + LONG_ADDU t2, LONGSIZE*2 /* Increment loc in thread storage */ + bnez t0, 2b /* Loop until we've copied it all */ + LONG_S t9, -LONGSIZE(t2)/* Store CVMSEG to thread storage */ + .set reorder + + /* Disable access to CVMSEG */ + mfc0 t0, $11,7 /* CvmMemCtl */ + xori t0, t0, 0x40 /* Bit 6 is CVMSEG user enable */ + mtc0 t0, $11,7 /* CvmMemCtl */ +#endif +3: + /* + * The order of restoring the registers takes care of the race + * updating $28, $29 and kernelsp without disabling ints. + */ + move $28, a2 + cpu_restore_nonscratch a1 + +#if (_THREAD_SIZE - 32) < 0x8000 + PTR_ADDIU t0, $28, _THREAD_SIZE - 32 +#else + PTR_LI t0, _THREAD_SIZE - 32 + PTR_ADDU t0, $28 +#endif + set_saved_sp t0, t1, t2 + + mfc0 t1, CP0_STATUS /* Do we really need this? */ + li a3, 0xff01 + and t1, a3 + LONG_L a2, THREAD_STATUS(a1) + nor a3, $0, a3 + and a2, a3 + or a2, t1 + mtc0 a2, CP0_STATUS + move v0, a0 + jr ra + END(resume) + +/* + * void octeon_cop2_save(struct octeon_cop2_state *a0) + */ + .align 7 + LEAF(octeon_cop2_save) + + dmfc0 t9, $9,7 /* CvmCtl register. */ + + /* Save the COP2 CRC state */ + dmfc2 t0, 0x0201 + dmfc2 t1, 0x0202 + dmfc2 t2, 0x0200 + sd t0, OCTEON_CP2_CRC_IV(a0) + sd t1, OCTEON_CP2_CRC_LENGTH(a0) + sd t2, OCTEON_CP2_CRC_POLY(a0) + /* Skip next instructions if CvmCtl[NODFA_CP2] set */ + bbit1 t9, 28, 1f + + /* Save the LLM state */ + dmfc2 t0, 0x0402 + dmfc2 t1, 0x040A + sd t0, OCTEON_CP2_LLM_DAT(a0) + sd t1, OCTEON_CP2_LLM_DAT+8(a0) + +1: bbit1 t9, 26, 3f /* done if CvmCtl[NOCRYPTO] set */ + + /* Save the COP2 crypto state */ + /* this part is mostly common to both pass 1 and later revisions */ + dmfc2 t0, 0x0084 + dmfc2 t1, 0x0080 + dmfc2 t2, 0x0081 + dmfc2 t3, 0x0082 + sd t0, OCTEON_CP2_3DES_IV(a0) + dmfc2 t0, 0x0088 + sd t1, OCTEON_CP2_3DES_KEY(a0) + dmfc2 t1, 0x0111 /* only necessary for pass 1 */ + sd t2, OCTEON_CP2_3DES_KEY+8(a0) + dmfc2 t2, 0x0102 + sd t3, OCTEON_CP2_3DES_KEY+16(a0) + dmfc2 t3, 0x0103 + sd t0, OCTEON_CP2_3DES_RESULT(a0) + dmfc2 t0, 0x0104 + sd t1, OCTEON_CP2_AES_INP0(a0) /* only necessary for pass 1 */ + dmfc2 t1, 0x0105 + sd t2, OCTEON_CP2_AES_IV(a0) + dmfc2 t2, 0x0106 + sd t3, OCTEON_CP2_AES_IV+8(a0) + dmfc2 t3, 0x0107 + sd t0, OCTEON_CP2_AES_KEY(a0) + dmfc2 t0, 0x0110 + sd t1, OCTEON_CP2_AES_KEY+8(a0) + dmfc2 t1, 0x0100 + sd t2, OCTEON_CP2_AES_KEY+16(a0) + dmfc2 t2, 0x0101 + sd t3, OCTEON_CP2_AES_KEY+24(a0) + mfc0 t3, $15,0 /* Get the processor ID register */ + sd t0, OCTEON_CP2_AES_KEYLEN(a0) + li t0, 0x000d0000 /* This is the processor ID of Octeon Pass1 */ + sd t1, OCTEON_CP2_AES_RESULT(a0) + sd t2, OCTEON_CP2_AES_RESULT+8(a0) + /* Skip to the Pass1 version of the remainder of the COP2 state */ + beq t3, t0, 2f + + /* the non-pass1 state when !CvmCtl[NOCRYPTO] */ + dmfc2 t1, 0x0240 + dmfc2 t2, 0x0241 + dmfc2 t3, 0x0242 + dmfc2 t0, 0x0243 + sd t1, OCTEON_CP2_HSH_DATW(a0) + dmfc2 t1, 0x0244 + sd t2, OCTEON_CP2_HSH_DATW+8(a0) + dmfc2 t2, 0x0245 + sd t3, OCTEON_CP2_HSH_DATW+16(a0) + dmfc2 t3, 0x0246 + sd t0, OCTEON_CP2_HSH_DATW+24(a0) + dmfc2 t0, 0x0247 + sd t1, OCTEON_CP2_HSH_DATW+32(a0) + dmfc2 t1, 0x0248 + sd t2, OCTEON_CP2_HSH_DATW+40(a0) + dmfc2 t2, 0x0249 + sd t3, OCTEON_CP2_HSH_DATW+48(a0) + dmfc2 t3, 0x024A + sd t0, OCTEON_CP2_HSH_DATW+56(a0) + dmfc2 t0, 0x024B + sd t1, OCTEON_CP2_HSH_DATW+64(a0) + dmfc2 t1, 0x024C + sd t2, OCTEON_CP2_HSH_DATW+72(a0) + dmfc2 t2, 0x024D + sd t3, OCTEON_CP2_HSH_DATW+80(a0) + dmfc2 t3, 0x024E + sd t0, OCTEON_CP2_HSH_DATW+88(a0) + dmfc2 t0, 0x0250 + sd t1, OCTEON_CP2_HSH_DATW+96(a0) + dmfc2 t1, 0x0251 + sd t2, OCTEON_CP2_HSH_DATW+104(a0) + dmfc2 t2, 0x0252 + sd t3, OCTEON_CP2_HSH_DATW+112(a0) + dmfc2 t3, 0x0253 + sd t0, OCTEON_CP2_HSH_IVW(a0) + dmfc2 t0, 0x0254 + sd t1, OCTEON_CP2_HSH_IVW+8(a0) + dmfc2 t1, 0x0255 + sd t2, OCTEON_CP2_HSH_IVW+16(a0) + dmfc2 t2, 0x0256 + sd t3, OCTEON_CP2_HSH_IVW+24(a0) + dmfc2 t3, 0x0257 + sd t0, OCTEON_CP2_HSH_IVW+32(a0) + dmfc2 t0, 0x0258 + sd t1, OCTEON_CP2_HSH_IVW+40(a0) + dmfc2 t1, 0x0259 + sd t2, OCTEON_CP2_HSH_IVW+48(a0) + dmfc2 t2, 0x025E + sd t3, OCTEON_CP2_HSH_IVW+56(a0) + dmfc2 t3, 0x025A + sd t0, OCTEON_CP2_GFM_MULT(a0) + dmfc2 t0, 0x025B + sd t1, OCTEON_CP2_GFM_MULT+8(a0) + sd t2, OCTEON_CP2_GFM_POLY(a0) + sd t3, OCTEON_CP2_GFM_RESULT(a0) + sd t0, OCTEON_CP2_GFM_RESULT+8(a0) + jr ra + +2: /* pass 1 special stuff when !CvmCtl[NOCRYPTO] */ + dmfc2 t3, 0x0040 + dmfc2 t0, 0x0041 + dmfc2 t1, 0x0042 + dmfc2 t2, 0x0043 + sd t3, OCTEON_CP2_HSH_DATW(a0) + dmfc2 t3, 0x0044 + sd t0, OCTEON_CP2_HSH_DATW+8(a0) + dmfc2 t0, 0x0045 + sd t1, OCTEON_CP2_HSH_DATW+16(a0) + dmfc2 t1, 0x0046 + sd t2, OCTEON_CP2_HSH_DATW+24(a0) + dmfc2 t2, 0x0048 + sd t3, OCTEON_CP2_HSH_DATW+32(a0) + dmfc2 t3, 0x0049 + sd t0, OCTEON_CP2_HSH_DATW+40(a0) + dmfc2 t0, 0x004A + sd t1, OCTEON_CP2_HSH_DATW+48(a0) + sd t2, OCTEON_CP2_HSH_IVW(a0) + sd t3, OCTEON_CP2_HSH_IVW+8(a0) + sd t0, OCTEON_CP2_HSH_IVW+16(a0) + +3: /* pass 1 or CvmCtl[NOCRYPTO] set */ + jr ra + END(octeon_cop2_save) + +/* + * void octeon_cop2_restore(struct octeon_cop2_state *a0) + */ + .align 7 + .set push + .set noreorder + LEAF(octeon_cop2_restore) + /* First cache line was prefetched before the call */ + pref 4, 128(a0) + dmfc0 t9, $9,7 /* CvmCtl register. */ + + pref 4, 256(a0) + ld t0, OCTEON_CP2_CRC_IV(a0) + pref 4, 384(a0) + ld t1, OCTEON_CP2_CRC_LENGTH(a0) + ld t2, OCTEON_CP2_CRC_POLY(a0) + + /* Restore the COP2 CRC state */ + dmtc2 t0, 0x0201 + dmtc2 t1, 0x1202 + bbit1 t9, 28, 2f /* Skip LLM if CvmCtl[NODFA_CP2] is set */ + dmtc2 t2, 0x4200 + + /* Restore the LLM state */ + ld t0, OCTEON_CP2_LLM_DAT(a0) + ld t1, OCTEON_CP2_LLM_DAT+8(a0) + dmtc2 t0, 0x0402 + dmtc2 t1, 0x040A + +2: + bbit1 t9, 26, done_restore /* done if CvmCtl[NOCRYPTO] set */ + nop + + /* Restore the COP2 crypto state common to pass 1 and pass 2 */ + ld t0, OCTEON_CP2_3DES_IV(a0) + ld t1, OCTEON_CP2_3DES_KEY(a0) + ld t2, OCTEON_CP2_3DES_KEY+8(a0) + dmtc2 t0, 0x0084 + ld t0, OCTEON_CP2_3DES_KEY+16(a0) + dmtc2 t1, 0x0080 + ld t1, OCTEON_CP2_3DES_RESULT(a0) + dmtc2 t2, 0x0081 + ld t2, OCTEON_CP2_AES_INP0(a0) /* only really needed for pass 1 */ + dmtc2 t0, 0x0082 + ld t0, OCTEON_CP2_AES_IV(a0) + dmtc2 t1, 0x0098 + ld t1, OCTEON_CP2_AES_IV+8(a0) + dmtc2 t2, 0x010A /* only really needed for pass 1 */ + ld t2, OCTEON_CP2_AES_KEY(a0) + dmtc2 t0, 0x0102 + ld t0, OCTEON_CP2_AES_KEY+8(a0) + dmtc2 t1, 0x0103 + ld t1, OCTEON_CP2_AES_KEY+16(a0) + dmtc2 t2, 0x0104 + ld t2, OCTEON_CP2_AES_KEY+24(a0) + dmtc2 t0, 0x0105 + ld t0, OCTEON_CP2_AES_KEYLEN(a0) + dmtc2 t1, 0x0106 + ld t1, OCTEON_CP2_AES_RESULT(a0) + dmtc2 t2, 0x0107 + ld t2, OCTEON_CP2_AES_RESULT+8(a0) + mfc0 t3, $15,0 /* Get the processor ID register */ + dmtc2 t0, 0x0110 + li t0, 0x000d0000 /* This is the processor ID of Octeon Pass1 */ + dmtc2 t1, 0x0100 + bne t0, t3, 3f /* Skip the next stuff for non-pass1 */ + dmtc2 t2, 0x0101 + + /* this code is specific for pass 1 */ + ld t0, OCTEON_CP2_HSH_DATW(a0) + ld t1, OCTEON_CP2_HSH_DATW+8(a0) + ld t2, OCTEON_CP2_HSH_DATW+16(a0) + dmtc2 t0, 0x0040 + ld t0, OCTEON_CP2_HSH_DATW+24(a0) + dmtc2 t1, 0x0041 + ld t1, OCTEON_CP2_HSH_DATW+32(a0) + dmtc2 t2, 0x0042 + ld t2, OCTEON_CP2_HSH_DATW+40(a0) + dmtc2 t0, 0x0043 + ld t0, OCTEON_CP2_HSH_DATW+48(a0) + dmtc2 t1, 0x0044 + ld t1, OCTEON_CP2_HSH_IVW(a0) + dmtc2 t2, 0x0045 + ld t2, OCTEON_CP2_HSH_IVW+8(a0) + dmtc2 t0, 0x0046 + ld t0, OCTEON_CP2_HSH_IVW+16(a0) + dmtc2 t1, 0x0048 + dmtc2 t2, 0x0049 + b done_restore /* unconditional branch */ + dmtc2 t0, 0x004A + +3: /* this is post-pass1 code */ + ld t2, OCTEON_CP2_HSH_DATW(a0) + ld t0, OCTEON_CP2_HSH_DATW+8(a0) + ld t1, OCTEON_CP2_HSH_DATW+16(a0) + dmtc2 t2, 0x0240 + ld t2, OCTEON_CP2_HSH_DATW+24(a0) + dmtc2 t0, 0x0241 + ld t0, OCTEON_CP2_HSH_DATW+32(a0) + dmtc2 t1, 0x0242 + ld t1, OCTEON_CP2_HSH_DATW+40(a0) + dmtc2 t2, 0x0243 + ld t2, OCTEON_CP2_HSH_DATW+48(a0) + dmtc2 t0, 0x0244 + ld t0, OCTEON_CP2_HSH_DATW+56(a0) + dmtc2 t1, 0x0245 + ld t1, OCTEON_CP2_HSH_DATW+64(a0) + dmtc2 t2, 0x0246 + ld t2, OCTEON_CP2_HSH_DATW+72(a0) + dmtc2 t0, 0x0247 + ld t0, OCTEON_CP2_HSH_DATW+80(a0) + dmtc2 t1, 0x0248 + ld t1, OCTEON_CP2_HSH_DATW+88(a0) + dmtc2 t2, 0x0249 + ld t2, OCTEON_CP2_HSH_DATW+96(a0) + dmtc2 t0, 0x024A + ld t0, OCTEON_CP2_HSH_DATW+104(a0) + dmtc2 t1, 0x024B + ld t1, OCTEON_CP2_HSH_DATW+112(a0) + dmtc2 t2, 0x024C + ld t2, OCTEON_CP2_HSH_IVW(a0) + dmtc2 t0, 0x024D + ld t0, OCTEON_CP2_HSH_IVW+8(a0) + dmtc2 t1, 0x024E + ld t1, OCTEON_CP2_HSH_IVW+16(a0) + dmtc2 t2, 0x0250 + ld t2, OCTEON_CP2_HSH_IVW+24(a0) + dmtc2 t0, 0x0251 + ld t0, OCTEON_CP2_HSH_IVW+32(a0) + dmtc2 t1, 0x0252 + ld t1, OCTEON_CP2_HSH_IVW+40(a0) + dmtc2 t2, 0x0253 + ld t2, OCTEON_CP2_HSH_IVW+48(a0) + dmtc2 t0, 0x0254 + ld t0, OCTEON_CP2_HSH_IVW+56(a0) + dmtc2 t1, 0x0255 + ld t1, OCTEON_CP2_GFM_MULT(a0) + dmtc2 t2, 0x0256 + ld t2, OCTEON_CP2_GFM_MULT+8(a0) + dmtc2 t0, 0x0257 + ld t0, OCTEON_CP2_GFM_POLY(a0) + dmtc2 t1, 0x0258 + ld t1, OCTEON_CP2_GFM_RESULT(a0) + dmtc2 t2, 0x0259 + ld t2, OCTEON_CP2_GFM_RESULT+8(a0) + dmtc2 t0, 0x025E + dmtc2 t1, 0x025A + dmtc2 t2, 0x025B + +done_restore: + jr ra + nop + END(octeon_cop2_restore) + .set pop + +/* + * void octeon_mult_save() + * sp is assumed to point to a struct pt_regs + * + * NOTE: This is called in SAVE_SOME in stackframe.h. It can only + * safely modify k0 and k1. + */ + .align 7 + .set push + .set noreorder + LEAF(octeon_mult_save) + dmfc0 k0, $9,7 /* CvmCtl register. */ + bbit1 k0, 27, 1f /* Skip CvmCtl[NOMUL] */ + nop + + /* Save the multiplier state */ + v3mulu k0, $0, $0 + v3mulu k1, $0, $0 + sd k0, PT_MTP(sp) /* PT_MTP has P0 */ + v3mulu k0, $0, $0 + sd k1, PT_MTP+8(sp) /* PT_MTP+8 has P1 */ + ori k1, $0, 1 + v3mulu k1, k1, $0 + sd k0, PT_MTP+16(sp) /* PT_MTP+16 has P2 */ + v3mulu k0, $0, $0 + sd k1, PT_MPL(sp) /* PT_MPL has MPL0 */ + v3mulu k1, $0, $0 + sd k0, PT_MPL+8(sp) /* PT_MPL+8 has MPL1 */ + jr ra + sd k1, PT_MPL+16(sp) /* PT_MPL+16 has MPL2 */ + +1: /* Resume here if CvmCtl[NOMUL] */ + jr ra + END(octeon_mult_save) + .set pop + +/* + * void octeon_mult_restore() + * sp is assumed to point to a struct pt_regs + * + * NOTE: This is called in RESTORE_SOME in stackframe.h. + */ + .align 7 + .set push + .set noreorder + LEAF(octeon_mult_restore) + dmfc0 k1, $9,7 /* CvmCtl register. */ + ld v0, PT_MPL(sp) /* MPL0 */ + ld v1, PT_MPL+8(sp) /* MPL1 */ + ld k0, PT_MPL+16(sp) /* MPL2 */ + bbit1 k1, 27, 1f /* Skip CvmCtl[NOMUL] */ + /* Normally falls through, so no time wasted here */ + nop + + /* Restore the multiplier state */ + ld k1, PT_MTP+16(sp) /* P2 */ + MTM0 v0 /* MPL0 */ + ld v0, PT_MTP+8(sp) /* P1 */ + MTM1 v1 /* MPL1 */ + ld v1, PT_MTP(sp) /* P0 */ + MTM2 k0 /* MPL2 */ + MTP2 k1 /* P2 */ + MTP1 v0 /* P1 */ + jr ra + MTP0 v1 /* P0 */ + +1: /* Resume here if CvmCtl[NOMUL] */ + jr ra + nop + END(octeon_mult_restore) + .set pop + -- 1.5.5.1