Take 2.  Comments (especially from pipeline wizards) are welcome.

Add a special short path for emulating RDHWR, which is used to support
TLS.  The handle_tlbl synthesizer takes care of cpu_has_vtag_icache
(see the note after the diff for why that case needs it).

Signed-off-by: Atsushi Nemoto <anemo@xxxxxxxxxxxxx>
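For reference, this is the userland sequence the short path accelerates.
The test program below is illustrative only, not part of the patch: it
emits the raw instruction word 0x7c03e83b, which is exactly the encoding
handle_ri_rdhwr checks for, so it builds even with an assembler that
does not know the RDHWR mnemonic.  On a CPU without hardware RDHWR the
read of the thread pointer traps with a Reserved Instruction exception
and gets emulated by the new handler.

/* Illustrative test program, not part of the patch. */
#include <stdio.h>

static inline void *read_tp(void)
{
        /* The handler decodes only this exact word, so the result
         * must land in v1 ($3).  0x7c03e83b == rdhwr v1,$29 (ULR). */
        register void *tp __asm__("$3");

        __asm__ __volatile__(".word 0x7c03e83b" : "=r" (tp));
        return tp;
}

int main(void)
{
        printf("thread pointer: %p\n", read_tp());
        return 0;
}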
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 37fda3d..dfceea9 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -375,6 +375,43 @@ #endif
 	BUILD_HANDLER dsp dsp sti silent	/* #26 */
 	BUILD_HANDLER reserved reserved sti verbose	/* others */
 
+	.align	5
+LEAF(handle_ri_rdhwr)
+	.set	push
+	.set	noat
+	.set	noreorder
+	/* 0x7c03e83b: rdhwr v1,$29 */
+	MFC0	k1, CP0_EPC
+	lui	k0, 0x7c03
+	lw	k1, (k1)
+	ori	k0, 0xe83b
+	.set	reorder
+	bne	k0, k1, handle_ri	/* if not ours */
+	/* The insn is rdhwr.  No need to check CAUSE.BD here. */
+	get_saved_sp	/* k1 := current_thread_info */
+	.set	noreorder
+	MFC0	k0, CP0_EPC
+#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+	ori	k1, _THREAD_MASK
+	xori	k1, _THREAD_MASK
+	LONG_L	v1, TI_TP_VALUE(k1)
+	LONG_ADDIU	k0, 4
+	jr	k0
+	 rfe
+#else
+	LONG_ADDIU	k0, 4	/* stall on $k0 */
+	MTC0	k0, CP0_EPC
+	/* I hope three instructions between MTC0 and ERET are enough... */
+	ori	k1, _THREAD_MASK
+	xori	k1, _THREAD_MASK
+	LONG_L	v1, TI_TP_VALUE(k1)
+	.set	mips3
+	eret
+	.set	mips0
+#endif
+	.set	pop
+	END(handle_ri_rdhwr)
+
 #ifdef CONFIG_64BIT
 /* A temporary overflow handler used by check_daddi(). */
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 954a198..46eba9f 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -52,6 +52,7 @@ extern asmlinkage void handle_dbe(void);
 extern asmlinkage void handle_sys(void);
 extern asmlinkage void handle_bp(void);
 extern asmlinkage void handle_ri(void);
+extern asmlinkage void handle_ri_rdhwr(void);
 extern asmlinkage void handle_cpu(void);
 extern asmlinkage void handle_ov(void);
 extern asmlinkage void handle_tr(void);
@@ -1381,6 +1382,15 @@ #endif
 	memcpy((void *)(uncached_ebase + offset), addr, size);
 }
 
+int __initdata rdhwr_noopt;
+static int __init set_rdhwr_noopt(char *str)
+{
+	rdhwr_noopt = 1;
+	return 1;
+}
+
+__setup("rdhwr_noopt", set_rdhwr_noopt);
+
 void __init trap_init(void)
 {
 	extern char except_vec3_generic, except_vec3_r4000;
@@ -1460,7 +1470,7 @@ void __init trap_init(void)
 	set_except_vector(8, handle_sys);
 	set_except_vector(9, handle_bp);
-	set_except_vector(10, handle_ri);
+	set_except_vector(10, rdhwr_noopt ? handle_ri : handle_ri_rdhwr);
 	set_except_vector(11, handle_cpu);
 	set_except_vector(12, handle_ov);
 	set_except_vector(13, handle_tr);
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 375e099..3f53fa7 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -817,9 +817,10 @@ static __init void __attribute__((unused
  * Write random or indexed TLB entry, and care about the hazards from
  * the preceeding mtc0 and for the following eret.
  */
-enum tlb_write_entry { tlb_random, tlb_indexed };
+enum tlb_write_entry { tlb_random, tlb_indexed, tlb_arbitrary };
 
-static __init void build_tlb_write_entry(u32 **p, struct label **l,
+static __init void build_tlb_write_entry(u32 **p, unsigned int tmp,
+					 struct label **l,
 					 struct reloc **r,
 					 enum tlb_write_entry wmode)
 {
@@ -828,6 +829,11 @@ static __init void build_tlb_write_entry
 	switch (wmode) {
 	case tlb_random: tlbw = i_tlbwr; break;
 	case tlb_indexed: tlbw = i_tlbwi; break;
+	case tlb_arbitrary:
+		/* tmp contains CP0_INDEX.  see build_update_entries(). */
+		/* if tmp < 0, use tlbwr instead of tlbwi */
+		tlbw = i_tlbwr;
+		break;
 	}
 
 	switch (current_cpu_data.cputype) {
@@ -841,6 +847,10 @@ static __init void build_tlb_write_entry
 		 * This branch uses up a mtc0 hazard nop slot and saves
 		 * two nops after the tlbw instruction.
 		 */
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		}
 		il_bgezl(p, r, 0, label_tlbw_hazard);
 		tlbw(p);
 		l_tlbw_hazard(l, *p);
@@ -851,8 +861,13 @@ static __init void build_tlb_write_entry
 	case CPU_R4700:
 	case CPU_R5000:
 	case CPU_R5000A:
-		i_nop(p);
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		} else
+			i_nop(p);
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		i_nop(p);
 		break;
 
@@ -865,8 +880,13 @@ static __init void build_tlb_write_entry
 	case CPU_AU1550:
 	case CPU_AU1200:
 	case CPU_PR4450:
-		i_nop(p);
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		} else
+			i_nop(p);
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		break;
 
 	case CPU_R10000:
@@ -878,15 +898,24 @@ static __init void build_tlb_write_entry
 	case CPU_4KSC:
 	case CPU_20KC:
 	case CPU_25KF:
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		}
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		break;
 
 	case CPU_NEVADA:
-		i_nop(p);	/* QED specifies 2 nops hazard */
 		/*
 		 * This branch uses up a mtc0 hazard nop slot and saves
 		 * a nop after the tlbw instruction.
 		 */
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		} else
+			i_nop(p);	/* QED specifies 2 nops hazard */
 		il_bgezl(p, r, 0, label_tlbw_hazard);
 		tlbw(p);
 		l_tlbw_hazard(l, *p);
@@ -896,8 +925,13 @@ static __init void build_tlb_write_entry
 		i_nop(p);
 		i_nop(p);
 		i_nop(p);
-		i_nop(p);
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		} else
+			i_nop(p);
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		break;
 
 	case CPU_4KEC:
@@ -905,7 +939,12 @@ static __init void build_tlb_write_entry
 	case CPU_34K:
 	case CPU_74K:
 		i_ehb(p);
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		}
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		break;
 
 	case CPU_RM9000:
@@ -918,8 +957,13 @@ static __init void build_tlb_write_entry
 		i_ssnop(p);
 		i_ssnop(p);
 		i_ssnop(p);
-		i_ssnop(p);
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		} else
+			i_ssnop(p);
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		i_ssnop(p);
 		i_ssnop(p);
 		i_ssnop(p);
@@ -932,8 +976,13 @@ static __init void build_tlb_write_entry
 	case CPU_VR4181:
 	case CPU_VR4181A:
 		i_nop(p);
-		i_nop(p);
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		} else
+			i_nop(p);
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		i_nop(p);
 		i_nop(p);
 		break;
@@ -942,8 +991,13 @@
 	case CPU_VR4133:
 	case CPU_R5432:
 		i_nop(p);
-		i_nop(p);
+		if (wmode == tlb_arbitrary) {
+			il_bgezl(p, r, tmp, label_tlbw_hazard);
+			i_tlbwi(p);
+		} else
+			i_nop(p);
 		tlbw(p);
+		l_tlbw_hazard(l, *p);
 		break;
 
 	default:
@@ -1123,7 +1177,7 @@ static __init void build_get_ptep(u32 **
 }
 
 static __init void build_update_entries(u32 **p, unsigned int tmp,
-					unsigned int ptep)
+					unsigned int ptep, int loadindex)
 {
 	/*
 	 * 64bit address support (36bit on a 32bit CPU) in a 32bit
@@ -1136,6 +1190,8 @@ #ifdef CONFIG_64BIT_PHYS_ADDR
 		i_dsrl(p, tmp, tmp, 6); /* convert to entrylo0 */
 		i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
 		i_dsrl(p, ptep, ptep, 6); /* convert to entrylo1 */
+		if (loadindex)
+			i_mfc0(p, tmp, C0_INDEX); /* used by tlb_arbitrary */
 		i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
 	} else {
 		int pte_off_even = sizeof(pte_t) / 2;
@@ -1145,6 +1201,8 @@ #ifdef CONFIG_64BIT_PHYS_ADDR
 		i_lw(p, tmp, pte_off_even, ptep); /* get even pte */
 		i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
 		i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */
+		if (loadindex)
+			i_mfc0(p, tmp, C0_INDEX); /* used by tlb_arbitrary */
 		i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
 	}
 #else
@@ -1157,8 +1215,8 @@ #else
 		i_mtc0(p, 0, C0_ENTRYLO0);
 	i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
 	i_SRL(p, ptep, ptep, 6); /* convert to entrylo1 */
-	if (r45k_bvahwbug())
-		i_mfc0(p, tmp, C0_INDEX);
+	if (r45k_bvahwbug() || loadindex)
+		i_mfc0(p, tmp, C0_INDEX); /* used by tlb_arbitrary */
 	if (r4k_250MHZhwbug())
 		i_mtc0(p, 0, C0_ENTRYLO1);
 	i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
@@ -1198,8 +1256,8 @@ #else
 #endif
 
 	build_get_ptep(&p, K0, K1);
-	build_update_entries(&p, K0, K1);
-	build_tlb_write_entry(&p, &l, &r, tlb_random);
+	build_update_entries(&p, K0, K1, 0);
+	build_tlb_write_entry(&p, K0, &l, &r, tlb_random);
 	l_leave(&l, p);
 	i_eret(&p); /* return from trap */
 
@@ -1647,12 +1705,13 @@ # endif
 static void __init
 build_r4000_tlbchange_handler_tail(u32 **p, struct label **l,
 				   struct reloc **r, unsigned int tmp,
-				   unsigned int ptr)
+				   unsigned int ptr,
+				   enum tlb_write_entry wmode)
 {
 	i_ori(p, ptr, ptr, sizeof(pte_t));
 	i_xori(p, ptr, ptr, sizeof(pte_t));
-	build_update_entries(p, tmp, ptr);
-	build_tlb_write_entry(p, l, r, tlb_indexed);
+	build_update_entries(p, tmp, ptr, wmode == tlb_arbitrary);
+	build_tlb_write_entry(p, tmp, l, r, wmode);
 	l_leave(l, *p);
 	i_eret(p); /* return from trap */
 
@@ -1667,6 +1726,9 @@ static void __init build_r4000_tlb_load_
 	struct label *l = labels;
 	struct reloc *r = relocs;
 	int i;
+	extern int rdhwr_noopt;
+	enum tlb_write_entry wmode = (!rdhwr_noopt && cpu_has_vtag_icache) ?
+		tlb_arbitrary : tlb_indexed;
 
 	memset(handle_tlbl, 0, sizeof(handle_tlbl));
 	memset(labels, 0, sizeof(labels));
@@ -1684,7 +1746,7 @@ static void __init build_r4000_tlb_load_
 	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
 	build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl);
 	build_make_valid(&p, &r, K0, K1);
-	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1, wmode);
 
 	l_nopage_tlbl(&l, p);
 	i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
@@ -1718,7 +1780,7 @@ static void __init build_r4000_tlb_store
 	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
 	build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs);
 	build_make_write(&p, &r, K0, K1);
-	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1, tlb_indexed);
 
 	l_nopage_tlbs(&l, p);
 	i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
@@ -1753,7 +1815,7 @@ static __init void build_r4000_tlb_modif
 	build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm);
 	/* Present and writable bits set, set accessed and dirty bits. */
 	build_make_write(&p, &r, K0, K1);
-	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1, tlb_indexed);
 
 	l_nopage_tlbm(&l, p);
 	i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
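A note on the tlbex.c half, since the reason for tlb_arbitrary is easy
to miss.  As far as I can see, the case it covers is this: with a
virtually tagged icache the rdhwr can still be fetched from the icache
after its page has been evicted from the TLB, so the "lw k1, (k1)" in
handle_ri_rdhwr may miss the TLB while EXL is still set.  Such a miss
is dispatched to the general exception vector (i.e. handle_tlbl)
instead of the refill vector, and the tlbp in the handler head then
finds no matching entry, so a blind tlbwi would write to a bogus index.
The synthesized branch-likely sequence therefore checks CP0_INDEX at
run time.  A C-level sketch of the run-time behaviour (illustrative
only; the tlb_write_arbitrary name is made up for the sketch, while
read_c0_index(), tlb_write_indexed() and tlb_write_random() are the
usual asm/mipsregs.h helpers):

/* Illustrative sketch of what the synthesized tlb_arbitrary write
 * does at run time.  The handler head has already done the tlbp,
 * and build_update_entries() has just reloaded CP0_INDEX.
 */
static inline void tlb_write_arbitrary(void)
{
	if ((int)read_c0_index() >= 0)
		tlb_write_indexed();	/* tlbp hit: rewrite that slot */
	else
		tlb_write_random();	/* Index.P set: no match, tlbwr */
}

Only the load handler needs this; the store and modify handlers still
pass tlb_indexed because the fast path performs only a load.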