On Thu, 2009-05-28 at 21:24 -0400, Steven Rostedt wrote: > On Fri, 29 May 2009, wuzhangjin@xxxxxxxxx wrote: > > > From: Wu Zhangjin <wuzj@xxxxxxxxxx> > > > > dynamic function tracer need to replace "nop" to "jumps & links" and > > something reversely. > > > > Signed-off-by: Wu Zhangjin <wuzj@xxxxxxxxxx> > > --- > > arch/mips/Kconfig | 3 + > > arch/mips/include/asm/ftrace.h | 10 ++ > > arch/mips/kernel/Makefile | 2 + > > arch/mips/kernel/ftrace.c | 217 ++++++++++++++++++++++++++++++++++++++++ > > arch/mips/kernel/mcount.S | 31 ++++++ > > scripts/Makefile.build | 1 + > > scripts/recordmcount.pl | 32 +++++- > > 7 files changed, 290 insertions(+), 6 deletions(-) > > create mode 100644 arch/mips/kernel/ftrace.c > > > > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig > > index d5c01ca..0c00536 100644 > > --- a/arch/mips/Kconfig > > +++ b/arch/mips/Kconfig > > @@ -6,6 +6,9 @@ config MIPS > > select HAVE_ARCH_KGDB > > select HAVE_FUNCTION_TRACER > > select HAVE_FUNCTION_TRACE_MCOUNT_TEST > > + select HAVE_DYNAMIC_FTRACE > > + select HAVE_FTRACE_MCOUNT_RECORD > > + select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE > > # Horrible source of confusion. Die, die, die ... > > select EMBEDDED > > select RTC_LIB > > diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h > > index 5f8ebcf..b4970c9 100644 > > --- a/arch/mips/include/asm/ftrace.h > > +++ b/arch/mips/include/asm/ftrace.h > > @@ -19,6 +19,16 @@ > > extern void _mcount(void); > > #define mcount _mcount > > > > +#ifdef CONFIG_DYNAMIC_FTRACE > > +/* reloction of mcount call site is the same as the address */ > > +static inline unsigned long ftrace_call_adjust(unsigned long addr) > > +{ > > + return addr; > > +} > > + > > +struct dyn_arch_ftrace { > > +}; > > +#endif /* CONFIG_DYNAMIC_FTRACE */ > > #endif /* __ASSEMBLY__ */ > > #endif /* CONFIG_FUNCTION_TRACER */ > > #endif /* _ASM_MIPS_FTRACE_H */ > > diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile > > index d167dde..6b1a8a5 100644 > > --- a/arch/mips/kernel/Makefile > > +++ b/arch/mips/kernel/Makefile > > @@ -11,6 +11,7 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ > > ifdef CONFIG_FUNCTION_TRACER > > # Do not profile debug and lowlevel utilities > > CFLAGS_REMOVE_mcount.o = -pg > > +CFLAGS_REMOVE_ftrace.o = -pg > > CFLAGS_REMOVE_early_printk.o = -pg > > endif > > > > @@ -31,6 +32,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o > > obj-$(CONFIG_MODULES) += mips_ksyms.o module.o > > > > obj-$(CONFIG_FUNCTION_TRACER) += mcount.o > > +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o > > > > obj-$(CONFIG_CPU_LOONGSON2) += r4k_fpu.o r4k_switch.o > > obj-$(CONFIG_CPU_MIPS32) += r4k_fpu.o r4k_switch.o > > diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c > > new file mode 100644 > > index 0000000..827c128 > > --- /dev/null > > +++ b/arch/mips/kernel/ftrace.c > > @@ -0,0 +1,217 @@ > > +/* > > + * Code for replacing ftrace calls with jumps. > > + * > > + * Copyright (C) 2007-2008 Steven Rostedt <srostedt@xxxxxxxxxx> > > + * Copyright (C) 2009 DSLab, Lanzhou University, China > > + * Author: Wu Zhangjin <wuzj@xxxxxxxxxx> > > + * > > + * Thanks goes to Steven Rostedt for writing the original x86 version. > > + */ > > + > > +#include <linux/spinlock.h> > > +#include <linux/hardirq.h> > > +#include <linux/uaccess.h> > > +#include <linux/percpu.h> > > +#include <linux/sched.h> > > +#include <linux/init.h> > > +#include <linux/list.h> > > +#include <linux/ftrace.h> > > + > > +#include <asm/cacheflush.h> > > +#include <asm/ftrace.h> > > +#include <asm/asm.h> > > +#include <asm/unistd.h> > > + > > +#ifdef CONFIG_DYNAMIC_FTRACE > > + > > +#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ > > +#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ > > + > > +static unsigned int ftrace_nop = 0x00000000; > > + > > +static unsigned char *ftrace_call_replace(unsigned long op_code, > > + unsigned long addr) > > +{ > > + static unsigned int op; > > + > > + op = op_code | ((addr >> 2) & ADDR_MASK); > > + > > + return (unsigned char *) &op; > > +} > > + > > +static atomic_t nmi_running = ATOMIC_INIT(0); > > +static int mod_code_status; /* holds return value of text write */ > > +static int mod_code_write; /* set when NMI should do the write */ > > +static void *mod_code_ip; /* holds the IP to write to */ > > +static void *mod_code_newcode; /* holds the text to write to the IP */ > > + > > +static unsigned nmi_wait_count; > > +static atomic_t nmi_update_count = ATOMIC_INIT(0); > > + > > +int ftrace_arch_read_dyn_info(char *buf, int size) > > +{ > > + int r; > > + > > + r = snprintf(buf, size, "%u %u", > > + nmi_wait_count, atomic_read(&nmi_update_count)); > > + return r; > > +} > > + > > +static void ftrace_mod_code(void) > > +{ > > + /* > > + * Yes, more than one CPU process can be writing to mod_code_status. > > + * (and the code itself) > > + * But if one were to fail, then they all should, and if one were > > + * to succeed, then they all should. > > + */ > > + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, > > + MCOUNT_INSN_SIZE); > > + > > + /* if we fail, then kill any new writers */ > > + if (mod_code_status) > > + mod_code_write = 0; > > +} > > + > > +void ftrace_nmi_enter(void) > > +{ > > + atomic_inc(&nmi_running); > > + /* Must have nmi_running seen before reading write flag */ > > + smp_mb(); > > + if (mod_code_write) { > > + ftrace_mod_code(); > > + atomic_inc(&nmi_update_count); > > + } > > +} > > + > > +void ftrace_nmi_exit(void) > > +{ > > + /* Finish all executions before clearing nmi_running */ > > + smp_wmb(); > > + atomic_dec(&nmi_running); > > +} > > + > > +static void wait_for_nmi(void) > > +{ > > + int waited = 0; > > + > > + while (atomic_read(&nmi_running)) { > > + waited = 1; > > + cpu_relax(); > > + } > > + > > + if (waited) > > + nmi_wait_count++; > > +} > > + > > +static int do_ftrace_mod_code(unsigned long ip, void *new_code) > > +{ > > + mod_code_ip = (void *) ip; > > + mod_code_newcode = new_code; > > + > > + /* The buffers need to be visible before we let NMIs write them */ > > + smp_wmb(); > > + > > + mod_code_write = 1; > > + > > + /* Make sure write bit is visible before we wait on NMIs */ > > + smp_mb(); > > + > > + wait_for_nmi(); > > + > > + /* Make sure all running NMIs have finished before we write the code */ > > + smp_mb(); > > + > > + ftrace_mod_code(); > > + > > + /* Make sure the write happens before clearing the bit */ > > + smp_wmb(); > > + > > + mod_code_write = 0; > > + > > + /* make sure NMIs see the cleared bit */ > > + smp_mb(); > > + > > + wait_for_nmi(); > > + > > + return mod_code_status; > > +} > > Hmm, this is basically exactly the same as x86's version. I wounder if we > should make a helper function in generic code to let archs use it. We can > put the do_ftrace_mod_code into kernel/trace/ftrace.c and have weak > functions for the ftrace_mod_code. If the arch needs this to handle NMIs, > then it can use it. This code was tricky to write, and I would hate to > have it duplicated in every arch. > so, when will you put do_ftrace_mod_code into kernel/trace/ftrace.c? i just checked the powerpc version, seems something different, so, we should handle it carefully and tune the relative arch-dependent parts? > > + > > +static unsigned char *ftrace_nop_replace(void) > > +{ > > + return (unsigned char *) &ftrace_nop; > > +} > > + > > +static int > > +ftrace_modify_code(unsigned long ip, unsigned char *old_code, > > + unsigned char *new_code) > > +{ > > + unsigned char replaced[MCOUNT_INSN_SIZE]; > > + > > + /* > > + * Note: Due to modules and __init, code can > > + * disappear and change, we need to protect against faulting > > + * as well as code changing. We do this by using the > > + * probe_kernel_* functions. > > hehe, this is an old comment. We don't touch __init sections anymore. I > need to remove it from the x86 file. > Removed, this is the same in powerpc version. > > + * > > + * No real locking needed, this code is run through > > + * kstop_machine, or before SMP starts. > > + */ > > + > > + /* read the text we want to modify */ > > + if (probe_kernel_read(replaced, (void *) ip, MCOUNT_INSN_SIZE)) > > + return -EFAULT; > > + > > + /* Make sure it is what we expect it to be */ > > + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) > > + return -EINVAL; > > + > > + /* replace the text with the new text */ > > + if (do_ftrace_mod_code(ip, new_code)) > > + return -EPERM; > > + > > + return 0; > > +} > > + > > +int ftrace_make_nop(struct module *mod, > > + struct dyn_ftrace *rec, unsigned long addr) > > +{ > > + unsigned char *new, *old; > > + > > + old = ftrace_call_replace(JAL, addr); > > + new = ftrace_nop_replace(); > > + > > + return ftrace_modify_code(rec->ip, old, new); > > +} > > + > > +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) > > +{ > > + unsigned char *new, *old; > > + > > + old = ftrace_nop_replace(); > > + new = ftrace_call_replace(JAL, addr); > > + > > + return ftrace_modify_code(rec->ip, old, new); > > +} > > + > > +int ftrace_update_ftrace_func(ftrace_func_t func) > > +{ > > + unsigned long ip = (unsigned long) (&ftrace_call); > > + unsigned char old[MCOUNT_INSN_SIZE], *new; > > + int ret; > > + > > + memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); > > + new = ftrace_call_replace(JAL, (unsigned long) func); > > + ret = ftrace_modify_code(ip, old, new); > > + > > + return ret; > > +} > > + > > +int __init ftrace_dyn_arch_init(void *data) > > +{ > > + /* The return code is retured via data */ > > + *(unsigned long *) data = 0; > > egad, I need to clean that up too. I should return the true error code > with ret. That is legacy from the first version of the dynamic ftrace > code. > This review is showing all the flaws of my own work ;-) > Yeap, most of it is copied from your original x86 version. there are really lots of duplications among different arch-specific versions, need to cleanup carefully, and should we write something like a helper document for people developing arch-specific version? > > + > > + return 0; > > +} > > +#endif /* CONFIG_DYNAMIC_FTRACE */ > > diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S > > index 268724e..ce8a0ba 100644 > > --- a/arch/mips/kernel/mcount.S > > +++ b/arch/mips/kernel/mcount.S > > @@ -67,6 +67,35 @@ > > move ra, $1 > > .endm > > > > +#ifdef CONFIG_DYNAMIC_FTRACE > > + > > +LEAF(_mcount) > > + RESTORE_SP_FOR_32BIT > > + RETURN_BACK > > + END(_mcount) > > + > > +NESTED(ftrace_caller, PT_SIZE, ra) > > + RESTORE_SP_FOR_32BIT > > + lw t0, function_trace_stop > > + bnez t0, ftrace_stub > > + nop > > + > > + MCOUNT_SAVE_REGS > > + > > + MCOUNT_SET_ARGS > > + .globl ftrace_call > > +ftrace_call: > > + jal ftrace_stub > > + nop > > + > > + MCOUNT_RESTORE_REGS > > + .globl ftrace_stub > > +ftrace_stub: > > + RETURN_BACK > > + END(ftrace_caller) > > + > > +#else /* ! CONFIG_DYNAMIC_FTRACE */ > > + > > NESTED(_mcount, PT_SIZE, ra) > > RESTORE_SP_FOR_32BIT > > PTR_L t0, function_trace_stop > > @@ -94,5 +123,7 @@ ftrace_stub: > > RETURN_BACK > > END(_mcount) > > > > +#endif /* ! CONFIG_DYNAMIC_FTRACE */ > > + > > .set at > > .set reorder > > diff --git a/scripts/Makefile.build b/scripts/Makefile.build > > index 5c4b7a4..548d575 100644 > > --- a/scripts/Makefile.build > > +++ b/scripts/Makefile.build > > @@ -207,6 +207,7 @@ endif > > > > ifdef CONFIG_FTRACE_MCOUNT_RECORD > > cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ > > + "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ > > "$(if $(CONFIG_64BIT),64,32)" \ > > "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ > > "$(if $(part-of-module),1,0)" "$(@)"; > > This big/little endian addition, I would like in its own patch. > okay, will split it out later. > > diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl > > index 409596e..e963948 100755 > > --- a/scripts/recordmcount.pl > > +++ b/scripts/recordmcount.pl > > @@ -100,13 +100,13 @@ $P =~ s@.*/@@g; > > > > my $V = '0.1'; > > > > -if ($#ARGV < 7) { > > - print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n"; > > +if ($#ARGV < 8) { > > + print "usage: $P arch endian bits objdump objcopy cc ld nm rm mv is_module inputfile\n"; > > print "version: $V\n"; > > exit(1); > > } > > > > -my ($arch, $bits, $objdump, $objcopy, $cc, > > +my ($arch, $endian, $bits, $objdump, $objcopy, $cc, > > $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV; > > > > # This file refers to mcount and shouldn't be ftraced, so lets' ignore it > > @@ -213,6 +213,26 @@ if ($arch eq "x86_64") { > > if ($is_module eq "0") { > > $cc .= " -mconstant-gp"; > > } > > + > > +} elsif ($arch eq "mips") { > > + $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; > > + $objdump .= " -Melf-trad".$endian."mips "; > > + > > + if ($endian eq "big") { > > + $endian = " -EB "; > > + $ld .= " -melf".$bits."btsmip"; > > + } else { > > + $endian = " -EL "; > > + $ld .= " -melf".$bits."ltsmip"; > > + } > > + > > + $cc .= " -mno-abicalls -fno-pic -mabi=" . $bits . $endian; > > + $ld .= $endian; > > + > > + if ($bits == 64) { > > + $type = ".dword"; > > + } > > The mips addition to the recordmcount.pl is OK to keep with this patch. > > > + > > } else { > > die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; > > } > > @@ -441,12 +461,12 @@ if ($#converts >= 0) { > > # > > # Step 5: set up each local function as a global > > # > > - `$objcopy $globallist $inputfile $globalobj`; > > + `$objcopy $globallist $inputfile $globalobj 2>&1 >/dev/null`; > > Are these spitting out errors? > no errors, but some warnings. seems some files not have _mcount(ooh, I did this patch about two months ago, so not remember the _real_ reason now), so there will some complaint about "No such file ...", this fix just make it not complain again. > -- Steve > > > > > # > > # Step 6: Link the global version to our list. > > # > > - `$ld -r $globalobj $mcount_o -o $globalmix`; > > + `$ld -r $globalobj $mcount_o -o $globalmix 2>&1 >/dev/null`; > > > > # > > # Step 7: Convert the local functions back into local symbols > > @@ -454,7 +474,7 @@ if ($#converts >= 0) { > > `$objcopy $locallist $globalmix $inputfile`; > > > > # Remove the temp files > > - `$rm $globalobj $globalmix`; > > + `$rm $globalobj $globalmix 2>&1 >/dev/null`; > > > > } else { > > > > -- > > 1.6.0.4 > > > >