[PATCH -v5] MIPS: tracing: Optimize the implementation

From: Wu Zhangjin <wuzhangjin@xxxxxxxxx>

Hi, Ralf

This patch not only fixes function graph tracing for 32-bit kernels and
modules compiled with or without -mmcount-ra-address, but also speeds up
the dynamic function tracer and the function graph tracer.

I have just tested it with the latest gcc 4.5 and added the necessary
changes to make kernel and module support work well with
-mmcount-ra-address. I think it is ready to be applied now.

Regards,
	Wu Zhangjin

--------------

Changes:

v4 -> v5:

  * All of these changes have been tested on 32-bit & 64-bit kernels
  compiled with gcc 4.4 & 4.5, covering both kernel & module Ftrace
  support.

  + Fixup of the module support broken by the v1->v2 change.

    As the gcc source code (and the objdump result of the kernel image)
    for -mmcount-ra-address shows, the v1->v2 change was wrong and broke
    the module support:

    For a 64-bit kernel with -mmcount-ra-address, the instruction that
    saves the location of the return address is placed in the delay
    slot of the jump instruction (the calling site of _mcount), so the
    offset of "b 1f" is the same as with -mno-mcount-ra-address: 16.

    But for a 32-bit kernel, there is already an instruction in the
    delay slot, so the return address operation must be placed before
    the jump instruction, and the offset is bigger than with
    -mno-mcount-ra-address: 16 + 4 = 20. A sketch of the two call-site
    layouts follows below.
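
    To illustrate (a sketch based on the description above; the
    "move $12, ra_address" and "sub sp, sp, 8" instructions are
    assumptions about the exact gcc output, so check objdump when in
    doubt):

	64-bit module: the $12 setup fits in the delay slot, offset 16:

		lui	v1, hi16_mcount		--> b 1f (0x10000004)
		addiu	v1, v1, lo16_mcount
		move	at, ra
		jalr	v1
		 move	$12, ra_address
						1f: 4 insns x 4 = 16

	32-bit module: the delay slot is already taken, so the $12 setup
	goes before the jump, offset 20:

		lui	v1, hi16_mcount		--> b 1f (0x10000005)
		addiu	v1, v1, lo16_mcount
		move	at, ra
		move	$12, ra_address
		jalr	v1
		 sub	sp, sp, 8
						1f: 5 insns x 4 = 20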

  + Fixup of the 32bit support with gcc 4.5

    As the gcc 4.5 documentation[1] shows, -mmcount-ra-address uses $12
    to pass the location of the return address to the _mcount function.
    In a 64-bit kernel $12 is t0, but in a 32-bit kernel it is t4, so
    we need to use $12 instead of t0 here to cover both the 64-bit and
    the 32-bit case.

    [1] GCC doc: MIPS Options
    http://gcc.gnu.org/onlinedocs/gcc/MIPS-Options.html
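
    In mcount.S this means referring to the raw register number, so the
    same code assembles for both ABIs (this matches the hunk below; the
    t0/t4 naming is the standard MIPS register convention):

	#ifdef KBUILD_MCOUNT_RA_ADDRESS
		PTR_S	$12, PT_R12(sp)	# n64: $12 is t0, o32: $12 is t4
	#endif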

  + Cleanup of mcount.S

    o Merge two consecutive "#ifdef CONFIG_64BIT" blocks.
    o Remove the instruction "move a0, t0" by using a0 directly.
    o Share the common "PTR_LA a0, PT_R1(sp)" instruction.

v3 -> v4:

  + Fixup of 'undefined ftrace_graph_caller'

    The ftrace_graph_caller related code is only needed with
    CONFIG_FUNCTION_GRAPH_TRACER, so wrap it in #ifdef, as sketched
    below.
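
    A minimal sketch of the fix, matching the v5 code below where the
    encoding of insn_j_ftrace_graph_caller is guarded:

	#ifdef CONFIG_FUNCTION_GRAPH_TRACER
		/* j ftrace_graph_caller */
		buf = (u32 *)&insn_j_ftrace_graph_caller;
		uasm_i_j(&buf, (unsigned long)ftrace_graph_caller);
	#endif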

v2 -> v3:

  + Fixes the building error introduced by the changes in v2.

v1 -> v2:

  + Fixes the support of -mmcount-ra-address

    The offset of the "b 1f" instruction should be 20 for a leaf
    function and 24 for a non-leaf function when -mmcount-ra-address is
    enabled. This patch adds a new get_insn_b_1f() function to get the
    appropriate "b 1f" instruction.

  (Only tested without -mmcount-ra-address.)
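
  As a reference for these offsets, the "b 1f" encodings can be derived
  as follows (a sketch; MIPS_B_FWD is a hypothetical helper, not part
  of the patch -- the 16-bit offset field of a MIPS branch counts words
  forward from the delay slot):

	/* b <forward label>: 0x10000000 | (byte distance >> 2) */
	#define MIPS_B_FWD(bytes)	(0x10000000 | ((bytes) >> 2))

	MIPS_B_FWD(16)	/* 0x10000004, INSN_B_1F_16 below */
	MIPS_B_FWD(20)	/* 0x10000005, INSN_B_1F_20 below */
	MIPS_B_FWD(24)	/* 0x10000006, the 24-byte case mentioned above */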

v1:

  + Reduce the runtime overhead

    o Use macros instead of variables for the fixed instructions to
    reduce memory accesses.

    o Move the initialization of the instructions that are fixed after
    linking from ftrace_make_nop()/ftrace_make_call() to
    ftrace_dyn_arch_init(), and encode them through
    uasm (arch/mips/include/asm/uasm.h).

    o Define a common macro in_module() to determine which address
    space the instruction pointer is in, and convert several related
    conditional statements to the conditional operator (? :); a short
    sketch follows below.
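
    The conversion boils down to (both lines appear in the patch
    below):

	#define in_module(ip) (unlikely((ip) & 0x40000000))

	new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller;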

  + General cleanup

    Lots of comments/macros have been cleaned up to read better.

Signed-off-by: Wu Zhangjin <wuzhangjin@xxxxxxxxx>
---
 arch/mips/include/asm/ftrace.h |   12 ++-
 arch/mips/kernel/ftrace.c      |  217 +++++++++++++++++++++++-----------------
 arch/mips/kernel/mcount.S      |   20 ++--
 3 files changed, 141 insertions(+), 108 deletions(-)

diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index ce35c9a..29d71c0 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive for
  * more details.
  *
- * Copyright (C) 2009 DSLab, Lanzhou University, China
+ * Copyright (C) 2009, 2010 DSLab, Lanzhou University, China
  * Author: Wu Zhangjin <wuzhangjin@xxxxxxxxx>
  */
 
@@ -19,6 +19,14 @@
 extern void _mcount(void);
 #define mcount _mcount
 
+/*
+ * If the instruction pointer is in module space (0xc0000000), return true;
+ * otherwise it is in kernel space (0x80000000) and we return false.
+ *
+ * FIXME: This may not work in some cases.
+ */
+#define in_module(ip) (unlikely((ip) & 0x40000000))
+
 #define safe_load(load, src, dst, error)		\
 do {							\
 	asm volatile (					\
@@ -83,8 +91,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 
 struct dyn_arch_ftrace {
 };
-
 #endif /*  CONFIG_DYNAMIC_FTRACE */
+
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
 #endif /* _ASM_MIPS_FTRACE_H */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index e9e64e0..d01ab52 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -2,7 +2,7 @@
  * Code for replacing ftrace calls with jumps.
  *
  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@xxxxxxxxxx>
- * Copyright (C) 2009 DSLab, Lanzhou University, China
+ * Copyright (C) 2009, 2010 DSLab, Lanzhou University, China
  * Author: Wu Zhangjin <wuzhangjin@xxxxxxxxx>
  *
  * Thanks goes to Steven Rostedt for writing the original x86 version.
@@ -15,15 +15,51 @@
 #include <asm/cacheflush.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/uasm.h>
+
+#define INSN_S_R_SP	0xafb00000	/* s{d,w} R, offset(sp) */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-#define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
-#define ADDR_MASK 0x03ffffff	/*  op_code|addr : 31...26|25 ....0 */
-#define jump_insn_encode(op_code, addr) \
-	((unsigned int)((op_code) | (((addr) >> 2) & ADDR_MASK)))
+/* Before linking, the following instructions are fixed. */
+#ifdef CONFIG_CPU_LOONGSON2F
+#define INSN_NOP 0x00200825	/* or at, at, zero */
+#else
+#define INSN_NOP 0x00000000	/* nop */
+#endif
+#define INSN_B_1F_16 0x10000004	/* b 1f; offset = (16 >> 2) */
+#define INSN_B_1F_20 0x10000005	/* b 1f; offset = (20 >> 2) */
+
+/* After linking, the following instructions are fixed. */
+static unsigned int insn_jal_ftrace_caller __read_mostly;
+static unsigned int insn_lui_v1_hi16_mcount __read_mostly;
+static unsigned int insn_j_ftrace_graph_caller __maybe_unused __read_mostly;
+
+/* The following instructions are encoded at run time */
+/* insn: jal func; op_code|addr : 31...26|25 ....0 */
+#define INSN_JAL(addr) \
+	((unsigned int)(0x0c000000 | (((addr) >> 2) & 0x03ffffff)))
+
+static inline void ftrace_dyn_arch_init_insns(void)
+{
+	u32 *buf;
+	unsigned int v1;
 
-static unsigned int ftrace_nop = 0x00000000;
+	/* lui v1, hi16_mcount */
+	v1 = 3;
+	buf = (u32 *)&insn_lui_v1_hi16_mcount;
+	UASM_i_LA_mostly(&buf, v1, MCOUNT_ADDR);
+
+	/* jal (ftrace_caller + 8), jump over the first two instructions */
+	buf = (u32 *)&insn_jal_ftrace_caller;
+	uasm_i_jal(&buf, (FTRACE_ADDR + 8));
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/* j ftrace_graph_caller */
+	buf = (u32 *)&insn_j_ftrace_graph_caller;
+	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller);
+#endif
+}
 
 static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 {
@@ -31,7 +67,6 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 
 	/* *(unsigned int *)ip = new_code; */
 	safe_store_code(new_code, ip, faulted);
-
 	if (unlikely(faulted))
 		return -EFAULT;
 
@@ -40,84 +75,82 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 	return 0;
 }
 
-static int lui_v1;
-static int jal_mcount;
-
 int ftrace_make_nop(struct module *mod,
 		    struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int new;
-	int faulted;
 	unsigned long ip = rec->ip;
 
-	/* We have compiled module with -mlong-calls, but compiled the kernel
-	 * without it, we need to cope with them respectively. */
-	if (ip & 0x40000000) {
-		/* record it for ftrace_make_call */
-		if (lui_v1 == 0) {
-			/* lui_v1 = *(unsigned int *)ip; */
-			safe_load_code(lui_v1, ip, faulted);
-
-			if (unlikely(faulted))
-				return -EFAULT;
-		}
-
-		/* lui v1, hi_16bit_of_mcount        --> b 1f (0x10000004)
-		 * addiu v1, v1, low_16bit_of_mcount
-		 * move at, ra
-		 * jalr v1
-		 * nop
-		 * 				     1f: (ip + 12)
-		 */
-		new = 0x10000004;
-	} else {
-		/* record/calculate it for ftrace_make_call */
-		if (jal_mcount == 0) {
-			/* We can record it directly like this:
-			 *     jal_mcount = *(unsigned int *)ip;
-			 * Herein, jump over the first two nop instructions */
-			jal_mcount = jump_insn_encode(JAL, (MCOUNT_ADDR + 8));
-		}
-
-		/* move at, ra
-		 * jalr v1		--> nop
-		 */
-		new = ftrace_nop;
+	/*
+	 * Modules are compiled with -mlong-calls, but the kernel is not,
+	 * so we need to cope with them respectively.
+	 *
+	 * For module:
+	 *
+	 *	lui	v1, hi16_mcount		--> b	1f
+	 *	addiu	v1, v1, lo16_mcount
+	 *	move at, ra
+	 *	jalr v1
+	 *	 nop
+	 *					1f: (ip + 16)
+	 * For kernel:
+	 *
+	 *	move	at, ra
+	 *	jal	_mcount			--> nop
+	 *
+	 * And with the -mmcount-ra-address option, the offset may be 20 for
+	 * a leaf function and 24 for a non-leaf function.
+	 */
+
+	if (!in_module(ip))
+		new = INSN_NOP;
+	else {
+#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
+		new = INSN_B_1F_20;
+#else
+		new = INSN_B_1F_16;
+#endif
 	}
+
 	return ftrace_modify_code(ip, new);
 }
 
-static int modified;	/* initialized as 0 by default */
-
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int new;
 	unsigned long ip = rec->ip;
 
-	/* We just need to remove the "b ftrace_stub" at the fist time! */
-	if (modified == 0) {
-		modified = 1;
-		ftrace_modify_code(addr, ftrace_nop);
-	}
-	/* ip, module: 0xc0000000, kernel: 0x80000000 */
-	new = (ip & 0x40000000) ? lui_v1 : jal_mcount;
+	new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller;
 
 	return ftrace_modify_code(ip, new);
 }
 
-#define FTRACE_CALL_IP ((unsigned long)(&ftrace_call))
-
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned int new;
 
-	new = jump_insn_encode(JAL, (unsigned long)func);
+	new = INSN_JAL((unsigned long)func);
 
-	return ftrace_modify_code(FTRACE_CALL_IP, new);
+	return ftrace_modify_code((unsigned long)(&ftrace_call), new);
 }
 
 int __init ftrace_dyn_arch_init(void *data)
 {
+	ftrace_dyn_arch_init_insns();
+
+	/*
+	 * It is safe to remove the "b ftrace_stub" here: the current
+	 * ftrace_caller() is almost empty (only the stack operations), and,
+	 * most importantly, the calls to mcount will be disabled later in
+	 * ftrace_init(), so there is no big overhead. In the future, we hope
+	 * function_trace_stop can be initialized to 1 so that we can finally
+	 * remove the 'useless' "b ftrace_stub" and the following nop. We
+	 * could already call ftrace_stop() to set function_trace_stop to 1,
+	 * but that would change the behavior of the function tracer.
+	 */
+	ftrace_modify_code(MCOUNT_ADDR, INSN_NOP);
+
 	/* The return code is retured via data */
 	*(unsigned long *)data = 0;
 
@@ -128,30 +161,24 @@ int __init ftrace_dyn_arch_init(void *data)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-
 extern void ftrace_graph_call(void);
-#define JMP	0x08000000	/* jump to target directly */
-#define CALL_FTRACE_GRAPH_CALLER \
-	jump_insn_encode(JMP, (unsigned long)(&ftrace_graph_caller))
 #define FTRACE_GRAPH_CALL_IP	((unsigned long)(&ftrace_graph_call))
 
 int ftrace_enable_ftrace_graph_caller(void)
 {
 	return ftrace_modify_code(FTRACE_GRAPH_CALL_IP,
-				  CALL_FTRACE_GRAPH_CALLER);
+			insn_j_ftrace_graph_caller);
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-	return ftrace_modify_code(FTRACE_GRAPH_CALL_IP, ftrace_nop);
+	return ftrace_modify_code(FTRACE_GRAPH_CALL_IP, INSN_NOP);
 }
-
 #endif				/* !CONFIG_DYNAMIC_FTRACE */
 
 #ifndef KBUILD_MCOUNT_RA_ADDRESS
-#define S_RA_SP	(0xafbf << 16)	/* s{d,w} ra, offset(sp) */
-#define S_R_SP	(0xafb0 << 16)  /* s{d,w} R, offset(sp) */
-#define OFFSET_MASK	0xffff	/* stack offset range: 0 ~ PT_SIZE */
+#define INSN_S_RA_SP	0xafbf0000	/* s{d,w} ra, offset(sp) */
+#define STACK_OFFSET_MASK	0xffff	/* stack offset range: 0 ~ PT_SIZE */
 
 unsigned long ftrace_get_parent_addr(unsigned long self_addr,
 				     unsigned long parent,
@@ -162,35 +189,35 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
 	unsigned int code;
 	int faulted;
 
-	/* in module or kernel? */
-	if (self_addr & 0x40000000) {
-		/* module: move to the instruction "lui v1, HI_16BIT_OF_MCOUNT" */
-		ip = self_addr - 20;
-	} else {
-		/* kernel: move to the instruction "move ra, at" */
-		ip = self_addr - 12;
-	}
+	/*
+	 * For a module, move the ip from the calling site of mcount to the
+	 * instruction "lui v1, hi_16bit_of_mcount" (offset 20); for the
+	 * kernel, move to the instruction "move at, ra" (offset 12).
+	 */
+	ip = self_addr - (in_module(self_addr) ? 20 : 12);
 
-	/* search the text until finding the non-store instruction or "s{d,w}
-	 * ra, offset(sp)" instruction */
+	/*
+	 * Search the text until we find a non-store instruction or the
+	 * "s{d,w} ra, offset(sp)" instruction.
+	 */
 	do {
 		ip -= 4;
 
 		/* get the code at "ip": code = *(unsigned int *)ip; */
 		safe_load_code(code, ip, faulted);
-
 		if (unlikely(faulted))
 			return 0;
 
-		/* If we hit the non-store instruction before finding where the
+		/*
+		 * If we hit the non-store instruction before finding where the
 		 * ra is stored, then this is a leaf function and it does not
-		 * store the ra on the stack. */
-		if ((code & S_R_SP) != S_R_SP)
+		 * store the ra on the stack.
+		 */
+		if ((code & INSN_S_R_SP) != INSN_S_R_SP)
 			return parent_addr;
+	} while (((code & INSN_S_RA_SP) != INSN_S_RA_SP));
 
-	} while (((code & S_RA_SP) != S_RA_SP));
-
-	sp = fp + (code & OFFSET_MASK);
+	sp = fp + (code & STACK_OFFSET_MASK);
 
 	/* ra = *(unsigned long *)sp; */
 	safe_load_stack(ra, sp, faulted);
@@ -201,8 +228,7 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
 		return sp;
 	return 0;
 }
-
-#endif
+#endif	/* !KBUILD_MCOUNT_RA_ADDRESS */
 
 /*
  * Hook the return address and push it in the stack of return addrs
@@ -211,16 +237,17 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long fp)
 {
+	int faulted;
 	unsigned long old;
 	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
 	    &return_to_handler;
-	int faulted;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
 
-	/* "parent" is the stack address saved the return address of the caller
+	/*
+	 * "parent" is the stack slot that holds the return address of the caller
 	 * of _mcount.
 	 *
 	 * if the gcc < 4.5, a leaf function does not save the return address
@@ -241,12 +268,14 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	if (unlikely(faulted))
 		goto out;
 #ifndef KBUILD_MCOUNT_RA_ADDRESS
-	parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old,
-							 (unsigned long)parent,
-							 fp);
-	/* If fails when getting the stack address of the non-leaf function's
-	 * ra, stop function graph tracer and return */
-	if (parent == 0)
+	parent = (unsigned long *)ftrace_get_parent_addr(
+			self_addr, old, (unsigned long)parent, fp);
+
+	/*
+	 * If we fail to get the stack address of the non-leaf function's ra,
+	 * stop the function graph tracer and return.
+	 */
+	if (unlikely(parent == 0))
 		goto out;
 #endif
 	/* *parent = return_hooker; */
diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index 6851fc9..f95714c 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -6,6 +6,7 @@
  * more details.
  *
  * Copyright (C) 2009 Lemote Inc. & DSLab, Lanzhou University, China
+ * Copyright (C) 2010 DSLab, Lanzhou University, China
  * Author: Wu Zhangjin <wuzhangjin@xxxxxxxxx>
  */
 
@@ -45,8 +46,6 @@
 	PTR_L	a5, PT_R9(sp)
 	PTR_L	a6, PT_R10(sp)
 	PTR_L	a7, PT_R11(sp)
-#endif
-#ifdef CONFIG_64BIT
 	PTR_ADDIU	sp, PT_SIZE
 #else
 	PTR_ADDIU	sp, (PT_SIZE + 8)
@@ -71,7 +70,7 @@ _mcount:
 
 	MCOUNT_SAVE_REGS
 #ifdef KBUILD_MCOUNT_RA_ADDRESS
-	PTR_S	t0, PT_R12(sp)	/* t0 saved the location of the return address(at) by -mmcount-ra-address */
+	PTR_S	$12, PT_R12(sp)	/* $12 holds the location of the return address (at) */
 #endif
 
 	move	a0, ra		/* arg1: next ip, selfaddr */
@@ -137,25 +136,22 @@ NESTED(ftrace_graph_caller, PT_SIZE, ra)
 #ifdef CONFIG_DYNAMIC_FTRACE
 	PTR_L	a1, PT_R31(sp)	/* load the original ra from the stack */
 #ifdef KBUILD_MCOUNT_RA_ADDRESS
-	PTR_L	t0, PT_R12(sp)	/* load the original t0 from the stack */
+	PTR_L	a0, PT_R12(sp)	/* arg1: the location of the return address */
 #endif
-#else
+#else /* ! CONFIG_DYNAMIC_FTRACE */
 	MCOUNT_SAVE_REGS
 	move	a1, ra		/* arg2: next ip, selfaddr */
 #endif
 
 #ifdef KBUILD_MCOUNT_RA_ADDRESS
-	bnez	t0, 1f		/* non-leaf func: t0 saved the location of the return address */
+	bnez	a0, 1f		/* non-leaf func: a0 holds the location of the return address */
 	 nop
-	PTR_LA	t0, PT_R1(sp)	/* leaf func: get the location of at(old ra) from our own stack */
-1:	move	a0, t0		/* arg1: the location of the return address */
-#else
-	PTR_LA	a0, PT_R1(sp)	/* arg1: &AT -> a0 */
 #endif
-	jal	prepare_ftrace_return
+	PTR_LA	a0, PT_R1(sp)	/* leaf func: get the location of at (old ra) from our own stack */
+1:	jal	prepare_ftrace_return
 #ifdef CONFIG_FRAME_POINTER
 	 move	a2, fp		/* arg3: frame pointer */
-#else
+#else /* ! CONFIG_FRAME_POINTER */
 #ifdef CONFIG_64BIT
 	 PTR_LA	a2, PT_SIZE(sp)
 #else
-- 
1.7.0


