[PATCH v2 1/2] seq_file: Optimize seq_puts()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_putc() or seq_write() can then be used
instead.

This saves a few cycles.

To have an estimation of how often this optimization triggers:
   $ git grep seq_puts.*\" | wc -l
   3436

   $ git grep seq_puts.*\".\" | wc -l
   84

Signed-off-by: Christophe JAILLET <christophe.jaillet@xxxxxxxxxx>
---
Changes in v2:
   - Use a function, instead of a macro   [Al Viro]
   - Handle the case of 1 char only strings, in order to use seq_putc()   [Al Viro]
   - Use __always_inline   [David Laight]

V1: https://lore.kernel.org/all/5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@xxxxxxxxxx/


Checked by comparing the output of a few .s files.
Here is one of these outputs:

$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w

 .L2918:
@@ -46072,10 +46073,11 @@
 	call	clk_prepare_unlock	#
 # drivers/clk/clk.c:3438: 	clk_pm_runtime_put_all();
 	call	clk_pm_runtime_put_all	#
-# drivers/clk/clk.c:3440: 	seq_puts(s, "}\n");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	movl	$2, %edx	#,
 	movq	$.LC94, %rsi	#,
 	movq	%rbp, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
 # drivers/clk/clk.c:3441: 	return 0;
 	jmp	.L2917	#
 	.size	clk_dump_show, .-clk_dump_show
@@ -46200,7 +46202,7 @@
 	leaq	128(%rbx), %r15	#, _97
 	movq	%r15, %rdi	# _97,
--
 # drivers/clk/clk.c:1987: 		__clk_recalc_rates(core, false, 0);
@@ -46480,15 +46482,17 @@
 	call	seq_printf	#
 	jmp	.L2950	#
 .L2946:
-# drivers/clk/clk.c:3315: 		seq_puts(s, "-----");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
 	call	__sanitizer_cov_trace_pc	#
+	movl	$5, %edx	#,
 	movq	$.LC100, %rsi	#,
 	movq	%rbp, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
+# ./include/linux/seq_file.h:131: }
 	jmp	.L2947	#
 .L2957:
 # drivers/clk/clk.c:1903: 	return clk_core_get_accuracy_no_lock(core);
-	xorl	%r14d, %r14d	# _34
+	xorl	%r14d, %r14d	# _35
--
@@ -46736,21 +46740,22 @@
 	call	__sanitizer_cov_trace_pc	#
 	leaq	240(%r12), %rdi	#, tmp101
 	call	__tsan_read8	#
-# drivers/clk/clk.c:3355: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
-	movq	$.LC104, %rsi	#,
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	movl	$142, %edx	#,
 	movq	%r12, %rdi	# s,
+	movq	$.LC104, %rsi	#,
 # drivers/clk/clk.c:3352: 	struct hlist_head **lists = s->private;
 	movq	240(%r12), %r13	# s_10(D)->private, lists
-# drivers/clk/clk.c:3355: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
-	call	seq_puts	#
-# drivers/clk/clk.c:3356: 	seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle    enable   consumer                         id\n");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	call	seq_write	#
+	movl	$142, %edx	#,
 	movq	$.LC105, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
-# drivers/clk/clk.c:3357: 	seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+	call	seq_write	#
 	movq	$.LC106, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
+	movl	$142, %edx	#,
+	call	seq_write	#
 # drivers/clk/clk.c:3359: 	ret = clk_pm_runtime_get_all();
 	call	clk_pm_runtime_get_all	#
 # drivers/clk/clk.c:3360: 	if (ret)
@@ -57943,7 +57948,7 @@
 	subq	$88, %rsp	#,
 # drivers/clk/clk.c:5338: {


The output for seq_putc() generation has also be checked and works.
---
 fs/seq_file.c            |  4 ++--
 include/linux/seq_file.h | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
 }
 EXPORT_SYMBOL(seq_putc);
 
-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
 {
 	int len = strlen(s);
 
@@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
 	memcpy(m->buf + m->count, s, len);
 	m->count += len;
 }
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);
 
 /**
  * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..8bd4fda6e027 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
 __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+
+static __always_inline void seq_puts(struct seq_file *m, const char *s)
+{
+	if (!__builtin_constant_p(*s))
+		__seq_puts(m, s);
+	else if (s[0] && !s[1])
+		seq_putc(m, s[0]);
+	else
+		seq_write(m, s, __builtin_strlen(s));
+}
+
 void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
 			       unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-- 
2.44.0





[Index of Archives]     [Kernel Development]     [Kernel Announce]     [Kernel Newbies]     [Linux Networking Development]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Device Mapper]

  Powered by Linux