Hi All,
I have noticed that, reading or dumping a full sysfs entry (ex:
/sys/kernel/debug/gpio )
which exceeds PAGE_SIZE restarts associated seq_file iterator (calling
start->show->stop)
morethan one time.
In my case gpio entry just exceeded 8k which resulted in show function
getting
called 3 times.
The reason is because the allocated buffer overflows and seq_file allocates
new buffer of next order and starts iteration from the start, which is
clear in
seq_file code.
However this is bit of conservative approach which results in:
- restarting iterator if file sz exceeds PAGE_SIZE( which is really not
necessary )
- not reusing the formated buffer (which can be done by re-alloc and copy).
- Will help entires like dumping cacheline or pagetables to capture its
state with better accuracy.
To address this situation, I think seq_file formating/printf functions
should
re-allocate buffer as soon as they detect buffer overflow.
This patch attempts to do the same by adding new function buf_realloc
which inturn
calls krealloc and all the seq_file formating or printing functions call
this
function whenever an overflow is detected and retry.
Main motive of this patch is to avoid restarting the iterator and NOT
performance, however there is a bit of gain in performance too.
Performance figures(average over 100 times loop) for different sizes of
sysfs entries:
without realloc:
---------------------------------
SZ REAL USER SYS
---------------------------------
| 4k | 0.012 0.002 0.009
| 8k | 0.013 0.002 0.010
| 16k | 0.015 0.003 0.012
| 32k | 0.019 0.002 0.017
--------------------------------
with realloc:
---------------------------------
SZ REAL USER SYS
---------------------------------
| 4k | 0.012 0.002 0.009
| 8k | 0.012 0.002 0.009
| 16k | 0.013 0.002 0.010
| 32k | 0.015 0.003 0.012
-------------------------------
I discovered this issue while I was working on a different issue as I
happed
to stick a printk in show function, which got called 3 times when I
dumped the
entry with 'cat' :-). which I did not expect.
comments? :-)
--srini
>From f545eba46fe07db63aba334413837053f7d4bb4b Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@xxxxxx>
Date: Fri, 15 Jul 2011 09:26:13 +0100
Subject: [PATCH kernel-3.0.0.rc7] seq_file: optimize mem re-alloc strategy for seq file sizes > PAGE_SIZE.
This patch updates the memory allocation strategy for memory
requirements exceeding PAGE_SIZE. Now the memory is re-allocated as soon
as seq buffer overflow is detected.
Doing this way
-Avoids restarting the iterator, when buffer overflows.
-Improves speed by reusing the formated buffer.
Orignially I discovered that the my show function gets called 3 times if
I dump a debugfs file which is just more than 8K.
Looking at the code It was clear that seq_read starts with allocating
PAGE_SIZE and then calls show. Once it encountered overflow, It goes and
allocates next order (>>1) and restarts the iterator.
In my case as show() is dumping just above 8K order-3 page is allocated
by seq_read which is why show function was getting called 3 times.
As a result of this behaviour we can't capture the exact current state
of entires like dumping cache entries or page-table entries, like we do
in SH architecture.
I am sure seq_file functions are optimized for virtual file size less
than PAGE_SIZE, however this strategy will optimize any filesize.
Main motive of this patch is to avoid restarting the iterator and NOT
performance, however there is a bit of gain in performance too.
Performance figures(average over 100 times loop) for different sizes of
sysfs entries:
without realloc:
---------------------------------
SZ REAL USER SYS
---------------------------------
| 4k | 0.012 0.002 0.009
| 8k | 0.013 0.002 0.010
| 16k | 0.015 0.003 0.012
| 32k | 0.019 0.002 0.017
--------------------------------
with realloc:
---------------------------------
SZ REAL USER SYS
---------------------------------
| 4k | 0.012 0.002 0.009
| 8k | 0.012 0.002 0.009
| 16k | 0.013 0.002 0.010
| 32k | 0.015 0.003 0.012
-------------------------------
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@xxxxxx>
---
fs/seq_file.c | 91 +++++++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 69 insertions(+), 22 deletions(-)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 05d6b0e..84323cc 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -93,7 +93,7 @@ static int traverse(struct seq_file *m, loff_t offset)
m->count = 0;
}
if (m->count == m->size)
- goto Eoverflow;
+ goto Enomem;
if (pos + m->count > offset) {
m->from = offset - pos;
m->count -= m->from;
@@ -113,11 +113,22 @@ static int traverse(struct seq_file *m, loff_t offset)
m->index = index;
return error;
-Eoverflow:
+Enomem:
m->op->stop(m, p);
kfree(m->buf);
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
- return !m->buf ? -ENOMEM : -EAGAIN;
+ m->buf = NULL;
+ return -ENOMEM;
+}
+
+static int buf_realloc(struct seq_file *m)
+{
+ void *ret = krealloc(m->buf, m->size <<= 1, GFP_KERNEL);
+ if (ret && m->buf != ret) {
+ m->buf = ret;
+ return 0;
+ }
+ m->size >>= 1;
+ return -ENOMEM;
}
/**
@@ -143,8 +154,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* Don't assume *ppos is where we left it */
if (unlikely(*ppos != m->read_pos)) {
m->read_pos = *ppos;
- while ((err = traverse(m, *ppos)) == -EAGAIN)
- ;
+ err = traverse(m, *ppos);
if (err) {
/* With prejudice... */
m->read_pos = 0;
@@ -208,15 +218,12 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
}
if (m->count < m->size)
goto Fill;
- m->op->stop(m, p);
- kfree(m->buf);
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
- if (!m->buf)
+ else { /* allocation failed */
+ m->op->stop(m, p);
+ kfree(m->buf);
+ m->buf = NULL;
goto Enomem;
- m->count = 0;
- m->version = 0;
- pos = m->index;
- p = m->op->start(m, &pos);
+ }
}
m->op->stop(m, p);
m->count = 0;
@@ -293,8 +300,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
break;
retval = offset;
if (offset != m->read_pos) {
- while ((retval=traverse(m, offset)) == -EAGAIN)
- ;
+ retval = traverse(m, offset);
if (retval) {
/* with extreme prejudice... */
file->f_pos = 0;
@@ -339,14 +345,16 @@ EXPORT_SYMBOL(seq_release);
*
* Puts string into buffer, replacing each occurrence of character from
* @esc with usual octal escape. Returns 0 in case of success, -1 - in
- * case of overflow.
+ * case of overflow and seq file failed to realloc the memory.
*/
int seq_escape(struct seq_file *m, const char *s, const char *esc)
{
- char *end = m->buf + m->size;
+ char *end;
char *p;
char c;
+Retry:
+ end = m->buf + m->size;
for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) {
if (!strchr(esc, c)) {
*p++ = c;
@@ -359,6 +367,9 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
*p++ = '0' + (c & 07);
continue;
}
+ if (!buf_realloc(m))
+ goto Retry;
+
m->count = m->size;
return -1;
}
@@ -372,6 +383,7 @@ int seq_printf(struct seq_file *m, const char *f, ...)
va_list args;
int len;
+Retry:
if (m->count < m->size) {
va_start(args, f);
len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
@@ -381,6 +393,8 @@ int seq_printf(struct seq_file *m, const char *f, ...)
return 0;
}
}
+ if (!buf_realloc(m))
+ goto Retry;
m->count = m->size;
return -1;
}
@@ -430,8 +444,10 @@ EXPORT_SYMBOL(mangle_path);
int seq_path(struct seq_file *m, struct path *path, char *esc)
{
char *buf;
- size_t size = seq_get_buf(m, &buf);
+ size_t size;
int res = -1;
+Retry:
+ size = seq_get_buf(m, &buf);
if (size) {
char *p = d_path(path, buf, size);
@@ -441,6 +457,9 @@ int seq_path(struct seq_file *m, struct path *path, char *esc)
res = end - buf;
}
}
+ if (res < 0 && !buf_realloc(m))
+ goto Retry;
+
seq_commit(m, res);
return res;
@@ -456,9 +475,10 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
char *esc)
{
char *buf;
- size_t size = seq_get_buf(m, &buf);
+ size_t size;
int res = -ENAMETOOLONG;
-
+Retry:
+ size = seq_get_buf(m, &buf);
if (size) {
char *p;
@@ -472,6 +492,9 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
res = -ENAMETOOLONG;
}
}
+ if (res < 0 && !buf_realloc(m))
+ goto Retry;
+
seq_commit(m, res);
return res < 0 ? res : 0;
@@ -483,8 +506,10 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
{
char *buf;
- size_t size = seq_get_buf(m, &buf);
+ size_t size;
int res = -1;
+Retry:
+ size = seq_get_buf(m, &buf);
if (size) {
char *p = dentry_path(dentry, buf, size);
@@ -494,6 +519,9 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
res = end - buf;
}
}
+ if (res < 0 && !buf_realloc(m))
+ goto Retry;
+
seq_commit(m, res);
return res;
@@ -502,6 +530,7 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
int seq_bitmap(struct seq_file *m, const unsigned long *bits,
unsigned int nr_bits)
{
+Retry:
if (m->count < m->size) {
int len = bitmap_scnprintf(m->buf + m->count,
m->size - m->count, bits, nr_bits);
@@ -510,6 +539,9 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
return 0;
}
}
+ if (!buf_realloc(m))
+ goto Retry;
+
m->count = m->size;
return -1;
}
@@ -518,6 +550,7 @@ EXPORT_SYMBOL(seq_bitmap);
int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
unsigned int nr_bits)
{
+Retry:
if (m->count < m->size) {
int len = bitmap_scnlistprintf(m->buf + m->count,
m->size - m->count, bits, nr_bits);
@@ -526,6 +559,9 @@ int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
return 0;
}
}
+ if (!buf_realloc(m))
+ goto Retry;
+
m->count = m->size;
return -1;
}
@@ -621,10 +657,13 @@ EXPORT_SYMBOL(seq_open_private);
int seq_putc(struct seq_file *m, char c)
{
+Retry:
if (m->count < m->size) {
m->buf[m->count++] = c;
return 0;
}
+ if (!buf_realloc(m))
+ goto Retry;
return -1;
}
EXPORT_SYMBOL(seq_putc);
@@ -632,11 +671,15 @@ EXPORT_SYMBOL(seq_putc);
int seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);
+Retry:
if (m->count + len < m->size) {
memcpy(m->buf + m->count, s, len);
m->count += len;
return 0;
}
+ if (!buf_realloc(m))
+ goto Retry;
+
m->count = m->size;
return -1;
}
@@ -652,11 +695,15 @@ EXPORT_SYMBOL(seq_puts);
*/
int seq_write(struct seq_file *seq, const void *data, size_t len)
{
+Retry:
if (seq->count + len < seq->size) {
memcpy(seq->buf + seq->count, data, len);
seq->count += len;
return 0;
}
+ if (!buf_realloc(seq))
+ goto Retry;
+
seq->count = seq->size;
return -1;
}
--
1.6.3.3