Hello Joonsoo,

On 30/12/14 07:38, Joonsoo Kim wrote:
> On Fri, Dec 26, 2014 at 05:39:03PM +0300, Stefan I. Strogin wrote:
>> /proc/cmainfo contains a list of currently allocated CMA buffers for every
>> CMA area when CONFIG_CMA_DEBUG is enabled.
>
> Hello,
>
> I think that providing this information looks useful, but we need a better
> implementation. As Laura said, it is better to use debugfs. And, instead of
> re-implementing the wheel, how about using tracepoints to print this
> information? See the comments below.

Excuse me for the long delay. I've tried to give a detailed answer here:
https://lkml.org/lkml/2015/1/21/362

By "the re-implemented wheel", do you mean seq_print_stack_trace()? If so, it
was intended to show the owner of each allocated buffer. I used an approach
similar to page_owner's: saving a stack trace for each allocation. Do you
think we can use tracepoints instead? (A rough sketch of what I have in mind
is at the bottom of this mail.)

>
>> Format is:
>>
>> <base_phys_addr> - <end_phys_addr> (<size> kB), allocated by <PID>\
>> (<command name>), latency <allocation latency> us
>> <stack backtrace when the buffer had been allocated>
>>
>> Signed-off-by: Stefan I. Strogin <s.strogin@xxxxxxxxxxxxxxxxxxx>
>> ---
>>  mm/cma.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 202 insertions(+)
>>
>> diff --git a/mm/cma.c b/mm/cma.c
>> index a85ae28..ffaea26 100644
>> --- a/mm/cma.c
>> +++ b/mm/cma.c
>> @@ -34,6 +34,10 @@
>>  #include <linux/cma.h>
>>  #include <linux/highmem.h>
>>  #include <linux/io.h>
>> +#include <linux/list.h>
>> +#include <linux/proc_fs.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/time.h>
>>
>>  struct cma {
>>  	unsigned long base_pfn;
>> @@ -41,8 +45,25 @@ struct cma {
>>  	unsigned long *bitmap;
>>  	unsigned int order_per_bit; /* Order of pages represented by one bit */
>>  	struct mutex lock;
>> +#ifdef CONFIG_CMA_DEBUG
>> +	struct list_head buffers_list;
>> +	struct mutex list_lock;
>> +#endif
>>  };
>>
>> +#ifdef CONFIG_CMA_DEBUG
>> +struct cma_buffer {
>> +	unsigned long pfn;
>> +	unsigned long count;
>> +	pid_t pid;
>> +	char comm[TASK_COMM_LEN];
>> +	unsigned int latency;
>> +	unsigned long trace_entries[16];
>> +	unsigned int nr_entries;
>> +	struct list_head list;
>> +};
>> +#endif
>> +
>>  static struct cma cma_areas[MAX_CMA_AREAS];
>>  static unsigned cma_area_count;
>>  static DEFINE_MUTEX(cma_mutex);
>> @@ -132,6 +153,10 @@ static int __init cma_activate_area(struct cma *cma)
>>  	} while (--i);
>>
>>  	mutex_init(&cma->lock);
>> +#ifdef CONFIG_CMA_DEBUG
>> +	INIT_LIST_HEAD(&cma->buffers_list);
>> +	mutex_init(&cma->list_lock);
>> +#endif
>>  	return 0;
>>
>>  err:
>> @@ -347,6 +372,86 @@ err:
>>  	return ret;
>>  }
>>
>> +#ifdef CONFIG_CMA_DEBUG
>> +/**
>> + * cma_buffer_list_add() - add a new entry to a list of allocated buffers
>> + * @cma:     Contiguous memory region for which the allocation is performed.
>> + * @pfn:     Base PFN of the allocated buffer.
>> + * @count:   Number of allocated pages.
>> + * @latency: Nanoseconds spent to allocate the buffer.
>> + *
>> + * This function adds a new entry to the list of allocated contiguous memory
>> + * buffers in a CMA area. It uses the CMA area specified by the device
>> + * if available or the default global one otherwise.
>> + */
>> +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn,
>> +			       int count, s64 latency)
>> +{
>> +	struct cma_buffer *cmabuf;
>> +	struct stack_trace trace;
>> +
>> +	cmabuf = kmalloc(sizeof(struct cma_buffer), GFP_KERNEL);
>> +	if (!cmabuf)
>> +		return -ENOMEM;
>> +
>> +	trace.nr_entries = 0;
>> +	trace.max_entries = ARRAY_SIZE(cmabuf->trace_entries);
>> +	trace.entries = &cmabuf->trace_entries[0];
>> +	trace.skip = 2;
>> +	save_stack_trace(&trace);
>> +
>> +	cmabuf->pfn = pfn;
>> +	cmabuf->count = count;
>> +	cmabuf->pid = task_pid_nr(current);
>> +	cmabuf->nr_entries = trace.nr_entries;
>> +	get_task_comm(cmabuf->comm, current);
>> +	cmabuf->latency = (unsigned int) div_s64(latency, NSEC_PER_USEC);
>> +
>> +	mutex_lock(&cma->list_lock);
>> +	list_add_tail(&cmabuf->list, &cma->buffers_list);
>> +	mutex_unlock(&cma->list_lock);
>> +
>> +	return 0;
>> +}
>> +
>> +/**
>> + * cma_buffer_list_del() - delete an entry from a list of allocated buffers
>> + * @cma: Contiguous memory region for which the allocation was performed.
>> + * @pfn: Base PFN of the released buffer.
>> + *
>> + * This function deletes a list entry added by cma_buffer_list_add().
>> + */
>> +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn)
>> +{
>> +	struct cma_buffer *cmabuf;
>> +
>> +	mutex_lock(&cma->list_lock);
>> +
>> +	list_for_each_entry(cmabuf, &cma->buffers_list, list)
>> +		if (cmabuf->pfn == pfn) {
>> +			list_del(&cmabuf->list);
>> +			kfree(cmabuf);
>> +			goto out;
>> +		}
>> +
>
> Is there a more elegant way to find the buffer? This linear search overhead
> would change system behaviour if there are lots of buffers.
>
>> +	pr_err("%s(pfn %lu): couldn't find buffers list entry\n",
>> +	       __func__, pfn);
>> +
>> +out:
>> +	mutex_unlock(&cma->list_lock);
>> +}
>> +#else
>> +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn,
>> +			       int count, s64 latency)
>> +{
>> +	return 0;
>> +}
>> +
>> +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn)
>> +{
>> +}
>> +#endif /* CONFIG_CMA_DEBUG */
>> +
>>  /**
>>   * cma_alloc() - allocate pages from contiguous area
>>   * @cma: Contiguous memory region for which the allocation is performed.
>> @@ -361,11 +466,15 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
>>  	unsigned long mask, offset, pfn, start = 0;
>>  	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
>>  	struct page *page = NULL;
>> +	struct timespec ts1, ts2;
>> +	s64 latency;
>>  	int ret;
>>
>>  	if (!cma || !cma->count)
>>  		return NULL;
>>
>> +	getnstimeofday(&ts1);
>> +
>>  	pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
>>  		 count, align);
>>
>> @@ -413,6 +522,19 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
>>  		start = bitmap_no + mask + 1;
>>  	}
>>
>> +	getnstimeofday(&ts2);
>> +	latency = timespec_to_ns(&ts2) - timespec_to_ns(&ts1);
>> +
>> +	if (page) {
>> +		ret = cma_buffer_list_add(cma, pfn, count, latency);
>> +		if (ret) {
>> +			pr_warn("%s(): cma_buffer_list_add() returned %d\n",
>> +				__func__, ret);
>> +			cma_release(cma, page, count);
>> +			page = NULL;
>> +		}
>
> So, we would fail to allocate CMA memory if we can't allocate the buffer for
> debugging. I don't think that makes sense. With tracepoints, we don't need
> to allocate a buffer at runtime.
>
> Thanks.
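
P.S. To make the tracepoint idea concrete, below is a rough sketch of what I
have in mind. This is only an illustration: the event name, its fields and
the header itself are my assumptions, there is no include/trace/events/cma.h
today.

/* Hypothetical include/trace/events/cma.h -- illustrative only. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cma

#if !defined(_TRACE_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_CMA_H

#include <linux/types.h>
#include <linux/tracepoint.h>

/* Fired from cma_alloc() once an allocation has succeeded. */
TRACE_EVENT(cma_alloc,

	TP_PROTO(unsigned long pfn, unsigned int count, unsigned int align),

	TP_ARGS(pfn, count, align),

	TP_STRUCT__entry(
		__field(unsigned long, pfn)
		__field(unsigned int, count)
		__field(unsigned int, align)
	),

	TP_fast_assign(
		__entry->pfn = pfn;
		__entry->count = count;
		__entry->align = align;
	),

	TP_printk("pfn=0x%lx count=%u align=%u",
		  __entry->pfn, __entry->count, __entry->align)
);

#endif /* _TRACE_CMA_H */

/* This part must be outside the include guard. */
#include <trace/define_trace.h>

cma_alloc() would then just call trace_cma_alloc(pfn, count, align) on
success, and cma_release() a matching trace_cma_release(). PID and comm are
recorded by the trace infrastructure for free, a stack trace is available via
the stacktrace trace option, and latency could be derived from the event
timestamps, so no kmalloc() in the allocation path would be needed.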
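P.P.S. Regarding the linear search: if we keep an in-kernel structure at all,
the list could be replaced with an rbtree keyed by base pfn, making the
lookup in cma_buffer_list_del() O(log n). Again, this is only a sketch under
the assumption that struct cma_buffer gets a "struct rb_node node;" instead
of the list_head and struct cma a "struct rb_root buffers_root;" initialised
to RB_ROOT, both still protected by cma->list_lock (names are illustrative):

#include <linux/rbtree.h>

static void cma_buffer_insert(struct cma *cma, struct cma_buffer *new)
{
	struct rb_node **link = &cma->buffers_root.rb_node;
	struct rb_node *parent = NULL;

	/* Descend to the leaf where the new entry belongs, keyed by pfn. */
	while (*link) {
		struct cma_buffer *buf;

		parent = *link;
		buf = rb_entry(parent, struct cma_buffer, node);
		link = new->pfn < buf->pfn ? &parent->rb_left
					   : &parent->rb_right;
	}
	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &cma->buffers_root);
}

static struct cma_buffer *cma_buffer_find(struct cma *cma, unsigned long pfn)
{
	struct rb_node *node = cma->buffers_root.rb_node;

	/* Standard binary search over the tree. */
	while (node) {
		struct cma_buffer *buf =
			rb_entry(node, struct cma_buffer, node);

		if (pfn < buf->pfn)
			node = node->rb_left;
		else if (pfn > buf->pfn)
			node = node->rb_right;
		else
			return buf;
	}
	return NULL;
}

cma_buffer_list_del() would then become cma_buffer_find() plus rb_erase() and
kfree() under the lock. But as you say, with tracepoints none of this
bookkeeping would be needed at all.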