On Fri, Dec 26, 2014 at 05:39:03PM +0300, Stefan I. Strogin wrote: > /proc/cmainfo contains a list of currently allocated CMA buffers for every > CMA area when CONFIG_CMA_DEBUG is enabled. Hello, I think that providing these information looks useful, but, we need better implementation. As Laura said, it is better to use debugfs. And, instead of re-implementing the wheel, how about using tracepoint to print these information? See below comments. > > Format is: > > <base_phys_addr> - <end_phys_addr> (<size> kB), allocated by <PID>\ > (<command name>), latency <allocation latency> us > <stack backtrace when the buffer had been allocated> > > Signed-off-by: Stefan I. Strogin <s.strogin@xxxxxxxxxxxxxxxxxxx> > --- > mm/cma.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 202 insertions(+) > > diff --git a/mm/cma.c b/mm/cma.c > index a85ae28..ffaea26 100644 > --- a/mm/cma.c > +++ b/mm/cma.c > @@ -34,6 +34,10 @@ > #include <linux/cma.h> > #include <linux/highmem.h> > #include <linux/io.h> > +#include <linux/list.h> > +#include <linux/proc_fs.h> > +#include <linux/uaccess.h> > +#include <linux/time.h> > > struct cma { > unsigned long base_pfn; > @@ -41,8 +45,25 @@ struct cma { > unsigned long *bitmap; > unsigned int order_per_bit; /* Order of pages represented by one bit */ > struct mutex lock; > +#ifdef CONFIG_CMA_DEBUG > + struct list_head buffers_list; > + struct mutex list_lock; > +#endif > }; > > +#ifdef CONFIG_CMA_DEBUG > +struct cma_buffer { > + unsigned long pfn; > + unsigned long count; > + pid_t pid; > + char comm[TASK_COMM_LEN]; > + unsigned int latency; > + unsigned long trace_entries[16]; > + unsigned int nr_entries; > + struct list_head list; > +}; > +#endif > + > static struct cma cma_areas[MAX_CMA_AREAS]; > static unsigned cma_area_count; > static DEFINE_MUTEX(cma_mutex); > @@ -132,6 +153,10 @@ static int __init cma_activate_area(struct cma *cma) > } while (--i); > > mutex_init(&cma->lock); > +#ifdef CONFIG_CMA_DEBUG > + INIT_LIST_HEAD(&cma->buffers_list); > + mutex_init(&cma->list_lock); > +#endif > return 0; > > err: > @@ -347,6 +372,86 @@ err: > return ret; > } > > +#ifdef CONFIG_CMA_DEBUG > +/** > + * cma_buffer_list_add() - add a new entry to a list of allocated buffers > + * @cma: Contiguous memory region for which the allocation is performed. > + * @pfn: Base PFN of the allocated buffer. > + * @count: Number of allocated pages. > + * @latency: Nanoseconds spent to allocate the buffer. > + * > + * This function adds a new entry to the list of allocated contiguous memory > + * buffers in a CMA area. It uses the CMA area specificated by the device > + * if available or the default global one otherwise. > + */ > +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn, > + int count, s64 latency) > +{ > + struct cma_buffer *cmabuf; > + struct stack_trace trace; > + > + cmabuf = kmalloc(sizeof(struct cma_buffer), GFP_KERNEL); > + if (!cmabuf) > + return -ENOMEM; > + > + trace.nr_entries = 0; > + trace.max_entries = ARRAY_SIZE(cmabuf->trace_entries); > + trace.entries = &cmabuf->trace_entries[0]; > + trace.skip = 2; > + save_stack_trace(&trace); > + > + cmabuf->pfn = pfn; > + cmabuf->count = count; > + cmabuf->pid = task_pid_nr(current); > + cmabuf->nr_entries = trace.nr_entries; > + get_task_comm(cmabuf->comm, current); > + cmabuf->latency = (unsigned int) div_s64(latency, NSEC_PER_USEC); > + > + mutex_lock(&cma->list_lock); > + list_add_tail(&cmabuf->list, &cma->buffers_list); > + mutex_unlock(&cma->list_lock); > + > + return 0; > +} > + > +/** > + * cma_buffer_list_del() - delete an entry from a list of allocated buffers > + * @cma: Contiguous memory region for which the allocation was performed. > + * @pfn: Base PFN of the released buffer. > + * > + * This function deletes a list entry added by cma_buffer_list_add(). > + */ > +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn) > +{ > + struct cma_buffer *cmabuf; > + > + mutex_lock(&cma->list_lock); > + > + list_for_each_entry(cmabuf, &cma->buffers_list, list) > + if (cmabuf->pfn == pfn) { > + list_del(&cmabuf->list); > + kfree(cmabuf); > + goto out; > + } > + Is there more elegant way to find buffer? This linear search overhead would change system behaviour if there are lots of buffers. > + pr_err("%s(pfn %lu): couldn't find buffers list entry\n", > + __func__, pfn); > + > +out: > + mutex_unlock(&cma->list_lock); > +} > +#else > +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn, > + int count, s64 latency) > +{ > + return 0; > +} > + > +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn) > +{ > +} > +#endif /* CONFIG_CMA_DEBUG */ > + > /** > * cma_alloc() - allocate pages from contiguous area > * @cma: Contiguous memory region for which the allocation is performed. > @@ -361,11 +466,15 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align) > unsigned long mask, offset, pfn, start = 0; > unsigned long bitmap_maxno, bitmap_no, bitmap_count; > struct page *page = NULL; > + struct timespec ts1, ts2; > + s64 latency; > int ret; > > if (!cma || !cma->count) > return NULL; > > + getnstimeofday(&ts1); > + > pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, > count, align); > > @@ -413,6 +522,19 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align) > start = bitmap_no + mask + 1; > } > > + getnstimeofday(&ts2); > + latency = timespec_to_ns(&ts2) - timespec_to_ns(&ts1); > + > + if (page) { > + ret = cma_buffer_list_add(cma, pfn, count, latency); > + if (ret) { > + pr_warn("%s(): cma_buffer_list_add() returned %d\n", > + __func__, ret); > + cma_release(cma, page, count); > + page = NULL; > + } So, we would fail to allocate CMA memory if we can't allocate buffer for debugging. I don't think it makes sense. With tracepoint, we don't need to allocate buffer in runtime. Thanks. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>