On Thu, Jan 22, 2015 at 06:35:53PM +0300, Stefan Strogin wrote:
> Hello Joonsoo,
>
> On 30/12/14 07:38, Joonsoo Kim wrote:
> > On Fri, Dec 26, 2014 at 05:39:03PM +0300, Stefan I. Strogin wrote:
> >> /proc/cmainfo contains a list of currently allocated CMA buffers for every
> >> CMA area when CONFIG_CMA_DEBUG is enabled.
> >
> > Hello,
> >
> > I think that providing this information looks useful, but we need a better
> > implementation. As Laura said, it is better to use debugfs. And,
> > instead of re-implementing the wheel, how about using tracepoints
> > to print this information? See the comments below.
>
> Excuse me for the long delay. I've tried to give a detailed answer here:
> https://lkml.org/lkml/2015/1/21/362
> By «the re-implemented wheel» do you mean seq_print_stack_trace()? If so,
> it was meant to show the owner of each allocated buffer. I used an
> approach similar to page_owner: saving a stack_trace for each allocation.
> Do you think we can use tracepoints instead?

I explained why I said this is a re-implemented wheel in my reply to the
other mail. Please refer to it.
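
On the debugfs point, something along these lines is what I would expect
(a rough, untested sketch; cma_buffers_show(), the fops wiring and the
file name are illustrative, reusing the buffers_list and list_lock fields
from your patch):

    /* Illustrative: dump one CMA area's buffer list through a seq_file.
     * Needs <linux/debugfs.h> and <linux/seq_file.h>. */
    static int cma_buffers_show(struct seq_file *m, void *data)
    {
            struct cma *cma = m->private;
            struct cma_buffer *cmabuf;

            mutex_lock(&cma->list_lock);
            list_for_each_entry(cmabuf, &cma->buffers_list, list)
                    /* pfn-based; PFN_PHYS() could give byte addresses */
                    seq_printf(m, "%lu - %lu (%lu kB), allocated by %d (\"%s\"), latency %u us\n",
                               cmabuf->pfn, cmabuf->pfn + cmabuf->count,
                               cmabuf->count << (PAGE_SHIFT - 10),
                               cmabuf->pid, cmabuf->comm, cmabuf->latency);
            mutex_unlock(&cma->list_lock);

            return 0;
    }

    static int cma_buffers_open(struct inode *inode, struct file *file)
    {
            return single_open(file, cma_buffers_show, inode->i_private);
    }

    static const struct file_operations cma_buffers_fops = {
            .open    = cma_buffers_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

Each CMA area would then register the file with something like
debugfs_create_file("buffers", 0444, parent, cma, &cma_buffers_fops);
the struct cma pointer comes back in m->private via single_open().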

Thanks.

> >
> >> Format is:
> >>
> >> <base_phys_addr> - <end_phys_addr> (<size> kB), allocated by <PID>\
> >> (<command name>), latency <allocation latency> us
> >>  <stack backtrace when the buffer had been allocated>
> >>
> >> Signed-off-by: Stefan I. Strogin <s.strogin@xxxxxxxxxxxxxxxxxxx>
> >> ---
> >>  mm/cma.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 202 insertions(+)
> >>
> >> diff --git a/mm/cma.c b/mm/cma.c
> >> index a85ae28..ffaea26 100644
> >> --- a/mm/cma.c
> >> +++ b/mm/cma.c
> >> @@ -34,6 +34,10 @@
> >>  #include <linux/cma.h>
> >>  #include <linux/highmem.h>
> >>  #include <linux/io.h>
> >> +#include <linux/list.h>
> >> +#include <linux/proc_fs.h>
> >> +#include <linux/uaccess.h>
> >> +#include <linux/time.h>
> >>
> >>  struct cma {
> >>          unsigned long   base_pfn;
> >> @@ -41,8 +45,25 @@ struct cma {
> >>          unsigned long   *bitmap;
> >>          unsigned int order_per_bit; /* Order of pages represented by one bit */
> >>          struct mutex    lock;
> >> +#ifdef CONFIG_CMA_DEBUG
> >> +        struct list_head buffers_list;
> >> +        struct mutex    list_lock;
> >> +#endif
> >>  };
> >>
> >> +#ifdef CONFIG_CMA_DEBUG
> >> +struct cma_buffer {
> >> +        unsigned long pfn;
> >> +        unsigned long count;
> >> +        pid_t pid;
> >> +        char comm[TASK_COMM_LEN];
> >> +        unsigned int latency;
> >> +        unsigned long trace_entries[16];
> >> +        unsigned int nr_entries;
> >> +        struct list_head list;
> >> +};
> >> +#endif
> >> +
> >>  static struct cma cma_areas[MAX_CMA_AREAS];
> >>  static unsigned cma_area_count;
> >>  static DEFINE_MUTEX(cma_mutex);
> >> @@ -132,6 +153,10 @@ static int __init cma_activate_area(struct cma *cma)
> >>          } while (--i);
> >>
> >>          mutex_init(&cma->lock);
> >> +#ifdef CONFIG_CMA_DEBUG
> >> +        INIT_LIST_HEAD(&cma->buffers_list);
> >> +        mutex_init(&cma->list_lock);
> >> +#endif
> >>          return 0;
> >>
> >>  err:
> >> @@ -347,6 +372,86 @@ err:
> >>          return ret;
> >>  }
> >>
> >> +#ifdef CONFIG_CMA_DEBUG
> >> +/**
> >> + * cma_buffer_list_add() - add a new entry to a list of allocated buffers
> >> + * @cma:     Contiguous memory region for which the allocation is performed.
> >> + * @pfn:     Base PFN of the allocated buffer.
> >> + * @count:   Number of allocated pages.
> >> + * @latency: Nanoseconds spent to allocate the buffer.
> >> + *
> >> + * This function adds a new entry to the list of allocated contiguous memory
> >> + * buffers in a CMA area. It uses the CMA area specified by the device
> >> + * if available or the default global one otherwise.
> >> + */
> >> +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn,
> >> +                               int count, s64 latency)
> >> +{
> >> +        struct cma_buffer *cmabuf;
> >> +        struct stack_trace trace;
> >> +
> >> +        cmabuf = kmalloc(sizeof(struct cma_buffer), GFP_KERNEL);
> >> +        if (!cmabuf)
> >> +                return -ENOMEM;
> >> +
> >> +        trace.nr_entries = 0;
> >> +        trace.max_entries = ARRAY_SIZE(cmabuf->trace_entries);
> >> +        trace.entries = &cmabuf->trace_entries[0];
> >> +        trace.skip = 2;
> >> +        save_stack_trace(&trace);
> >> +
> >> +        cmabuf->pfn = pfn;
> >> +        cmabuf->count = count;
> >> +        cmabuf->pid = task_pid_nr(current);
> >> +        cmabuf->nr_entries = trace.nr_entries;
> >> +        get_task_comm(cmabuf->comm, current);
> >> +        cmabuf->latency = (unsigned int) div_s64(latency, NSEC_PER_USEC);
> >> +
> >> +        mutex_lock(&cma->list_lock);
> >> +        list_add_tail(&cmabuf->list, &cma->buffers_list);
> >> +        mutex_unlock(&cma->list_lock);
> >> +
> >> +        return 0;
> >> +}
> >> +
> >> +/**
> >> + * cma_buffer_list_del() - delete an entry from a list of allocated buffers
> >> + * @cma: Contiguous memory region for which the allocation was performed.
> >> + * @pfn: Base PFN of the released buffer.
> >> + *
> >> + * This function deletes a list entry added by cma_buffer_list_add().
> >> + */
> >> +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn)
> >> +{
> >> +        struct cma_buffer *cmabuf;
> >> +
> >> +        mutex_lock(&cma->list_lock);
> >> +
> >> +        list_for_each_entry(cmabuf, &cma->buffers_list, list)
> >> +                if (cmabuf->pfn == pfn) {
> >> +                        list_del(&cmabuf->list);
> >> +                        kfree(cmabuf);
> >> +                        goto out;
> >> +                }
> >> +
> >
> > Is there a more elegant way to find the buffer? This linear search overhead
> > would change system behaviour if there are lots of buffers.
> >
> >> +        pr_err("%s(pfn %lu): couldn't find buffers list entry\n",
> >> +               __func__, pfn);
> >> +
> >> +out:
> >> +        mutex_unlock(&cma->list_lock);
> >> +}
> >> +#else
> >> +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn,
> >> +                               int count, s64 latency)
> >> +{
> >> +        return 0;
> >> +}
> >> +
> >> +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn)
> >> +{
> >> +}
> >> +#endif /* CONFIG_CMA_DEBUG */
> >> +
> >>  /**
> >>   * cma_alloc() - allocate pages from contiguous area
> >>   * @cma:   Contiguous memory region for which the allocation is performed.
> >> @@ -361,11 +466,15 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
> >>          unsigned long mask, offset, pfn, start = 0;
> >>          unsigned long bitmap_maxno, bitmap_no, bitmap_count;
> >>          struct page *page = NULL;
> >> +        struct timespec ts1, ts2;
> >> +        s64 latency;
> >>          int ret;
> >>
> >>          if (!cma || !cma->count)
> >>                  return NULL;
> >>
> >> +        getnstimeofday(&ts1);
> >> +
> >>          pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
> >>                   count, align);
> >>
> >> @@ -413,6 +522,19 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
> >>                  start = bitmap_no + mask + 1;
> >>          }
> >>
> >> +        getnstimeofday(&ts2);
> >> +        latency = timespec_to_ns(&ts2) - timespec_to_ns(&ts1);
> >> +
> >> +        if (page) {
> >> +                ret = cma_buffer_list_add(cma, pfn, count, latency);
> >> +                if (ret) {
> >> +                        pr_warn("%s(): cma_buffer_list_add() returned %d\n",
> >> +                                __func__, ret);
> >> +                        cma_release(cma, page, count);
> >> +                        page = NULL;
> >> +                }
> >
> > So, we would fail to allocate CMA memory if we can't allocate a buffer
> > for debugging. I don't think that makes sense. With tracepoints, we
> > don't need to allocate any buffer at runtime.
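
To make that point concrete, this is roughly the kind of event I have in
mind (an untested sketch; the cma_alloc event name and its fields are
illustrative, not an existing kernel event). The tracing core records the
caller's pid and comm by itself, and a call stack is available through the
stacktrace trace option, so nothing needs to be kmalloc'ed on the
allocation path and logging can never fail the allocation:

    /* Illustrative trace event, to live in e.g. include/trace/events/cma.h. */
    TRACE_EVENT(cma_alloc,

            TP_PROTO(unsigned long pfn, int count, s64 latency_ns),

            TP_ARGS(pfn, count, latency_ns),

            TP_STRUCT__entry(
                    __field(unsigned long, pfn)
                    __field(int,           count)
                    __field(s64,           latency_ns)
            ),

            TP_fast_assign(
                    __entry->pfn = pfn;
                    __entry->count = count;
                    __entry->latency_ns = latency_ns;
            ),

            TP_printk("pfn=%lu count=%d latency=%lld ns",
                      __entry->pfn, __entry->count,
                      (long long)__entry->latency_ns)
    );

cma_alloc() would then just call trace_cma_alloc(pfn, count, latency) when
the allocation succeeds, and cma_release() could have a matching
trace_cma_release(); user space reads both back from the trace buffer
instead of /proc/cmainfo.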
> >
> > Thanks.
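
Coming back to the lookup question above: if the per-area list survives in
some form, an rbtree keyed by the base pfn would make the lookup in
cma_buffer_list_del() O(log n) instead of a linear scan. A rough, untested
sketch (it assumes struct cma_buffer carries a struct rb_node node instead
of the list_head, and struct cma a struct rb_root buffers_root):

    /* Illustrative: insert a buffer into a pfn-keyed rbtree. */
    static int cma_buffer_insert(struct rb_root *root, struct cma_buffer *new)
    {
            struct rb_node **link = &root->rb_node, *parent = NULL;

            while (*link) {
                    struct cma_buffer *buf;

                    parent = *link;
                    buf = rb_entry(parent, struct cma_buffer, node);
                    if (new->pfn < buf->pfn)
                            link = &parent->rb_left;
                    else if (new->pfn > buf->pfn)
                            link = &parent->rb_right;
                    else
                            return -EEXIST; /* base pfn already tracked */
            }
            rb_link_node(&new->node, parent, link);
            rb_insert_color(&new->node, root);

            return 0;
    }

Deletion would walk down from root->rb_node the same way to find the
matching pfn and then call rb_erase(&buf->node, root).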