Re: [PATCH] Add /proc/pid/smaps_rollup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Adding more people.

On Tue, Aug 08 2017, Daniel Colascione wrote:
> /proc/pid/smaps_rollup is a new proc file that improves the
> performance of user programs that determine aggregate memory
> statistics (e.g., total PSS) of a process.
>
> Anroid regularly "samples" the memory usage of various processes in
> order to blance its memory pool sizes. This sampling process involves
> opening /proc/pid/smaps and summing certain fields. For very large
> processes, sampling memory use this way can take several hundred
> milliseconds, due mostly to the overhead of the seq_printf calls in
> task_mmu.c.
>
> smaps_rollup improves the situation. It contains most of the fields of
> /proc/pid/smaps, but instead of a set of fields for each VMA,
> smaps_rollup instead contains one synthetic smaps-format entry
> representing the whole process. In the single smaps_rollup synthetic
> entry, each field is the summation of the corresponding field in all
> of the real-smaps VMAs. Using a common format for smaps_rollup and
> smaps allows userspace parsers to repurpose parsers meant for use with
> non-rollup smaps for smaps_rollup, and it allows userspace to switch
> between smaps_rollup and smaps at runtime (say, based on the
> availablity of smaps_rollup in a given kernel) with minimal fuss.
>
> By using smaps_rollup instead of smaps, a caller can avoid the
> significant overhead of formatting, reading, and parsing each of a
> large process's potentially very numerous memory mappings. For
> sampling system_server's PSS in Android, we measured a 12x speedup,
> representing a savings of several hundred milliseconds.
>
> One alternative to a new per-process proc file would have been
> including PSS information in /proc/pid/status. We considered this
> option but thought that PSS would be too expensive (by a few orders of
> magnitude) to collect relative to what's already emitted as part of
> /proc/pid/status, and slowing every user of /proc/pid/status for the
> sake of readers that happen to want PSS feels wrong.
>
> The code itself works by reusing the existing VMA-walking framework we
> use for regular smaps generation and keeping the mem_size_stats
> structure around between VMA walks instead of using a fresh one for
> each VMA.  In this way, summation happens automatically.  We let
> seq_file walk over the VMAs just as it does for regular smaps and just
> emit nothing to the seq_file until we hit the last VMA.
>
> Signed-off-by: Daniel Colascione <dancol@xxxxxxxxxx>
> ---
>  fs/proc/base.c     |   2 +
>  fs/proc/internal.h |   3 +
>  fs/proc/task_mmu.c | 196 ++++++++++++++++++++++++++++++++++++-----------------
>  3 files changed, 139 insertions(+), 62 deletions(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 719c2e943ea1..a9587b9cace5 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2930,6 +2930,7 @@ static const struct pid_entry tgid_base_stuff[] = {
>  #ifdef CONFIG_PROC_PAGE_MONITOR
>  	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
>  	REG("smaps",      S_IRUGO, proc_pid_smaps_operations),
> +	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
>  	REG("pagemap",    S_IRUSR, proc_pagemap_operations),
>  #endif
>  #ifdef CONFIG_SECURITY
> @@ -3323,6 +3324,7 @@ static const struct pid_entry tid_base_stuff[] = {
>  #ifdef CONFIG_PROC_PAGE_MONITOR
>  	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
>  	REG("smaps",     S_IRUGO, proc_tid_smaps_operations),
> +	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
>  	REG("pagemap",    S_IRUSR, proc_pagemap_operations),
>  #endif
>  #ifdef CONFIG_SECURITY
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index aa2b89071630..2cbfcd32e884 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -269,10 +269,12 @@ extern int proc_remount(struct super_block *, int *, char *);
>  /*
>   * task_[no]mmu.c
>   */
> +struct mem_size_stats;
>  struct proc_maps_private {
>  	struct inode *inode;
>  	struct task_struct *task;
>  	struct mm_struct *mm;
> +	struct mem_size_stats *rollup;
>  #ifdef CONFIG_MMU
>  	struct vm_area_struct *tail_vma;
>  #endif
> @@ -288,6 +290,7 @@ extern const struct file_operations proc_tid_maps_operations;
>  extern const struct file_operations proc_pid_numa_maps_operations;
>  extern const struct file_operations proc_tid_numa_maps_operations;
>  extern const struct file_operations proc_pid_smaps_operations;
> +extern const struct file_operations proc_pid_smaps_rollup_operations;
>  extern const struct file_operations proc_tid_smaps_operations;
>  extern const struct file_operations proc_clear_refs_operations;
>  extern const struct file_operations proc_pagemap_operations;
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index b836fd61ed87..02b55df7291c 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -252,6 +252,7 @@ static int proc_map_release(struct inode *inode, struct file *file)
>  	if (priv->mm)
>  		mmdrop(priv->mm);
>  
> +	kfree(priv->rollup);
>  	return seq_release_private(inode, file);
>  }
>  
> @@ -278,6 +279,23 @@ static int is_stack(struct proc_maps_private *priv,
>  		vma->vm_end >= vma->vm_mm->start_stack;
>  }
>  
> +static void show_vma_header_prefix(struct seq_file *m,
> +				   unsigned long start, unsigned long end,
> +				   vm_flags_t flags, unsigned long long pgoff,
> +				   dev_t dev, unsigned long ino)
> +{
> +	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
> +	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
> +		   start,
> +		   end,
> +		   flags & VM_READ ? 'r' : '-',
> +		   flags & VM_WRITE ? 'w' : '-',
> +		   flags & VM_EXEC ? 'x' : '-',
> +		   flags & VM_MAYSHARE ? 's' : 'p',
> +		   pgoff,
> +		   MAJOR(dev), MINOR(dev), ino);
> +}
> +
>  static void
>  show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
>  {
> @@ -300,17 +318,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
>  
>  	start = vma->vm_start;
>  	end = vma->vm_end;
> -
> -	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
> -	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
> -			start,
> -			end,
> -			flags & VM_READ ? 'r' : '-',
> -			flags & VM_WRITE ? 'w' : '-',
> -			flags & VM_EXEC ? 'x' : '-',
> -			flags & VM_MAYSHARE ? 's' : 'p',
> -			pgoff,
> -			MAJOR(dev), MINOR(dev), ino);
> +	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
>  
>  	/*
>  	 * Print the dentry name for named mappings, and a
> @@ -429,6 +437,7 @@ const struct file_operations proc_tid_maps_operations = {
>  
>  #ifdef CONFIG_PROC_PAGE_MONITOR
>  struct mem_size_stats {
> +	bool first;
>  	unsigned long resident;
>  	unsigned long shared_clean;
>  	unsigned long shared_dirty;
> @@ -442,7 +451,9 @@ struct mem_size_stats {
>  	unsigned long swap;
>  	unsigned long shared_hugetlb;
>  	unsigned long private_hugetlb;
> +	unsigned long first_vma_start;
>  	u64 pss;
> +	u64 pss_locked;
>  	u64 swap_pss;
>  	bool check_shmem_swap;
>  };
> @@ -718,18 +729,36 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
>  
>  static int show_smap(struct seq_file *m, void *v, int is_pid)
>  {
> +	struct proc_maps_private *priv = m->private;
>  	struct vm_area_struct *vma = v;
> -	struct mem_size_stats mss;
> +	struct mem_size_stats mss_stack;
> +	struct mem_size_stats *mss;
>  	struct mm_walk smaps_walk = {
>  		.pmd_entry = smaps_pte_range,
>  #ifdef CONFIG_HUGETLB_PAGE
>  		.hugetlb_entry = smaps_hugetlb_range,
>  #endif
>  		.mm = vma->vm_mm,
> -		.private = &mss,
>  	};
> +	int ret = 0;
> +	bool rollup_mode;
> +	bool last_vma;
> +
> +	if (priv->rollup) {
> +		rollup_mode = true;
> +		mss = priv->rollup;
> +		if (mss->first) {
> +			mss->first_vma_start = vma->vm_start;
> +			mss->first = false;
> +		}
> +		last_vma = !m_next_vma(priv, vma);
> +	} else {
> +		rollup_mode = false;
> +		memset(&mss_stack, 0, sizeof(mss_stack));
> +		mss = &mss_stack;
> +	}
>  
> -	memset(&mss, 0, sizeof mss);
> +	smaps_walk.private = mss;
>  
>  #ifdef CONFIG_SHMEM
>  	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
> @@ -747,9 +776,9 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
>  
>  		if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
>  					!(vma->vm_flags & VM_WRITE)) {
> -			mss.swap = shmem_swapped;
> +			mss->swap = shmem_swapped;
>  		} else {
> -			mss.check_shmem_swap = true;
> +			mss->check_shmem_swap = true;
>  			smaps_walk.pte_hole = smaps_pte_hole;
>  		}
>  	}
> @@ -757,54 +786,71 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
>  
>  	/* mmap_sem is held in m_start */
>  	walk_page_vma(vma, &smaps_walk);
> +	if (vma->vm_flags & VM_LOCKED)
> +		mss->pss_locked += mss->pss;
> +
> +	if (!rollup_mode) {
> +		show_map_vma(m, vma, is_pid);
> +	} else if (last_vma) {
> +		show_vma_header_prefix(
> +			m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
> +		seq_pad(m, ' ');
> +		seq_puts(m, "[rollup]\n");
> +	} else {
> +		ret = SEQ_SKIP;
> +	}
>  
> -	show_map_vma(m, vma, is_pid);
> -
> -	seq_printf(m,
> -		   "Size:           %8lu kB\n"
> -		   "Rss:            %8lu kB\n"
> -		   "Pss:            %8lu kB\n"
> -		   "Shared_Clean:   %8lu kB\n"
> -		   "Shared_Dirty:   %8lu kB\n"
> -		   "Private_Clean:  %8lu kB\n"
> -		   "Private_Dirty:  %8lu kB\n"
> -		   "Referenced:     %8lu kB\n"
> -		   "Anonymous:      %8lu kB\n"
> -		   "LazyFree:       %8lu kB\n"
> -		   "AnonHugePages:  %8lu kB\n"
> -		   "ShmemPmdMapped: %8lu kB\n"
> -		   "Shared_Hugetlb: %8lu kB\n"
> -		   "Private_Hugetlb: %7lu kB\n"
> -		   "Swap:           %8lu kB\n"
> -		   "SwapPss:        %8lu kB\n"
> -		   "KernelPageSize: %8lu kB\n"
> -		   "MMUPageSize:    %8lu kB\n"
> -		   "Locked:         %8lu kB\n",
> -		   (vma->vm_end - vma->vm_start) >> 10,
> -		   mss.resident >> 10,
> -		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
> -		   mss.shared_clean  >> 10,
> -		   mss.shared_dirty  >> 10,
> -		   mss.private_clean >> 10,
> -		   mss.private_dirty >> 10,
> -		   mss.referenced >> 10,
> -		   mss.anonymous >> 10,
> -		   mss.lazyfree >> 10,
> -		   mss.anonymous_thp >> 10,
> -		   mss.shmem_thp >> 10,
> -		   mss.shared_hugetlb >> 10,
> -		   mss.private_hugetlb >> 10,
> -		   mss.swap >> 10,
> -		   (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)),
> -		   vma_kernel_pagesize(vma) >> 10,
> -		   vma_mmu_pagesize(vma) >> 10,
> -		   (vma->vm_flags & VM_LOCKED) ?
> -			(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
> -
> -	arch_show_smap(m, vma);
> -	show_smap_vma_flags(m, vma);
> +	if (!rollup_mode)
> +		seq_printf(m,
> +			   "Size:           %8lu kB\n"
> +			   "KernelPageSize: %8lu kB\n"
> +			   "MMUPageSize:    %8lu kB\n",
> +			   (vma->vm_end - vma->vm_start) >> 10,
> +			   vma_kernel_pagesize(vma) >> 10,
> +			   vma_mmu_pagesize(vma) >> 10);
> +
> +
> +	if (!rollup_mode || last_vma)
> +		seq_printf(m,
> +			   "Rss:            %8lu kB\n"
> +			   "Pss:            %8lu kB\n"
> +			   "Shared_Clean:   %8lu kB\n"
> +			   "Shared_Dirty:   %8lu kB\n"
> +			   "Private_Clean:  %8lu kB\n"
> +			   "Private_Dirty:  %8lu kB\n"
> +			   "Referenced:     %8lu kB\n"
> +			   "Anonymous:      %8lu kB\n"
> +			   "LazyFree:       %8lu kB\n"
> +			   "AnonHugePages:  %8lu kB\n"
> +			   "ShmemPmdMapped: %8lu kB\n"
> +			   "Shared_Hugetlb: %8lu kB\n"
> +			   "Private_Hugetlb: %7lu kB\n"
> +			   "Swap:           %8lu kB\n"
> +			   "SwapPss:        %8lu kB\n"
> +			   "Locked:         %8lu kB\n",
> +			   mss->resident >> 10,
> +			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
> +			   mss->shared_clean  >> 10,
> +			   mss->shared_dirty  >> 10,
> +			   mss->private_clean >> 10,
> +			   mss->private_dirty >> 10,
> +			   mss->referenced >> 10,
> +			   mss->anonymous >> 10,
> +			   mss->lazyfree >> 10,
> +			   mss->anonymous_thp >> 10,
> +			   mss->shmem_thp >> 10,
> +			   mss->shared_hugetlb >> 10,
> +			   mss->private_hugetlb >> 10,
> +			   mss->swap >> 10,
> +			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
> +			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
> +
> +	if (!rollup_mode) {
> +		arch_show_smap(m, vma);
> +		show_smap_vma_flags(m, vma);
> +	}
>  	m_cache_vma(m, vma);
> -	return 0;
> +	return ret;
>  }
>  
>  static int show_pid_smap(struct seq_file *m, void *v)
> @@ -836,6 +882,25 @@ static int pid_smaps_open(struct inode *inode, struct file *file)
>  	return do_maps_open(inode, file, &proc_pid_smaps_op);
>  }
>  
> +static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
> +{
> +	struct seq_file *seq;
> +	struct proc_maps_private *priv;
> +	int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
> +
> +	if (ret < 0)
> +		return ret;
> +	seq = file->private_data;
> +	priv = seq->private;
> +	priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
> +	if (!priv->rollup) {
> +		proc_map_release(inode, file);
> +		return -ENOMEM;
> +	}
> +	priv->rollup->first = true;
> +	return 0;
> +}
> +
>  static int tid_smaps_open(struct inode *inode, struct file *file)
>  {
>  	return do_maps_open(inode, file, &proc_tid_smaps_op);
> @@ -848,6 +913,13 @@ const struct file_operations proc_pid_smaps_operations = {
>  	.release	= proc_map_release,
>  };
>  
> +const struct file_operations proc_pid_smaps_rollup_operations = {
> +	.open		= pid_smaps_rollup_open,
> +	.read		= seq_read,
> +	.llseek		= seq_lseek,
> +	.release	= proc_map_release,
> +};
> +
>  const struct file_operations proc_tid_smaps_operations = {
>  	.open		= tid_smaps_open,
>  	.read		= seq_read,



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux