Re: [PATCH] mm: add vm event for page cache miss

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue 02-04-19 14:15:20, Yafang Shao wrote:
> We found that some latency spikes on our database server were caused by
> page cache misses.
> So we decided to measure page cache misses.
> Currently the kernel lacks a facility for measuring them.

What are you going to use this information for?

> This patch introduces a new vm counter PGCACHEMISS for this purpose.
> This counter will be incremented in the scenarios below:
> - page cache miss in generic file read routine
> - read access page cache miss in mmap
> - read access page cache miss in swapin
>
> NB: the readahead routine is not counted because it won't stall the
> application directly.

Doesn't this partially reopen the side channel we closed for mincore
just recently?

> Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
> ---
>  include/linux/pagemap.h       | 7 +++++++
>  include/linux/vm_event_item.h | 1 +
>  mm/filemap.c                  | 2 ++
>  mm/memory.c                   | 1 +
>  mm/shmem.c                    | 9 +++++----
>  mm/vmstat.c                   | 1 +
>  6 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index f939e00..8355b51 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -233,6 +233,13 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
>  pgoff_t page_cache_prev_miss(struct address_space *mapping,
>  			     pgoff_t index, unsigned long max_scan);
>  
> +static inline void page_cache_read_miss(struct vm_fault *vmf)
> +{
> +	if (!vmf || (vmf->flags & (FAULT_FLAG_USER | FAULT_FLAG_WRITE)) ==
> +	    FAULT_FLAG_USER)
> +		count_vm_event(PGCACHEMISS);
> +}
> +
>  #define FGP_ACCESSED		0x00000001
>  #define FGP_LOCK		0x00000002
>  #define FGP_CREAT		0x00000004
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 47a3441..d589f05 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -29,6 +29,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  		PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
>  		PGFAULT, PGMAJFAULT,
>  		PGLAZYFREED,
> +		PGCACHEMISS,
>  		PGREFILL,
>  		PGSTEAL_KSWAPD,
>  		PGSTEAL_DIRECT,
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 4157f85..fc12c2d 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2256,6 +2256,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  		goto out;
>  
>  no_cached_page:
> +		page_cache_read_miss(NULL);
>  		/*
>  		 * Ok, it wasn't cached, so we need to create a new
>  		 * page..
> @@ -2556,6 +2557,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
>  		fpin = do_async_mmap_readahead(vmf, page);
>  	} else if (!page) {
>  		/* No page in the page cache at all */
> +		page_cache_read_miss(vmf);
>  		count_vm_event(PGMAJFAULT);
>  		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
>  		ret = VM_FAULT_MAJOR;
> diff --git a/mm/memory.c b/mm/memory.c
> index bd157f2..63bcd41 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2754,6 +2754,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  		ret = VM_FAULT_MAJOR;
>  		count_vm_event(PGMAJFAULT);
>  		count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
> +		page_cache_read_miss(vmf);
>  	} else if (PageHWPoison(page)) {
>  		/*
>  		 * hwpoisoned dirty swapcache pages are kept for killing
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 3a4b74c..47e33a4 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -127,7 +127,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
>  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  			     struct page **pagep, enum sgp_type sgp,
>  			     gfp_t gfp, struct vm_area_struct *vma,
> -			     vm_fault_t *fault_type);
> +			     struct vm_fault *vmf, vm_fault_t *fault_type);
>  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  		struct page **pagep, enum sgp_type sgp,
>  		gfp_t gfp, struct vm_area_struct *vma,
> @@ -1159,7 +1159,7 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
>  		error = shmem_swapin_page(inode, indices[i],
>  					  &page, SGP_CACHE,
>  					  mapping_gfp_mask(mapping),
> -					  NULL, NULL);
> +					  NULL, NULL, NULL);
>  		if (error == 0) {
>  			unlock_page(page);
>  			put_page(page);
> @@ -1614,7 +1614,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
>  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  			     struct page **pagep, enum sgp_type sgp,
>  			     gfp_t gfp, struct vm_area_struct *vma,
> -			     vm_fault_t *fault_type)
> +			     struct vm_fault *vmf, vm_fault_t *fault_type)
>  {
>  	struct address_space *mapping = inode->i_mapping;
>  	struct shmem_inode_info *info = SHMEM_I(inode);
> @@ -1636,6 +1636,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  			*fault_type |= VM_FAULT_MAJOR;
>  			count_vm_event(PGMAJFAULT);
>  			count_memcg_event_mm(charge_mm, PGMAJFAULT);
> +			page_cache_read_miss(vmf);
>  		}
>  		/* Here we actually start the io */
>  		page = shmem_swapin(swap, gfp, info, index);
> @@ -1758,7 +1759,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  	page = find_lock_entry(mapping, index);
>  	if (xa_is_value(page)) {
>  		error = shmem_swapin_page(inode, index, &page,
> -					  sgp, gfp, vma, fault_type);
> +					  sgp, gfp, vma, vmf, fault_type);
>  		if (error == -EEXIST)
>  			goto repeat;
>  
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 36b56f8..c49ecba 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1188,6 +1188,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
>  	"pgfault",
>  	"pgmajfault",
>  	"pglazyfreed",
> +	"pgcachemiss",
>  
>  	"pgrefill",
>  	"pgsteal_kswapd",
> -- 
> 1.8.3.1
> 

-- 
Michal Hocko
SUSE Labs




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux