On Tue 02-04-19 14:15:20, Yafang Shao wrote:
> We found that some latency spikes were caused by page cache misses on
> our database server.
> So we decided to measure the page cache misses.
> Currently the kernel lacks a facility for measuring them.

What are you going to use this information for?
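If the idea is that some userspace monitoring tool periodically samples
this counter, I would imagine something along the lines of the sketch
below - the pgcachemiss name is taken from this patch, while the
sampling interval and everything else is purely my guess, and it of
course only works on a kernel with this patch applied:

/*
 * Hypothetical consumer: report how many pgcachemiss events happened
 * in a 10s window by sampling /proc/vmstat twice.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static long read_pgcachemiss(void)
{
        char name[64];
        long val;
        FILE *f = fopen("/proc/vmstat", "r");

        if (!f)
                return -1;
        while (fscanf(f, "%63s %ld", name, &val) == 2) {
                if (!strcmp(name, "pgcachemiss")) {
                        fclose(f);
                        return val;
                }
        }
        fclose(f);
        return -1;      /* counter not exported by this kernel */
}

int main(void)
{
        long before = read_pgcachemiss();

        sleep(10);
        printf("pgcachemiss in the last 10s: %ld\n",
               read_pgcachemiss() - before);
        return 0;
}

If that is roughly the intended usage then please spell it out in the
changelog.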
> This patch introduces a new vm counter PGCACHEMISS for this purpose.
> This counter will be incremented in the scenarios below:
> - page cache miss in the generic file read routine
> - read access page cache miss in mmap
> - read access page cache miss in swapin
>
> NB, the readahead routine is not counted because it won't stall the
> application directly.

Doesn't this partially open the side channel we have closed for mincore
just recently?

> Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
> ---
>  include/linux/pagemap.h       | 7 +++++++
>  include/linux/vm_event_item.h | 1 +
>  mm/filemap.c                  | 2 ++
>  mm/memory.c                   | 1 +
>  mm/shmem.c                    | 9 +++++----
>  mm/vmstat.c                   | 1 +
>  6 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index f939e00..8355b51 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -233,6 +233,13 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
>  pgoff_t page_cache_prev_miss(struct address_space *mapping,
>                               pgoff_t index, unsigned long max_scan);
>
> +static inline void page_cache_read_miss(struct vm_fault *vmf)
> +{
> +        if (!vmf || (vmf->flags & (FAULT_FLAG_USER | FAULT_FLAG_WRITE)) ==
> +            FAULT_FLAG_USER)
> +                count_vm_event(PGCACHEMISS);
> +}
> +
>  #define FGP_ACCESSED            0x00000001
>  #define FGP_LOCK                0x00000002
>  #define FGP_CREAT               0x00000004
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 47a3441..d589f05 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -29,6 +29,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>                  PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
>                  PGFAULT, PGMAJFAULT,
>                  PGLAZYFREED,
> +                PGCACHEMISS,
>                  PGREFILL,
>                  PGSTEAL_KSWAPD,
>                  PGSTEAL_DIRECT,
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 4157f85..fc12c2d 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2256,6 +2256,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
>                          goto out;
>
>  no_cached_page:
> +                page_cache_read_miss(NULL);
>                  /*
>                   * Ok, it wasn't cached, so we need to create a new
>                   * page..
> @@ -2556,6 +2557,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
>                  fpin = do_async_mmap_readahead(vmf, page);
>          } else if (!page) {
>                  /* No page in the page cache at all */
> +                page_cache_read_miss(vmf);
>                  count_vm_event(PGMAJFAULT);
>                  count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
>                  ret = VM_FAULT_MAJOR;
> diff --git a/mm/memory.c b/mm/memory.c
> index bd157f2..63bcd41 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2754,6 +2754,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>                          ret = VM_FAULT_MAJOR;
>                          count_vm_event(PGMAJFAULT);
>                          count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
> +                        page_cache_read_miss(vmf);
>                  } else if (PageHWPoison(page)) {
>                          /*
>                           * hwpoisoned dirty swapcache pages are kept for killing
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 3a4b74c..47e33a4 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -127,7 +127,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
>  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>                               struct page **pagep, enum sgp_type sgp,
>                               gfp_t gfp, struct vm_area_struct *vma,
> -                             vm_fault_t *fault_type);
> +                             struct vm_fault *vmf, vm_fault_t *fault_type);
>  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>                  struct page **pagep, enum sgp_type sgp,
>                  gfp_t gfp, struct vm_area_struct *vma,
> @@ -1159,7 +1159,7 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
>                  error = shmem_swapin_page(inode, indices[i],
>                                            &page, SGP_CACHE,
>                                            mapping_gfp_mask(mapping),
> -                                          NULL, NULL);
> +                                          NULL, NULL, NULL);
>                  if (error == 0) {
>                          unlock_page(page);
>                          put_page(page);
> @@ -1614,7 +1614,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
>  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>                               struct page **pagep, enum sgp_type sgp,
>                               gfp_t gfp, struct vm_area_struct *vma,
> -                             vm_fault_t *fault_type)
> +                             struct vm_fault *vmf, vm_fault_t *fault_type)
>  {
>          struct address_space *mapping = inode->i_mapping;
>          struct shmem_inode_info *info = SHMEM_I(inode);
> @@ -1636,6 +1636,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>                          *fault_type |= VM_FAULT_MAJOR;
>                          count_vm_event(PGMAJFAULT);
>                          count_memcg_event_mm(charge_mm, PGMAJFAULT);
> +                        page_cache_read_miss(vmf);
>                  }
>                  /* Here we actually start the io */
>                  page = shmem_swapin(swap, gfp, info, index);
> @@ -1758,7 +1759,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>          page = find_lock_entry(mapping, index);
>          if (xa_is_value(page)) {
>                  error = shmem_swapin_page(inode, index, &page,
> -                                          sgp, gfp, vma, fault_type);
> +                                          sgp, gfp, vma, vmf, fault_type);
>                  if (error == -EEXIST)
>                          goto repeat;
>
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 36b56f8..c49ecba 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1188,6 +1188,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
>          "pgfault",
>          "pgmajfault",
>          "pglazyfreed",
> +        "pgcachemiss",
>
>          "pgrefill",
>          "pgsteal_kswapd",
> --
> 1.8.3.1
>

--
Michal Hocko
SUSE Labs