On Tue, Apr 2, 2019 at 3:23 PM Michal Hocko <mhocko@xxxxxxxx> wrote: > > On Tue 02-04-19 14:15:20, Yafang Shao wrote: > > We found that some latency spike was caused by page cache miss on our > > database server. > > So we decide to measure the page cache miss. > > Currently the kernel is lack of this facility for measuring it. > > What are you going to use this information for? > With this counter, we can monitor pgcachemiss per second, and this can give us some information about whether a database performance issue is related to pgcachemiss. For example, if this value increases suddenly, it always causes a latency spike. What's more, I also want to measure how long a stall each page cache miss may cause, but this seems more complex to implement. > > This patch introduces a new vm counter PGCACHEMISS for this purpose. > > This counter will be incremented in bellow scenario, > > - page cache miss in generic file read routine > > - read access page cache miss in mmap > > - read access page cache miss in swapin > > > > NB, readahead routine is not counted because it won't stall the > > application directly. > > Doesn't this partially open the side channel we have closed for mincore > just recently? > Seems I missed this discussion. Could you please give a reference to it? 
Thanks Yafang > > Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> > > --- > > include/linux/pagemap.h | 7 +++++++ > > include/linux/vm_event_item.h | 1 + > > mm/filemap.c | 2 ++ > > mm/memory.c | 1 + > > mm/shmem.c | 9 +++++---- > > mm/vmstat.c | 1 + > > 6 files changed, 17 insertions(+), 4 deletions(-) > > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > > index f939e00..8355b51 100644 > > --- a/include/linux/pagemap.h > > +++ b/include/linux/pagemap.h > > @@ -233,6 +233,13 @@ pgoff_t page_cache_next_miss(struct address_space *mapping, > > pgoff_t page_cache_prev_miss(struct address_space *mapping, > > pgoff_t index, unsigned long max_scan); > > > > +static inline void page_cache_read_miss(struct vm_fault *vmf) > > +{ > > + if (!vmf || (vmf->flags & (FAULT_FLAG_USER | FAULT_FLAG_WRITE)) == > > + FAULT_FLAG_USER) > > + count_vm_event(PGCACHEMISS); > > +} > > + > > #define FGP_ACCESSED 0x00000001 > > #define FGP_LOCK 0x00000002 > > #define FGP_CREAT 0x00000004 > > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > > index 47a3441..d589f05 100644 > > --- a/include/linux/vm_event_item.h > > +++ b/include/linux/vm_event_item.h > > @@ -29,6 +29,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > > PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, > > PGFAULT, PGMAJFAULT, > > PGLAZYFREED, > > + PGCACHEMISS, > > PGREFILL, > > PGSTEAL_KSWAPD, > > PGSTEAL_DIRECT, > > diff --git a/mm/filemap.c b/mm/filemap.c > > index 4157f85..fc12c2d 100644 > > --- a/mm/filemap.c > > +++ b/mm/filemap.c > > @@ -2256,6 +2256,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb, > > goto out; > > > > no_cached_page: > > + page_cache_read_miss(NULL); > > /* > > * Ok, it wasn't cached, so we need to create a new > > * page.. 
> > @@ -2556,6 +2557,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) > > fpin = do_async_mmap_readahead(vmf, page); > > } else if (!page) { > > /* No page in the page cache at all */ > > + page_cache_read_miss(vmf); > > count_vm_event(PGMAJFAULT); > > count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); > > ret = VM_FAULT_MAJOR; > > diff --git a/mm/memory.c b/mm/memory.c > > index bd157f2..63bcd41 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -2754,6 +2754,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) > > ret = VM_FAULT_MAJOR; > > count_vm_event(PGMAJFAULT); > > count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); > > + page_cache_read_miss(vmf); > > } else if (PageHWPoison(page)) { > > /* > > * hwpoisoned dirty swapcache pages are kept for killing > > diff --git a/mm/shmem.c b/mm/shmem.c > > index 3a4b74c..47e33a4 100644 > > --- a/mm/shmem.c > > +++ b/mm/shmem.c > > @@ -127,7 +127,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, > > static int shmem_swapin_page(struct inode *inode, pgoff_t index, > > struct page **pagep, enum sgp_type sgp, > > gfp_t gfp, struct vm_area_struct *vma, > > - vm_fault_t *fault_type); > > + struct vm_fault *vmf, vm_fault_t *fault_type); > > static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > struct page **pagep, enum sgp_type sgp, > > gfp_t gfp, struct vm_area_struct *vma, > > @@ -1159,7 +1159,7 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec, > > error = shmem_swapin_page(inode, indices[i], > > &page, SGP_CACHE, > > mapping_gfp_mask(mapping), > > - NULL, NULL); > > + NULL, NULL, NULL); > > if (error == 0) { > > unlock_page(page); > > put_page(page); > > @@ -1614,7 +1614,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, > > static int shmem_swapin_page(struct inode *inode, pgoff_t index, > > struct page **pagep, enum sgp_type sgp, > > gfp_t gfp, struct vm_area_struct *vma, > > - vm_fault_t *fault_type) > > + struct vm_fault *vmf, vm_fault_t 
*fault_type) > > { > > struct address_space *mapping = inode->i_mapping; > > struct shmem_inode_info *info = SHMEM_I(inode); > > @@ -1636,6 +1636,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > > *fault_type |= VM_FAULT_MAJOR; > > count_vm_event(PGMAJFAULT); > > count_memcg_event_mm(charge_mm, PGMAJFAULT); > > + page_cache_read_miss(vmf); > > } > > /* Here we actually start the io */ > > page = shmem_swapin(swap, gfp, info, index); > > @@ -1758,7 +1759,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > > page = find_lock_entry(mapping, index); > > if (xa_is_value(page)) { > > error = shmem_swapin_page(inode, index, &page, > > - sgp, gfp, vma, fault_type); > > + sgp, gfp, vma, vmf, fault_type); > > if (error == -EEXIST) > > goto repeat; > > > > diff --git a/mm/vmstat.c b/mm/vmstat.c > > index 36b56f8..c49ecba 100644 > > --- a/mm/vmstat.c > > +++ b/mm/vmstat.c > > @@ -1188,6 +1188,7 @@ int fragmentation_index(struct zone *zone, unsigned int order) > > "pgfault", > > "pgmajfault", > > "pglazyfreed", > > + "pgcachemiss", > > > > "pgrefill", > > "pgsteal_kswapd", > > -- > > 1.8.3.1 > > > > -- > Michal Hocko > SUSE Labs