From: Song Liu <songliubraving@xxxxxx> Subject: mm,thp: avoid writes to file with THP in pagecache In previous patch, an application could put part of its text section in THP via madvise(). These THPs will be protected from writes when the application is still running (TXTBSY). However, after the application exits, the file is available for writes. This patch avoids writes to file THP by dropping page cache for the file when the file is open for write. A new counter nr_thps is added to struct address_space. In do_dentry_open(), if the file is open for write and nr_thps is non-zero, we drop page cache for the whole file. Link: http://lkml.kernel.org/r/20190801184244.3169074-8-songliubraving@xxxxxx Signed-off-by: Song Liu <songliubraving@xxxxxx> Reported-by: kbuild test robot <lkp@xxxxxxxxx> Acked-by: Rik van Riel <riel@xxxxxxxxxxx> Acked-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Hillf Danton <hdanton@xxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: William Kucharski <william.kucharski@xxxxxxxxxx> Cc: Oleg Nesterov <oleg@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/inode.c | 3 +++ fs/open.c | 8 ++++++++ include/linux/fs.h | 32 ++++++++++++++++++++++++++++++++ mm/filemap.c | 1 + mm/khugepaged.c | 4 +++- 5 files changed, 47 insertions(+), 1 deletion(-) --- a/fs/inode.c~mmthp-avoid-writes-to-file-with-thp-in-pagecache +++ a/fs/inode.c @@ -181,6 +181,9 @@ int inode_init_always(struct super_block mapping->flags = 0; mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + atomic_set(&mapping->nr_thps, 0); +#endif mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; mapping->writeback_index = 0; --- a/fs/open.c~mmthp-avoid-writes-to-file-with-thp-in-pagecache +++ a/fs/open.c @@ -818,6 +818,14 @@ static int do_dentry_open(struct file *f if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) return -EINVAL; } + + /* + * XXX: Huge page cache doesn't support writing yet. Drop all page + * cache for this file before processing writes. + */ + if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping)) + truncate_pagecache(inode, 0); + return 0; cleanup_all: --- a/include/linux/fs.h~mmthp-avoid-writes-to-file-with-thp-in-pagecache +++ a/include/linux/fs.h @@ -429,6 +429,7 @@ int pagecache_write_end(struct file *, s * @i_pages: Cached pages. * @gfp_mask: Memory allocation flags to use for allocating pages. * @i_mmap_writable: Number of VM_SHARED mappings. + * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. @@ -446,6 +447,10 @@ struct address_space { struct xarray i_pages; gfp_t gfp_mask; atomic_t i_mmap_writable; +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + /* number of thp, only for non-shmem files */ + atomic_t nr_thps; +#endif struct rb_root_cached i_mmap; struct rw_semaphore i_mmap_rwsem; unsigned long nrpages; @@ -2798,6 +2803,33 @@ static inline errseq_t filemap_sample_wb return errseq_sample(&mapping->wb_err); } +static inline int filemap_nr_thps(struct address_space *mapping) +{ +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + return atomic_read(&mapping->nr_thps); +#else + return 0; +#endif +} + +static inline void filemap_nr_thps_inc(struct address_space *mapping) +{ +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + atomic_inc(&mapping->nr_thps); +#else + WARN_ON_ONCE(1); +#endif +} + +static inline void filemap_nr_thps_dec(struct address_space *mapping) +{ +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + atomic_dec(&mapping->nr_thps); +#else + WARN_ON_ONCE(1); +#endif +} + extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); --- a/mm/filemap.c~mmthp-avoid-writes-to-file-with-thp-in-pagecache +++ a/mm/filemap.c @@ -205,6 +205,7 @@ static void unaccount_page_cache_page(st __dec_node_page_state(page, NR_SHMEM_THPS); } else if (PageTransHuge(page)) { __dec_node_page_state(page, NR_FILE_THPS); + filemap_nr_thps_dec(mapping); } /* --- a/mm/khugepaged.c~mmthp-avoid-writes-to-file-with-thp-in-pagecache +++ a/mm/khugepaged.c @@ -1514,8 +1514,10 @@ out_unlock: if (is_shmem) __inc_node_page_state(new_page, NR_SHMEM_THPS); - else + else { __inc_node_page_state(new_page, NR_FILE_THPS); + filemap_nr_thps_inc(mapping); + } if (nr_none) { struct zone *zone = page_zone(new_page); _