Based on iomap's per-block dirty and uptodate state tracking, add a
shmem_folio_state struct to track per-block uptodate state for large
folios (hugepages).

Signed-off-by: Daniel Gomez <da.gomez@xxxxxxxxxxx>
---
 mm/shmem.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 189 insertions(+), 6 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index d7c84ff62186..5980d7b94f65 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -131,6 +131,124 @@ struct shmem_options {
 #define SHMEM_SEEN_QUOTA 32
 };
 
+/*
+ * Structure allocated for each folio to track per-block uptodate state.
+ *
+ * Like buffered-io iomap_folio_state struct but only for uptodate.
+ */
+struct shmem_folio_state {
+	spinlock_t state_lock;
+	unsigned long state[];
+};
+
+static inline bool sfs_is_fully_uptodate(struct folio *folio)
+{
+	struct inode *inode = folio->mapping->host;
+	struct shmem_folio_state *sfs = folio->private;
+
+	return bitmap_full(sfs->state, i_blocks_per_folio(inode, folio));
+}
+
+static inline bool sfs_is_block_uptodate(struct shmem_folio_state *sfs,
+					 unsigned int block)
+{
+	return test_bit(block, sfs->state);
+}
+
+/**
+ * sfs_get_last_block_uptodate - find the index of the last uptodate block
+ * within a specified range
+ * @folio: The folio
+ * @first: The starting block of the range to search
+ * @last: The ending block of the range to search
+ *
+ * Returns the index of the last uptodate block within the specified range. If
+ * a non uptodate block is found at the start, it returns UINT_MAX.
+ */
+static unsigned int sfs_get_last_block_uptodate(struct folio *folio,
+						unsigned int first,
+						unsigned int last)
+{
+	struct inode *inode = folio->mapping->host;
+	struct shmem_folio_state *sfs = folio->private;
+	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+	unsigned int aux = find_next_zero_bit(sfs->state, nr_blocks, first);
+
+	/*
+	 * Exceed the range of possible last block and return UINT_MAX if a non
+	 * uptodate block is found at the beginning of the scan.
+	 */
+	if (aux == first)
+		return UINT_MAX;
+
+	return min_t(unsigned int, aux - 1, last);
+}
+
+static void sfs_set_range_uptodate(struct folio *folio,
+				   struct shmem_folio_state *sfs, size_t off,
+				   size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int first_blk = off >> inode->i_blkbits;
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sfs->state_lock, flags);
+	bitmap_set(sfs->state, first_blk, nr_blks);
+	if (sfs_is_fully_uptodate(folio))
+		folio_mark_uptodate(folio);
+	spin_unlock_irqrestore(&sfs->state_lock, flags);
+}
+
+static struct shmem_folio_state *sfs_alloc(struct inode *inode,
+					   struct folio *folio)
+{
+	struct shmem_folio_state *sfs = folio->private;
+	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+	gfp_t gfp = GFP_KERNEL;
+
+	if (sfs || nr_blocks <= 1)
+		return sfs;
+
+	/*
+	 * sfs->state tracks uptodate flag when the block size is smaller
+	 * than the folio size.
+	 */
+	sfs = kzalloc(struct_size(sfs, state, BITS_TO_LONGS(nr_blocks)), gfp);
+	if (!sfs)
+		return sfs;
+
+	spin_lock_init(&sfs->state_lock);
+	if (folio_test_uptodate(folio))
+		bitmap_set(sfs->state, 0, nr_blocks);
+	folio_attach_private(folio, sfs);
+
+	return sfs;
+}
+
+static void sfs_free(struct folio *folio, bool force)
+{
+	if (!folio_test_private(folio))
+		return;
+
+	if (!force)
+		WARN_ON_ONCE(sfs_is_fully_uptodate(folio) !=
+			     folio_test_uptodate(folio));
+
+	kfree(folio_detach_private(folio));
+}
+
+static void shmem_set_range_uptodate(struct folio *folio, size_t off,
+				     size_t len)
+{
+	struct shmem_folio_state *sfs = folio->private;
+
+	if (sfs)
+		sfs_set_range_uptodate(folio, sfs, off, len);
+	else
+		folio_mark_uptodate(folio);
+}
 #ifdef CONFIG_TMPFS
 static unsigned long shmem_default_max_blocks(void)
 {
@@ -1487,7 +1605,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		}
 		folio_zero_range(folio, 0, folio_size(folio));
 		flush_dcache_folio(folio);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	}
 
 	swap = folio_alloc_swap(folio);
@@ -1769,13 +1887,16 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 	if (!new)
 		return -ENOMEM;
 
+	if (folio_get_private(old))
+		folio_attach_private(new, folio_detach_private(old));
+
 	folio_get(new);
 	folio_copy(new, old);
 	flush_dcache_folio(new);
 
 	__folio_set_locked(new);
 	__folio_set_swapbacked(new);
-	folio_mark_uptodate(new);
+	shmem_set_range_uptodate(new, 0, folio_size(new));
 	new->swap = entry;
 	folio_set_swapcache(new);
 
@@ -2060,6 +2181,12 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 
 alloced:
 	alloced = true;
+
+	if (!sfs_alloc(inode, folio) && folio_test_large(folio)) {
+		error = -ENOMEM;
+		goto unlock;
+	}
+
 	if (folio_test_pmd_mappable(folio) &&
 	    DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
 					folio_next_index(folio) - 1) {
@@ -2101,7 +2228,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		for (i = 0; i < n; i++)
 			clear_highpage(folio_page(folio, i));
 		flush_dcache_folio(folio);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	}
 
 	/* Perhaps the file has been truncated since we checked */
@@ -2746,8 +2873,8 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 			folio_zero_segments(folio, 0, from,
 					from + copied, folio_size(folio));
 		}
-		folio_mark_uptodate(folio);
 	}
+	shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	folio_mark_dirty(folio);
 	folio_unlock(folio);
 	folio_put(folio);
@@ -2755,6 +2882,59 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 	return copied;
 }
 
+static void shmem_invalidate_folio(struct folio *folio, size_t offset,
+				   size_t len)
+{
+	/*
+	 * If we're invalidating the entire folio, clear the dirty state
+	 * from it and release it to avoid unnecessary buildup of the LRU.
+	 */
+	if (offset == 0 && len == folio_size(folio)) {
+		WARN_ON_ONCE(folio_test_writeback(folio));
+		folio_cancel_dirty(folio);
+		sfs_free(folio, true);
+	}
+}
+
+static bool shmem_release_folio(struct folio *folio, gfp_t gfp_flags)
+{
+	if (folio_test_dirty(folio) && !sfs_is_fully_uptodate(folio))
+		return false;
+
+	sfs_free(folio, false);
+	return true;
+}
+
+/*
+ * shmem_is_partially_uptodate checks whether blocks within a folio are
+ * uptodate or not.
+ *
+ * Returns true if all blocks which correspond to the specified part
+ * of the folio are uptodate.
+ */
+static bool shmem_is_partially_uptodate(struct folio *folio, size_t from,
+					size_t count)
+{
+	struct shmem_folio_state *sfs = folio->private;
+	struct inode *inode = folio->mapping->host;
+	unsigned int first, last;
+
+	if (!sfs)
+		return false;
+
+	/* Caller's range may extend past the end of this folio */
+	count = min(folio_size(folio) - from, count);
+
+	/* First and last blocks in range within folio */
+	first = from >> inode->i_blkbits;
+	last = (from + count - 1) >> inode->i_blkbits;
+
+	if (sfs_get_last_block_uptodate(folio, first, last) != last)
+		return false;
+
+	return true;
+}
+
 static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
@@ -3506,7 +3686,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_op = &shmem_symlink_inode_operations;
 		memcpy(folio_address(folio), symname, len);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 		folio_mark_dirty(folio);
 		folio_unlock(folio);
 		folio_put(folio);
@@ -4476,7 +4656,10 @@ const struct address_space_operations shmem_aops = {
 #ifdef CONFIG_MIGRATION
 	.migrate_folio	= migrate_folio,
 #endif
-	.error_remove_folio = shmem_error_remove_folio,
+	.error_remove_folio	= shmem_error_remove_folio,
+	.invalidate_folio	= shmem_invalidate_folio,
+	.release_folio		= shmem_release_folio,
+	.is_partially_uptodate	= shmem_is_partially_uptodate,
 };
 EXPORT_SYMBOL(shmem_aops);
 
-- 
2.43.0
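
For anyone who wants to poke at the block-index arithmetic outside the
kernel, below is a small, self-contained userspace sketch. It is not part of
the patch: the names, the 4 KiB block size and the 64 KiB folio geometry are
assumptions chosen for illustration, and a plain uint32_t stands in for
sfs->state. It only mirrors how sfs_set_range_uptodate() and
shmem_is_partially_uptodate() map a byte range onto per-block bits.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BLKBITS		12			/* assumed 4 KiB block size */
#define FOLIO_SIZE	(64UL * 1024)		/* assumed 64 KiB folio, 16 blocks */

static uint32_t uptodate_bits;	/* stand-in for sfs->state */

/* Same first/last block arithmetic as sfs_set_range_uptodate(). */
static void set_range_uptodate(size_t off, size_t len)
{
	unsigned int first_blk = off >> BLKBITS;
	unsigned int last_blk = (off + len - 1) >> BLKBITS;
	unsigned int i;

	for (i = first_blk; i <= last_blk; i++)
		uptodate_bits |= 1u << i;
}

/* Same range check idea as shmem_is_partially_uptodate(). */
static bool range_is_uptodate(size_t from, size_t count)
{
	unsigned int first, last, i;

	/* Caller's range may extend past the end of the folio */
	if (count > FOLIO_SIZE - from)
		count = FOLIO_SIZE - from;

	first = from >> BLKBITS;
	last = (from + count - 1) >> BLKBITS;

	for (i = first; i <= last; i++)
		if (!(uptodate_bits & (1u << i)))
			return false;
	return true;
}

int main(void)
{
	set_range_uptodate(0, 8192);	/* marks blocks 0 and 1 */

	/* prints 1: bytes 4096..8191 fall entirely within block 1 */
	printf("%d\n", range_is_uptodate(4096, 4096));
	/* prints 0: bytes 4096..12287 also need block 2, which is still clear */
	printf("%d\n", range_is_uptodate(4096, 8192));
	return 0;
}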