On Sat, Nov 19, 2011 at 12:58 AM, Mel Gorman <mgorman@xxxxxxx> wrote:
> Asynchronous compaction is used when allocating transparent hugepages
> to avoid blocking for long periods of time. Due to reports of stalling,
> synchronous compaction is never used, but this impacts allocation
> success rates. When deciding whether to migrate dirty pages, the
> following check is made:
>
>         if (PageDirty(page) && !sync &&
>                 mapping->a_ops->migratepage != migrate_page)
>                         rc = -EBUSY;
>
> This skips over all pages using buffer_migrate_page() even though
> it is possible to migrate some of these pages without blocking. This
> patch updates the ->migratepage callback with a "sync" parameter. It
> is the responsibility of the callback to gracefully fail migration of
> the page if it cannot be achieved without blocking.
>
> Signed-off-by: Mel Gorman <mgorman@xxxxxxx>
> ---
>  fs/btrfs/disk-io.c      |    2 +-
>  fs/nfs/internal.h       |    2 +-
>  fs/nfs/write.c          |    4 +-
>  include/linux/fs.h      |    9 +++-
>  include/linux/migrate.h |    2 +-
>  mm/migrate.c            |  106 ++++++++++++++++++++++++++++++++---------------
>  6 files changed, 83 insertions(+), 42 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 62afe5c..f841f00 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -872,7 +872,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
>
>  #ifdef CONFIG_MIGRATION
>  static int btree_migratepage(struct address_space *mapping,
> -                       struct page *newpage, struct page *page)
> +                       struct page *newpage, struct page *page, bool sync)
>  {
>         /*
>          * we can't safely write a btree page from here,
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index c1a1bd8..d0c460f 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -328,7 +328,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
>
>  #ifdef CONFIG_MIGRATION
>  extern int nfs_migrate_page(struct address_space *,
> -               struct page *, struct page *);
> +               struct page *, struct page *, bool);
>  #else
>  #define nfs_migrate_page NULL
>  #endif
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 1dda78d..33475df 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -1711,7 +1711,7 @@ out_error:
>
>  #ifdef CONFIG_MIGRATION
>  int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
> -               struct page *page)
> +               struct page *page, bool sync)
>  {
>         /*
>          * If PagePrivate is set, then the page is currently associated with
> @@ -1726,7 +1726,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
>
>         nfs_fscache_release_page(page, GFP_KERNEL);
>
> -       return migrate_page(mapping, newpage, page);
> +       return migrate_page(mapping, newpage, page, sync);
>  }
>  #endif
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 0c4df26..67f8e46 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -609,9 +609,12 @@ struct address_space_operations {
>                         loff_t offset, unsigned long nr_segs);
>         int (*get_xip_mem)(struct address_space *, pgoff_t, int,
>                                 void **, unsigned long *);
> -       /* migrate the contents of a page to the specified target */
> +       /*
> +        * migrate the contents of a page to the specified target. If sync
> +        * is false, it must not block. If it needs to block, return -EBUSY
> +        */
>         int (*migratepage) (struct address_space *,
> -                       struct page *, struct page *);
> +                       struct page *, struct page *, bool);
>         int (*launder_page) (struct page *);
>         int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
>                                         unsigned long);
> @@ -2577,7 +2580,7 @@ extern int generic_check_addressable(unsigned, u64);
>
>  #ifdef CONFIG_MIGRATION
>  extern int buffer_migrate_page(struct address_space *,
> -               struct page *, struct page *);
> +               struct page *, struct page *, bool);
>  #else
>  #define buffer_migrate_page NULL
>  #endif
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index e39aeec..14e6d2a 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -11,7 +11,7 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
>
>  extern void putback_lru_pages(struct list_head *l);
>  extern int migrate_page(struct address_space *,
> -                       struct page *, struct page *);
> +                       struct page *, struct page *, bool);
>  extern int migrate_pages(struct list_head *l, new_page_t x,
>                         unsigned long private, bool offlining,
>                         bool sync);
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 578e291..8395697 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -415,7 +415,7 @@ EXPORT_SYMBOL(fail_migrate_page);
>   * Pages are locked upon entry and exit.
>   */
>  int migrate_page(struct address_space *mapping,
> -               struct page *newpage, struct page *page)
> +               struct page *newpage, struct page *page, bool sync)
>  {
>         int rc;
>
> @@ -432,19 +432,60 @@ int migrate_page(struct address_space *mapping,
>  EXPORT_SYMBOL(migrate_page);
>
>  #ifdef CONFIG_BLOCK
> +
> +/* Returns true if all buffers are successfully locked */
> +bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
> +{
> +       struct buffer_head *bh = head;
> +
> +       /* Simple case, sync compaction */
> +       if (sync) {
> +               do {
> +                       get_bh(bh);
> +                       lock_buffer(bh);
> +                       bh = bh->b_this_page;
> +
> +               } while (bh != head);
> +
> +               return true;
> +       }
> +
> +       /* async case, we cannot block on lock_buffer so use trylock_buffer */
> +       do {
> +               get_bh(bh);
> +               if (!trylock_buffer(bh)) {
> +                       /*
> +                        * We failed to lock the buffer and cannot stall in
> +                        * async migration. Release the taken locks
> +                        */
> +                       struct buffer_head *failed_bh = bh;
> +                       bh = head;
> +                       do {
> +                               unlock_buffer(bh);
> +                               put_bh(bh);
> +                               bh = bh->b_this_page;
> +                       } while (bh != failed_bh);
> +                       return false;
> +               }
> +
> +               bh = bh->b_this_page;
> +       } while (bh != head);
> +       return true;
> +}
> +
>  /*
>   * Migration function for pages with buffers. This function can only be used
>   * if the underlying filesystem guarantees that no other references to "page"
>   * exist.
>   */
>  int buffer_migrate_page(struct address_space *mapping,
> -               struct page *newpage, struct page *page)
> +               struct page *newpage, struct page *page, bool sync)
>  {
>         struct buffer_head *bh, *head;
>         int rc;
>
>         if (!page_has_buffers(page))
> -               return migrate_page(mapping, newpage, page);
> +               return migrate_page(mapping, newpage, page, sync);
>
>         head = page_buffers(page);
>
> @@ -453,13 +494,18 @@ int buffer_migrate_page(struct address_space *mapping,
>         if (rc)
>                 return rc;
>
> -       bh = head;
> -       do {
> -               get_bh(bh);
> -               lock_buffer(bh);
> -               bh = bh->b_this_page;
> -
> -       } while (bh != head);
> +       if (!buffer_migrate_lock_buffers(head, sync)) {
> +               /*
> +                * We have to revert the radix tree update. If this returns
> +                * non-zero, it either means that the page count changed
> +                * which "can't happen" or the slot changed from underneath
> +                * us in which case someone operated on a page that did not
> +                * have buffers fully migrated which is alarming so warn
> +                * that it happened.
> +                */
> +               WARN_ON(migrate_page_move_mapping(mapping, page, newpage));
> +               return -EBUSY;

If migrate_page_move_mapping() really fails here, it seems disk IO will
be needed to bring the previously cached page back. I wonder if we
should check both conditions, "page refs are ok" and "all bh
trylocked", before doing radix_tree_replace_slot(), which I think would
avoid IO on the error path.

Nai

> +       }
>
>         ClearPagePrivate(page);
>         set_page_private(newpage, page_private(page));
> @@ -536,10 +582,13 @@ static int writeout(struct address_space *mapping, struct page *page)
>   * Default handling if a filesystem does not provide a migration function.
>   */
>  static int fallback_migrate_page(struct address_space *mapping,
> -               struct page *newpage, struct page *page)
> +               struct page *newpage, struct page *page, bool sync)
>  {
> -       if (PageDirty(page))
> +       if (PageDirty(page)) {
> +               if (!sync)
> +                       return -EBUSY;
>                 return writeout(mapping, page);
> +       }
>
>         /*
>          * Buffers may be managed in a filesystem specific way.
>          */
> @@ -549,7 +598,7 @@ static int fallback_migrate_page(struct address_space *mapping,
>             !try_to_release_page(page, GFP_KERNEL))
>                 return -EAGAIN;
>
> -       return migrate_page(mapping, newpage, page);
> +       return migrate_page(mapping, newpage, page, sync);
>  }
>
>  /*
> @@ -585,29 +634,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
>
>         mapping = page_mapping(page);
>         if (!mapping)
> -               rc = migrate_page(mapping, newpage, page);
> -       else {
> +               rc = migrate_page(mapping, newpage, page, sync);
> +       else if (mapping->a_ops->migratepage)
>                 /*
> -                * Do not writeback pages if !sync and migratepage is
> -                * not pointing to migrate_page() which is nonblocking
> -                * (swapcache/tmpfs uses migratepage = migrate_page).
> +                * Most pages have a mapping and most filesystems provide a
> +                * migratepage callback. Anonymous pages are part of swap
> +                * space which also has its own migratepage callback. This
> +                * is the most common path for page migration.
>                  */
> -               if (PageDirty(page) && !sync &&
> -                   mapping->a_ops->migratepage != migrate_page)
> -                       rc = -EBUSY;
> -               else if (mapping->a_ops->migratepage)
> -                       /*
> -                        * Most pages have a mapping and most filesystems
> -                        * should provide a migration function. Anonymous
> -                        * pages are part of swap space which also has its
> -                        * own migration function. This is the most common
> -                        * path for page migration.
> -                        */
> -                       rc = mapping->a_ops->migratepage(mapping,
> -                                                       newpage, page);
> -               else
> -                       rc = fallback_migrate_page(mapping, newpage, page);
> -       }
> +               rc = mapping->a_ops->migratepage(mapping,
> +                                               newpage, page, sync);
> +       else
> +               rc = fallback_migrate_page(mapping, newpage, page, sync);
>
>         if (rc) {
>                 newpage->mapping = NULL;
> --
> 1.7.3.4
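
To make the suggestion above concrete, here is a rough, untested sketch
of the ordering I have in mind. It reuses buffer_migrate_lock_buffers()
from this patch, and the tail of the function is just copied from the
existing buffer_migrate_page(); the point is only to show the locking
moved ahead of the radix tree update, so the async failure path backs
out cheaply instead of reverting the mapping:

/*
 * Untested sketch only: trylock the buffers *before* touching the
 * radix tree, so a lock failure in async mode can bail out without
 * reverting the mapping (and without risking IO on the error path).
 */
int buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, bool sync)
{
	struct buffer_head *bh, *head;
	int rc;

	if (!page_has_buffers(page))
		return migrate_page(mapping, newpage, page, sync);

	head = page_buffers(page);

	/* Buffer locks first; this only fails in the !sync case */
	if (!buffer_migrate_lock_buffers(head, sync))
		return -EBUSY;

	rc = migrate_page_move_mapping(mapping, newpage, page);
	if (rc) {
		/* Back out: drop the references and locks taken above */
		bh = head;
		do {
			unlock_buffer(bh);
			put_bh(bh);
			bh = bh->b_this_page;
		} while (bh != head);
		return rc;
	}

	/* From here on, unchanged from the current buffer_migrate_page() */
	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;
	} while (bh != head);

	SetPagePrivate(newpage);
	migrate_page_copy(newpage, page);

	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;
	} while (bh != head);

	return 0;
}

The cost is that sync migration now holds the buffer locks across the
radix tree update as well, but that window looks short, and the async
path can no longer reach the WARN_ON() revert at all.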