On 12/8/20 7:34 PM, Jason Gunthorpe wrote:
>> @@ -274,6 +291,7 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
>>  				 bool make_dirty)
>>  {
>>  	unsigned long index;
>> +	int refs = 1;
>>
>>  	/*
>>  	 * TODO: this can be optimized for huge pages: if a series of pages is
>> @@ -286,8 +304,9 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
>>  		return;
>>  	}
>>
>> -	for (index = 0; index < npages; index++) {
>> +	for (index = 0; index < npages; index += refs) {
>>  		struct page *page = compound_head(pages[index]);
>> +
>
> I think this is really hard to read, it should end up as some:
>
> 	for_each_compound_head(page_list, page_list_len, &head, &ntails) {
> 		if (!PageDirty(head))
> 			set_page_dirty_lock(head, ntails);
> 		unpin_user_page(head, ntails);
> 	}
>
> And maybe you open code that iteration, but that basic idea to find a
> compound_head and ntails should be computational work performed.
>
> No reason not to fix set_page_dirty_lock() too while you are here.
>

The whack of atomics you referred to earlier, I suppose it ends up being
account_page_dirtied(). See the partial diff at the end.

I was looking at the latter part and renaming all the fs that supply
set_page_dirty()... But now my concern is whether it's really safe to
assume that filesystems which supply it can indeed dirty @ntails pages.

Functionally, fixing set_page_dirty_lock() means we don't call
set_page_dirty(head) @ntails times as happens today; we would only call
it once, with ntails as an argument.

Perhaps the safest thing to do is still to iterate over @ntails and call
->set_page_dirty(page), and instead introduce a set_page_range_dirty()
which individual filesystems can separately supply, giving precedence to
->set_page_range_dirty() over ->set_page_dirty()?
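Something along these lines, maybe (very rough sketch of the idea above;
->set_page_range_dirty as an optional aop is the new hook being proposed
here and doesn't exist today, and the fallback just keeps the current
one-call-per-tail behaviour):

int set_page_range_dirty(struct page *page, unsigned int ntails)
{
	struct address_space *mapping = page_mapping(page);
	unsigned int i;
	int ret = 0;

	/* Hypothetical hook: prefer a range-aware ->set_page_range_dirty(). */
	if (mapping && mapping->a_ops->set_page_range_dirty)
		return mapping->a_ops->set_page_range_dirty(page, ntails);

	/* Otherwise keep today's semantics: one set_page_dirty() per tail. */
	for (i = 0; i < ntails; i++)
		ret |= set_page_dirty(page);

	return ret;
}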
	Joao

--------------------->8------------------------------

diff --git a/mm/gup.c b/mm/gup.c
index 41ab3d48e1bb..5f8a0f16ab62 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -295,7 +295,7 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
 		 * next writeback cycle. This is harmless.
 		 */
 		if (!PageDirty(head))
-			set_page_dirty_lock(head);
+			set_page_range_dirty_lock(head, ntails);
 		put_compound_head(head, ntails, FOLL_PIN);
 	}
 }
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 088729ea80b2..4642d037f657 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2417,7 +2417,8 @@ int __set_page_dirty_no_writeback(struct page *page, unsigned int ntails)
  *
  * NOTE: This relies on being atomic wrt interrupts.
  */
-void account_page_dirtied(struct page *page, struct address_space *mapping)
+void account_page_dirtied(struct page *page, struct address_space *mapping,
+			  unsigned int ntails)
 {
 	struct inode *inode = mapping->host;
 
@@ -2425,17 +2426,18 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 	if (mapping_can_writeback(mapping)) {
 		struct bdi_writeback *wb;
+		int nr = ntails + 1;
 
 		inode_attach_wb(inode, page);
 		wb = inode_to_wb(inode);
 
-		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
-		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-		__inc_node_page_state(page, NR_DIRTIED);
-		inc_wb_stat(wb, WB_RECLAIMABLE);
-		inc_wb_stat(wb, WB_DIRTIED);
-		task_io_account_write(PAGE_SIZE);
-		current->nr_dirtied++;
+		mod_lruvec_page_state(page, NR_FILE_DIRTY, nr);
+		mod_zone_page_state(page_zone(page), NR_ZONE_WRITE_PENDING, nr);
+		mod_node_page_state(page_pgdat(page), NR_DIRTIED, nr);
+		__add_wb_stat(wb, WB_RECLAIMABLE, nr);
+		__add_wb_stat(wb, WB_DIRTIED, nr);
+		task_io_account_write(nr * PAGE_SIZE);
+		current->nr_dirtied += nr;
 		this_cpu_inc(bdp_ratelimits);
 
 		mem_cgroup_track_foreign_dirty(page, wb);
@@ -2485,7 +2487,7 @@ int __set_page_dirty_nobuffers(struct page *page, unsigned int ntails)
 		xa_lock_irqsave(&mapping->i_pages, flags);
 		BUG_ON(page_mapping(page) != mapping);
 		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
-		account_page_dirtied(page, mapping);
+		account_page_dirtied(page, mapping, ntails);
 		__xa_set_mark(&mapping->i_pages, page_index(page),
 				PAGECACHE_TAG_DIRTY);
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
@@ -2624,6 +2626,27 @@ int set_page_dirty_lock(struct page *page)
 }
 EXPORT_SYMBOL(set_page_dirty_lock);
 
+/*
+ * set_page_range_dirty() is racy if the caller has no reference against
+ * page->mapping->host, and if the page is unlocked.  This is because another
+ * CPU could truncate the page off the mapping and then free the mapping.
+ *
+ * Usually, the page _is_ locked, or the caller is a user-space process which
+ * holds a reference on the inode by having an open file.
+ *
+ * In other cases, the page should be locked before running
+ * set_page_range_dirty().
+ */
+int set_page_range_dirty_lock(struct page *page, unsigned int ntails)
+{
+	int ret;
+
+	lock_page(page);
+	ret = set_page_range_dirty(page, ntails);
+	unlock_page(page);
+	return ret;
+}
+EXPORT_SYMBOL(set_page_range_dirty_lock);
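
As an aside, for the gup.c side of it, the compound_head()/ntails counting
you describe could be open-coded with something like the helper below
(rough sketch only; count_ntails is just an illustrative name, not an
existing helper):

static unsigned int count_ntails(struct page **pages, unsigned long index,
				 unsigned long npages)
{
	struct page *head = compound_head(pages[index]);
	unsigned int ntails = 1;

	/* Count consecutive entries that share the same compound head. */
	while (index + ntails < npages &&
	       compound_head(pages[index + ntails]) == head)
		ntails++;

	return ntails;
}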