I want the page to be protected by the page lock between the page_mkwrite
notification to the filesystem and the actual setting of the page dirty. Do
this by holding the page lock over page_mkwrite, and keeping it held until
after set_page_dirty.

I need this in fsblock because I am working to ensure filesystem metadata can
be correctly allocated and refcounted, which means that page cleaning should
not require memory allocation (to be really robust).

Without this patch we could, for example, get a concurrent writeout after the
page_mkwrite (which allocates the page metadata required to clean the page)
but before the set_page_dirty. The writeout will clean the page and notice
that the metadata is now unused, so it may be deallocated (the page appears
clean because set_page_dirty hasn't been called yet). At this point the page
may be dirtied via the pte without enough metadata left to write it back.
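
To illustrate the race (a sketch only; "metadata" here means fsblock's
per-page block metadata, which page_mkwrite allocates and which may be freed
once the page looks clean):

  write fault                         writeout
  -----------                         --------
  ->page_mkwrite(page)
    allocate metadata needed to
    clean the page
                                      write the page back, clean it
                                      page appears clean (set_page_dirty
                                      not yet called), metadata looks
                                      unused -> deallocate it
  make pte writable
  set_page_dirty(page)

The page is now dirty via the pte, but the metadata needed to write it back
is gone. Holding the page lock from before ->page_mkwrite until after
set_page_dirty closes this window, because writeout must take the page lock.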
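
In other words, the ordering the patch establishes is (a simplified sketch of
the fault path; the real code below also revalidates the pte, handles truncate
and errors, and balances dirty pages):

	lock_page(page);			/* excludes concurrent writeout */
	vma->vm_ops->page_mkwrite(vma, page);	/* fs allocates page metadata */
	/* ... make the pte writable ... */
	set_page_dirty(page);			/* metadata still pinned */
	unlock_page(page);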
---
 fs/buffer.c |   11 +++++------
 mm/memory.c |   51 ++++++++++++++++++++++++++++-----------------------
 2 files changed, 33 insertions(+), 29 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -2474,12 +2474,12 @@ block_page_mkwrite(struct vm_area_struct
 	loff_t size;
 	int ret = -EINVAL;
 
-	lock_page(page);
+	BUG_ON(page->mapping != inode->i_mapping);
+
 	size = i_size_read(inode);
-	if ((page->mapping != inode->i_mapping) ||
-	    (page_offset(page) > size)) {
+	if (page_offset(page) > size) {
 		/* page got truncated out from underneath us */
-		goto out_unlock;
+		goto out;
 	}
 
 	/* page is wholly or partially inside EOF */
@@ -2492,8 +2492,7 @@ block_page_mkwrite(struct vm_area_struct
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
-out_unlock:
-	unlock_page(page);
+out:
 	return ret;
 }
 
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -1948,9 +1948,16 @@ static int do_wp_page(struct mm_struct *
 		 */
 		page_cache_get(old_page);
 		pte_unmap_unlock(page_table, ptl);
+		lock_page(old_page);
+		if (!old_page->mapping) {
+			ret = 0;
+			goto out_error;
+		}
 
-		if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
-			goto unwritable_page;
+		if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) {
+			ret = VM_FAULT_SIGBUS;
+			goto out_error;
+		}
 
 		/*
 		 * Since we dropped the lock we need to revalidate
@@ -1960,9 +1967,11 @@ static int do_wp_page(struct mm_struct *
 		 */
 		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-		page_cache_release(old_page);
-		if (!pte_same(*page_table, orig_pte))
+		if (!pte_same(*page_table, orig_pte)) {
+			unlock_page(old_page);
+			page_cache_release(old_page);
 			goto unlock;
+		}
 
 		page_mkwrite = 1;
 	}
 
@@ -2085,19 +2094,30 @@ unlock:
 		 *
 		 * do_no_page is protected similarly.
 		 */
-		wait_on_page_locked(dirty_page);
+		if (!page_mkwrite)
+			wait_on_page_locked(dirty_page);
 		set_page_dirty_balance(dirty_page, page_mkwrite);
 		put_page(dirty_page);
+		if (page_mkwrite) {
+			unlock_page(old_page);
+			page_cache_release(old_page);
+		}
 	}
 	return ret;
 oom_free_new:
 	page_cache_release(new_page);
 oom:
-	if (old_page)
+	if (old_page) {
+		if (page_mkwrite) {
+			unlock_page(old_page);
+			page_cache_release(old_page);
+		}
 		page_cache_release(old_page);
+	}
 	return VM_FAULT_OOM;
 
-unwritable_page:
+out_error:
+	unlock_page(old_page);
 	page_cache_release(old_page);
 	return VM_FAULT_SIGBUS;
 }
@@ -2643,23 +2663,9 @@ static int __do_fault(struct mm_struct *
 			 * to become writable
 			 */
 			if (vma->vm_ops->page_mkwrite) {
-				unlock_page(page);
 				if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
 					ret = VM_FAULT_SIGBUS;
 					anon = 1; /* no anon but release vmf.page */
-					goto out_unlocked;
-				}
-				lock_page(page);
-				/*
-				 * XXX: this is not quite right (racy vs
-				 * invalidate) to unlock and relock the page
-				 * like this, however a better fix requires
-				 * reworking page_mkwrite locking API, which
-				 * is better done later.
-				 */
-				if (!page->mapping) {
-					ret = 0;
-					anon = 1; /* no anon but release vmf.page */
 					goto out;
 				}
 				page_mkwrite = 1;
@@ -2713,8 +2719,6 @@ static int __do_fault(struct mm_struct *
 	pte_unmap_unlock(page_table, ptl);
 
 out:
-	unlock_page(vmf.page);
-out_unlocked:
 	if (anon)
 		page_cache_release(vmf.page);
 	else if (dirty_page) {
@@ -2724,6 +2728,7 @@ out_unlocked:
 		set_page_dirty_balance(dirty_page, page_mkwrite);
 		put_page(dirty_page);
 	}
+	unlock_page(vmf.page);
 
 	return ret;
 }