On Tue, 2007-03-06 at 19:04 +0100, Miklos Szeredi wrote: > plain text document attachment (mmap_mtime.patch) > Index: linux/mm/rmap.c > =================================================================== > --- linux.orig/mm/rmap.c 2007-03-06 15:17:46.000000000 +0100 > +++ linux/mm/rmap.c 2007-03-06 15:17:46.000000000 +0100 > @@ -498,6 +498,43 @@ int page_mkclean(struct page *page) > } > > /** > + * is_page_modified - check and clear the dirty bit for all mappings of a page > + * @page: the page to check > + */ > +bool is_page_modified(struct page *page) > +{ > + struct address_space *mapping = page->mapping; > + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); > + struct vm_area_struct *vma; > + struct prio_tree_iter iter; > + bool modified = false; > + > + BUG_ON(!mapping); > + BUG_ON(!page_mapped(page)); > + > + spin_lock(&mapping->i_mmap_lock); > + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { > + if (vma->vm_flags & VM_SHARED) { > + struct mm_struct *mm = vma->vm_mm; > + unsigned long addr = vma_address(page, vma); > + pte_t *pte; > + spinlock_t *ptl; > + > + if (addr != -EFAULT && > + (pte = page_check_address(page, mm, addr, &ptl))) { > + if (ptep_clear_flush_dirty(vma, addr, pte)) > + modified = true; > + pte_unmap_unlock(pte, ptl); > + } > + } > + } > + spin_unlock(&mapping->i_mmap_lock); > + if (page_test_and_clear_dirty(page)) > + modified = 1; > + return modified; > +} > + > +/** > * page_set_anon_rmap - setup new anonymous rmap > * @page: the page to add the mapping to > * @vma: the vm area in which the mapping is added I'm not liking this; it's not a constant operation as the name implies. And its style is a bit out of line with the rest of rmap. The thing it actually does is page_mkclean(); all it doesn't do is set the pte read-only. I can understand you wanting to avoid the overhead of the minor faults resulting from using page_mkclean(), but I'm not sure it's worth it. 
> Index: linux/mm/msync.c > =================================================================== > --- linux.orig/mm/msync.c 2007-03-06 15:17:42.000000000 +0100 > +++ linux/mm/msync.c 2007-03-06 15:17:46.000000000 +0100 > @@ -12,6 +12,86 @@ > #include <linux/mman.h> > #include <linux/file.h> > #include <linux/syscalls.h> > +#include <linux/pagemap.h> > +#include <linux/rmap.h> > +#include <linux/pagevec.h> > + > +/* > + * Update ctime/mtime on msync(). > + * > + * POSIX requires, that the times are updated between a modification > + * of the file through a memory mapping and the next msync for a > + * region containing the modification. The wording implies that this > + * must be done even if the modification was through a different > + * address space. Ugh. > + * > + * Non-linear vmas are to hard to handle and they are non-standard > + * anyway, so they are ignored for now. > + * > + * The "file modified" info is collected from two places: > + * > + * - AS_CMTIME flag of the mapping > + * - the dirty bit of the ptes > + * > + * For memory backed filesystems all the pages in the range need to be > + * examined. For non-memory backed filesystems it is enough to look > + * at the pages with the dirty tag. 
> + */ > +static void msync_update_file_time(struct vm_area_struct *vma, > + unsigned long start, unsigned long end) > +{ > + struct file *file = vma->vm_file; > + struct address_space *mapping = file->f_mapping; > + struct pagevec pvec; > + pgoff_t index; > + pgoff_t end_index; > + bool modified; > + > + if (!file || !(vma->vm_flags & VM_SHARED) || > + (vma->vm_flags & VM_NONLINEAR)) > + return; > + > + modified = test_and_clear_bit(AS_CMTIME, &mapping->flags); > + > + pagevec_init(&pvec, 0); > + index = linear_page_index(vma, start); > + end_index = linear_page_index(vma, end); > + while (index < end_index) { > + int i; > + struct address_space *mapping = file->f_mapping; > + int nr_pages = min(end_index - index, (pgoff_t) PAGEVEC_SIZE); > + > + if (mapping_cap_account_dirty(mapping)) > + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, > + PAGECACHE_TAG_DIRTY, nr_pages); > + else > + nr_pages = pagevec_lookup(&pvec, mapping, index, > + nr_pages); > + if (!nr_pages) > + break; > + > + for (i = 0; i < nr_pages; i++) { > + struct page *page = pvec.pages[i]; > + > + /* Skip pages which are just being read */ > + if (!PageUptodate(page)) > + continue; > + > + lock_page(page); > + index = page->index + 1; > + if (page->mapping == mapping && > + is_page_modified(page)) { > + set_page_dirty(page); > + modified = true; > + } > + unlock_page(page); > + } > + pagevec_release(&pvec); > + } > + > + if (modified) > + file_update_time(file); > +} > > /* > * MS_SYNC syncs the entire file - including mappings. Something bothers me, although I'm not sure what yet.. - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html