One more comment: On Sun 23-03-14 15:08:33, Matthew Wilcox wrote: > +static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, > + get_block_t get_block) > +{ > + struct file *file = vma->vm_file; > + struct inode *inode = file_inode(file); > + struct address_space *mapping = file->f_mapping; > + struct page *page; > + struct buffer_head bh; > + unsigned long vaddr = (unsigned long)vmf->virtual_address; > + sector_t block; > + pgoff_t size; > + unsigned long pfn; > + int error; > + int major = 0; > + > + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; > + if (vmf->pgoff >= size) > + return VM_FAULT_SIGBUS; > + > + memset(&bh, 0, sizeof(bh)); > + block = (sector_t)vmf->pgoff << (PAGE_SHIFT - inode->i_blkbits); > + bh.b_size = PAGE_SIZE; > + > + repeat: > + page = find_get_page(mapping, vmf->pgoff); > + if (page) { > + if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { > + page_cache_release(page); > + return VM_FAULT_RETRY; > + } > + if (unlikely(page->mapping != mapping)) { > + unlock_page(page); > + page_cache_release(page); > + goto repeat; > + } > + } > + > + error = get_block(inode, block, &bh, 0); > + if (error || bh.b_size < PAGE_SIZE) > + goto sigbus; > + > + if (!buffer_written(&bh) && !vmf->cow_page) { > + if (vmf->flags & FAULT_FLAG_WRITE) { > + error = get_block(inode, block, &bh, 1); > + count_vm_event(PGMAJFAULT); > + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); > + major = VM_FAULT_MAJOR; > + if (error || bh.b_size < PAGE_SIZE) > + goto sigbus; > + } else { > + return dax_load_hole(mapping, page, vmf); > + } > + } > + > + /* Recheck i_size under i_mmap_mutex */ > + mutex_lock(&mapping->i_mmap_mutex); > + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; > + if (unlikely(vmf->pgoff >= size)) { > + mutex_unlock(&mapping->i_mmap_mutex); > + goto sigbus; You need to release the block you've got from the filesystem in case of error here and below. 
Honza > + } > + if (vmf->cow_page) { > + if (buffer_written(&bh)) > + copy_user_bh(vmf->cow_page, inode, &bh, vaddr); > + else > + clear_user_highpage(vmf->cow_page, vaddr); > + if (page) { > + unlock_page(page); > + page_cache_release(page); > + } > + /* do_cow_fault() will release the i_mmap_mutex */ > + return VM_FAULT_COWED; > + } > + > + if (buffer_unwritten(&bh) || buffer_new(&bh)) > + dax_clear_blocks(inode, bh.b_blocknr, bh.b_size); > + > + error = dax_get_pfn(inode, &bh, &pfn); > + if (error > 0) > + error = vm_insert_mixed(vma, vaddr, pfn); > + mutex_unlock(&mapping->i_mmap_mutex); > + > + if (page) { > + delete_from_page_cache(page); > + unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT, > + PAGE_CACHE_SIZE, 0); > + unlock_page(page); > + page_cache_release(page); > + } > + > + if (error == -ENOMEM) > + return VM_FAULT_OOM; > + /* -EBUSY is fine, somebody else faulted on the same PTE */ > + if (error != -EBUSY) > + BUG_ON(error); > + return VM_FAULT_NOPAGE | major; > + > + sigbus: > + if (page) { > + unlock_page(page); > + page_cache_release(page); > + } > + return VM_FAULT_SIGBUS; > +} -- Jan Kara <jack@xxxxxxx> SUSE Labs, CR -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>