On Sat 23-12-17 16:56:38, Dan Williams wrote: > In preparation for examining the busy state of dax pages in the truncate > path, switch from sectors to pfns in the radix. > > Cc: Jan Kara <jack@xxxxxxx> > Cc: Jeff Moyer <jmoyer@xxxxxxxxxx> > Cc: Christoph Hellwig <hch@xxxxxx> > Cc: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx> > Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx> > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> Looks good to me after comments are fixed as Ross asked. You can add: Reviewed-by: Jan Kara <jack@xxxxxxx> Honza > --- > drivers/dax/super.c | 15 ++++++++-- > fs/dax.c | 75 ++++++++++++++++++--------------------------------- > 2 files changed, 39 insertions(+), 51 deletions(-) > > diff --git a/drivers/dax/super.c b/drivers/dax/super.c > index 473af694ad1c..516124ae1ccf 100644 > --- a/drivers/dax/super.c > +++ b/drivers/dax/super.c > @@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) > return len < 0 ? len : -EIO; > } > > - if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) > - || pfn_t_devmap(pfn)) > + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) { > + /* > + * An arch that has enabled the pmem api should also > + * have its drivers support pfn_t_devmap() > + * > + * This is a developer warning and should not trigger in > + * production. dax_flush() will crash since it depends > + * on being able to do (page_address(pfn_to_page())). > + */ > + WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); > + } else if (pfn_t_devmap(pfn)) { > /* pass */; > - else { > + } else { > pr_debug("VFS (%s): error: dax support not enabled\n", > sb->s_id); > return -EOPNOTSUPP; > diff --git a/fs/dax.c b/fs/dax.c > index 78b72c48374e..54071cd27e8c 100644 > --- a/fs/dax.c > +++ b/fs/dax.c > @@ -72,16 +72,15 @@ fs_initcall(init_dax_wait_table); > #define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) > #define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) > > -static unsigned long dax_radix_sector(void *entry) > +static unsigned long dax_radix_pfn(void *entry) > { > return (unsigned long)entry >> RADIX_DAX_SHIFT; > } > > -static void *dax_radix_locked_entry(sector_t sector, unsigned long flags) > +static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags) > { > return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | > - ((unsigned long)sector << RADIX_DAX_SHIFT) | > - RADIX_DAX_ENTRY_LOCK); > + (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK); > } > > static unsigned int dax_radix_order(void *entry) > @@ -525,12 +524,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, > */ > static void *dax_insert_mapping_entry(struct address_space *mapping, > struct vm_fault *vmf, > - void *entry, sector_t sector, > + void *entry, pfn_t pfn_t, > unsigned long flags, bool dirty) > { > struct radix_tree_root *page_tree = &mapping->page_tree; > - void *new_entry; > + unsigned long pfn = pfn_t_to_pfn(pfn_t); > pgoff_t index = vmf->pgoff; > + void *new_entry; > > if (dirty) > __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); > @@ -547,7 +547,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, > } > > spin_lock_irq(&mapping->tree_lock); > - new_entry = dax_radix_locked_entry(sector, flags); > + new_entry = dax_radix_locked_entry(pfn, flags); > > if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { > /* > @@ -659,17 +659,14 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, > i_mmap_unlock_read(mapping); > } > > -static int dax_writeback_one(struct block_device *bdev, > - struct dax_device *dax_dev, struct address_space *mapping, > - pgoff_t index, void *entry) > +static int dax_writeback_one(struct dax_device *dax_dev, > + struct address_space *mapping, pgoff_t index, void *entry) > { > struct radix_tree_root *page_tree = &mapping->page_tree; > - void *entry2, **slot, *kaddr; > - long ret = 0, id; > - sector_t sector; > - pgoff_t pgoff; > + void *entry2, **slot; > + unsigned long pfn; > + long ret = 0; > size_t size; > - pfn_t pfn; > > /* > * A page got tagged dirty in DAX mapping? Something is seriously > @@ -688,7 +685,7 @@ static int dax_writeback_one(struct block_device *bdev, > * compare sectors as we must not bail out due to difference in lockbit > * or entry type. > */ > - if (dax_radix_sector(entry2) != dax_radix_sector(entry)) > + if (dax_radix_pfn(entry2) != dax_radix_pfn(entry)) > goto put_unlocked; > if (WARN_ON_ONCE(dax_is_empty_entry(entry) || > dax_is_zero_entry(entry))) { > @@ -718,29 +715,11 @@ static int dax_writeback_one(struct block_device *bdev, > * 'entry'. This allows us to flush for PMD_SIZE and not have to > * worry about partial PMD writebacks. > */ > - sector = dax_radix_sector(entry); > + pfn = dax_radix_pfn(entry); > size = PAGE_SIZE << dax_radix_order(entry); > > - id = dax_read_lock(); > - ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); > - if (ret) > - goto dax_unlock; > - > - /* > - * dax_direct_access() may sleep, so cannot hold tree_lock over > - * its invocation. > - */ > - ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn); > - if (ret < 0) > - goto dax_unlock; > - > - if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) { > - ret = -EIO; > - goto dax_unlock; > - } > - > - dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); > - dax_flush(dax_dev, kaddr, size); > + dax_mapping_entry_mkclean(mapping, index, pfn); > + dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size); > /* > * After we have flushed the cache, we can clear the dirty tag. There > * cannot be new dirty data in the pfn after the flush has completed as > @@ -751,8 +730,6 @@ static int dax_writeback_one(struct block_device *bdev, > radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); > spin_unlock_irq(&mapping->tree_lock); > trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); > - dax_unlock: > - dax_read_unlock(id); > put_locked_mapping_entry(mapping, index); > return ret; > > @@ -810,8 +787,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, > break; > } > > - ret = dax_writeback_one(bdev, dax_dev, mapping, > - indices[i], pvec.pages[i]); > + ret = dax_writeback_one(dax_dev, mapping, indices[i], > + pvec.pages[i]); > if (ret < 0) { > mapping_set_error(mapping, ret); > goto out; > @@ -879,6 +856,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry, > int ret = VM_FAULT_NOPAGE; > struct page *zero_page; > void *entry2; > + pfn_t pfn; > > zero_page = ZERO_PAGE(0); > if (unlikely(!zero_page)) { > @@ -886,14 +864,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry, > goto out; > } > > - entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0, > + pfn = page_to_pfn_t(zero_page); > + entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn, > RADIX_DAX_ZERO_PAGE, false); > if (IS_ERR(entry2)) { > ret = VM_FAULT_SIGBUS; > goto out; > } > > - vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page)); > + vm_insert_mixed(vmf->vma, vaddr, pfn); > out: > trace_dax_load_hole(inode, vmf, ret); > return ret; > @@ -1200,8 +1179,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > if (error < 0) > goto error_finish_iomap; > > - entry = dax_insert_mapping_entry(mapping, vmf, entry, > - dax_iomap_sector(&iomap, pos), > + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, > 0, write && !sync); > if (IS_ERR(entry)) { > error = PTR_ERR(entry); > @@ -1286,13 +1264,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, > void *ret = NULL; > spinlock_t *ptl; > pmd_t pmd_entry; > + pfn_t pfn; > > zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); > > if (unlikely(!zero_page)) > goto fallback; > > - ret = dax_insert_mapping_entry(mapping, vmf, entry, 0, > + pfn = page_to_pfn_t(zero_page); > + ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn, > RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); > if (IS_ERR(ret)) > goto fallback; > @@ -1415,8 +1395,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, > if (error < 0) > goto finish_iomap; > > - entry = dax_insert_mapping_entry(mapping, vmf, entry, > - dax_iomap_sector(&iomap, pos), > + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, > RADIX_DAX_PMD, write && !sync); > if (IS_ERR(entry)) > goto finish_iomap; > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR