IOMMU HW now supports updating a dirty bit in an entry when a DMA writes to the entry's VA range. iommufd has a uAPI to read and clear the dirty bits from the tables. This is a trivial recursive descent algorithm unwound into a function call waterfall. The format needs a function to tell if a contiguous entry is dirty, and a function to clear a contiguous entry back to clean. FIXME: needs kunit testing Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> --- drivers/iommu/generic_pt/iommu_pt.h | 63 +++++++++++++++++++++++++++++ include/linux/generic_pt/iommu.h | 22 ++++++++++ 2 files changed, 85 insertions(+) diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 4fccdcd58d4ba6..79b0ecbdc1adf6 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h @@ -130,6 +130,64 @@ static phys_addr_t NS(iova_to_phys)(struct pt_iommu *iommu_table, return res; } +struct pt_iommu_dirty_args { + struct iommu_dirty_bitmap *dirty; + unsigned int flags; +}; + +/* FIXME this is a bit big on formats with contig.. */ +static __always_inline int +__do_read_and_clear_dirty(struct pt_range *range, void *arg, unsigned int level, + struct pt_table_p *table, pt_level_fn_t descend_fn) +{ + struct pt_state pts = pt_init(range, level, table); + struct pt_iommu_dirty_args *dirty = arg; + + for_each_pt_level_item(&pts) { + if (pts.type == PT_ENTRY_TABLE) + return pt_descend(&pts, arg, descend_fn); + if (pts.type == PT_ENTRY_EMPTY) + continue; + + if (!pt_entry_write_is_dirty(&pts)) + continue; + + /* FIXME we should probably do our own gathering? 
*/ + iommu_dirty_bitmap_record(dirty->dirty, range->va, + log2_to_int(pt_entry_oa_lg2sz(&pts))); + if (!(dirty->flags & IOMMU_DIRTY_NO_CLEAR)) { + /* + * No write log required because DMA incoherence and + * atomic dirty tracking bits can't work together + */ + pt_entry_set_write_clean(&pts); + } + break; + } + return 0; +} +PT_MAKE_LEVELS(__read_and_clear_dirty, __do_read_and_clear_dirty); + +static int __maybe_unused NS(read_and_clear_dirty)( + struct pt_iommu *iommu_table, dma_addr_t iova, dma_addr_t len, + unsigned long flags, struct iommu_dirty_bitmap *dirty_bitmap) +{ + struct pt_iommu_dirty_args dirty = { + .dirty = dirty_bitmap, + .flags = flags, + }; + struct pt_range range; + int ret; + + ret = make_range(common_from_iommu(iommu_table), &range, iova, len); + if (ret) + return ret; + + ret = pt_walk_range(&range, __read_and_clear_dirty, &dirty); + PT_WARN_ON(ret); + return ret; +} + struct pt_iommu_collect_args { struct pt_radix_list_head free_list; u8 ignore_mapped : 1; @@ -887,6 +945,9 @@ static const struct pt_iommu_ops NS(ops) = { .unmap_pages = NS(unmap_pages), .iova_to_phys = NS(iova_to_phys), .cut_mapping = NS(cut_mapping), +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) && defined(pt_entry_write_is_dirty) + .read_and_clear_dirty = NS(read_and_clear_dirty), +#endif .get_info = NS(get_info), .deinit = NS(deinit), }; @@ -963,5 +1024,7 @@ EXPORT_SYMBOL_NS_GPL(pt_iommu_init, GENERIC_PT_IOMMU); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(GENERIC_PT); +/* For iommu_dirty_bitmap_record() */ +MODULE_IMPORT_NS(IOMMUFD); #endif diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h index d83f293209fa77..f77f6aef3f5958 100644 --- a/include/linux/generic_pt/iommu.h +++ b/include/linux/generic_pt/iommu.h @@ -10,6 +10,7 @@ struct iommu_iotlb_gather; struct pt_iommu_ops; +struct iommu_dirty_bitmap; /** * DOC: IOMMU Radix Page Table @@ -158,6 +159,27 @@ struct pt_iommu_ops { phys_addr_t (*iova_to_phys)(struct pt_iommu *iommu_table, dma_addr_t iova); + 
/** + * read_and_clear_dirty() - Manipulate the HW set write dirty state + * @iommu_table: Table to manipulate + * @iova: IO virtual address to start + * @len: Length of the IOVA range + * @flags: A bitmap of IOMMU_DIRTY_NO_CLEAR + * @dirty_bitmap: Bitmap to record the write dirty entries in + * + * Iterate over all the entries in the mapped range and record their + * write dirty status in iommu_dirty_bitmap. If IOMMU_DIRTY_NO_CLEAR is + * specified then the entries will be left dirty, otherwise they are + * returned to being not write dirty. + * + * Context: The caller must hold a read range lock that includes @iova. + * + * Returns: -ERRNO on failure, 0 on success. + */ + int (*read_and_clear_dirty)(struct pt_iommu *iommu_table, + dma_addr_t iova, dma_addr_t len, + unsigned long flags, + struct iommu_dirty_bitmap *dirty_bitmap); + /** * get_info() - Return the pt_iommu_info structure * @iommu_table: Table to query -- 2.46.0