The kernel uapi/linux/iommu.h header file includes the extensions for vSVA support. e.g. bind gpasid, iommu fault report related user structures and etc. This commit updates kernel headers from the below branch: https://github.com/luxis1999/linux-vsva.git: vsva-linux-5.8-rc3-v4 Note: this should be replaced with a full header files update when the vSVA uPAPI is stable. TODO: add the note for the Linux version. Cc: Kevin Tian <kevin.tian@xxxxxxxxx> Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx> Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> Cc: Cornelia Huck <cohuck@xxxxxxxxxx> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx> --- linux-headers/linux/iommu.h | 413 ++++++++++++++++++++++++++++++++++++++++++++ linux-headers/linux/vfio.h | 87 +++++++++- 2 files changed, 499 insertions(+), 1 deletion(-) create mode 100644 linux-headers/linux/iommu.h diff --git a/linux-headers/linux/iommu.h b/linux-headers/linux/iommu.h new file mode 100644 index 0000000..8bd12e2 --- /dev/null +++ b/linux-headers/linux/iommu.h @@ -0,0 +1,413 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * IOMMU user API definitions + */ + +#ifndef _IOMMU_H +#define _IOMMU_H + +#include <linux/types.h> + +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ + +/* Generic fault types, can be expanded IRQ remapping fault */ +enum iommu_fault_type { + IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */ + IOMMU_FAULT_PAGE_REQ, /* page request fault */ +}; + +enum iommu_fault_reason { + IOMMU_FAULT_REASON_UNKNOWN = 0, + + /* Could not access the PASID table (fetch caused external abort) */ + IOMMU_FAULT_REASON_PASID_FETCH, + + /* PASID entry is invalid or has configuration errors */ + IOMMU_FAULT_REASON_BAD_PASID_ENTRY, + + /* + * PASID is out of range (e.g. exceeds the maximum PASID + * supported by the IOMMU) or disabled. + */ + IOMMU_FAULT_REASON_PASID_INVALID, + + /* + * An external abort occurred fetching (or updating) a translation + * table descriptor + */ + IOMMU_FAULT_REASON_WALK_EABT, + + /* + * Could not access the page table entry (Bad address), + * actual translation fault + */ + IOMMU_FAULT_REASON_PTE_FETCH, + + /* Protection flag check failed */ + IOMMU_FAULT_REASON_PERMISSION, + + /* access flag check failed */ + IOMMU_FAULT_REASON_ACCESS, + + /* Output address of a translation stage caused Address Size fault */ + IOMMU_FAULT_REASON_OOR_ADDRESS, +}; + +/** + * struct iommu_fault_unrecoverable - Unrecoverable fault data + * @reason: reason of the fault, from &enum iommu_fault_reason + * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values) + * @pasid: Process Address Space ID + * @perm: requested permission access using by the incoming transaction + * (IOMMU_FAULT_PERM_* values) + * @addr: offending page address + * @fetch_addr: address that caused a fetch abort, if any + */ +struct iommu_fault_unrecoverable { + __u32 reason; +#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0) +#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1) +#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2) + __u32 flags; + __u32 pasid; + __u32 perm; + __u64 addr; + __u64 fetch_addr; +}; + +/** + * struct iommu_fault_page_request - Page Request data + * @flags: encodes whether the corresponding fields are valid and whether this + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values) + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) + * @addr: page address + * @private_data: device-specific private information + */ +struct iommu_fault_page_request { +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) + __u32 flags; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + __u64 private_data[2]; +}; + +/** + * struct iommu_fault - Generic fault data + * @type: fault type from &enum iommu_fault_type + * @padding: reserved for future use (should be zero) + * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ + * @padding2: sets the fault size to allow for future extensions + */ +struct iommu_fault { + __u32 type; + __u32 padding; + union { + struct iommu_fault_unrecoverable event; + struct iommu_fault_page_request prm; + __u8 padding2[56]; + }; +}; + +/** + * enum iommu_page_response_code - Return status of fault handlers + * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is "Success" in PCI PRI. + * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is "Response Failure" in PCI PRI. + * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is "Invalid Request" in PCI PRI. + */ +enum iommu_page_response_code { + IOMMU_PAGE_RESP_SUCCESS = 0, + IOMMU_PAGE_RESP_INVALID, + IOMMU_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_page_response - Generic page response information + * @argsz: User filled size of this data + * @version: API version of this structure + * @flags: encodes whether the corresponding fields are valid + * (IOMMU_FAULT_PAGE_RESPONSE_* values) + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @code: response code from &enum iommu_page_response_code + */ +struct iommu_page_response { + __u32 argsz; +#define IOMMU_PAGE_RESP_VERSION_1 1 + __u32 version; +#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) + __u32 flags; + __u32 pasid; + __u32 grpid; + __u32 code; +}; + +/* defines the granularity of the invalidation */ +enum iommu_inv_granularity { + IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ + IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ + IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ + IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ +}; + +/** + * struct iommu_inv_addr_info - Address Selective Invalidation Structure + * + * @flags: indicates the granularity of the address-selective invalidation + * - If the PASID bit is set, the @pasid field is populated and the invalidation + * relates to cache entries tagged with this PASID and matching the address + * range. + * - If ARCHID bit is set, @archid is populated and the invalidation relates + * to cache entries tagged with this architecture specific ID and matching + * the address range. + * - Both PASID and ARCHID can be set as they may tag different caches. + * - If neither PASID or ARCHID is set, global addr invalidation applies. + * - The LEAF flag indicates whether only the leaf PTE caching needs to be + * invalidated and other paging structure caches can be preserved. + * @pasid: process address space ID + * @archid: architecture-specific ID + * @addr: first stage/level input address + * @granule_size: page/block size of the mapping in bytes + * @nb_granules: number of contiguous granules to be invalidated + */ +struct iommu_inv_addr_info { +#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) +#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) +#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) + __u32 flags; + __u32 archid; + __u64 pasid; + __u64 addr; + __u64 granule_size; + __u64 nb_granules; +}; + +/** + * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure + * + * @flags: indicates the granularity of the PASID-selective invalidation + * - If the PASID bit is set, the @pasid field is populated and the invalidation + * relates to cache entries tagged with this PASID and matching the address + * range. + * - If the ARCHID bit is set, the @archid is populated and the invalidation + * relates to cache entries tagged with this architecture specific ID and + * matching the address range. + * - Both PASID and ARCHID can be set as they may tag different caches. + * - At least one of PASID or ARCHID must be set. + * @pasid: process address space ID + * @archid: architecture-specific ID + */ +struct iommu_inv_pasid_info { +#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) +#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) + __u32 flags; + __u32 archid; + __u64 pasid; +}; + +/** + * struct iommu_cache_invalidate_info - First level/stage invalidation + * information + * @argsz: User filled size of this data + * @version: API version of this structure + * @cache: bitfield that allows to select which caches to invalidate + * @granularity: defines the lowest granularity used for the invalidation: + * domain > PASID > addr + * @padding: reserved for future use (should be zero) + * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID + * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR + * + * Not all the combinations of cache/granularity are valid: + * + * +--------------+---------------+---------------+---------------+ + * | type / | DEV_IOTLB | IOTLB | PASID | + * | granularity | | | cache | + * +==============+===============+===============+===============+ + * | DOMAIN | N/A | Y | Y | + * +--------------+---------------+---------------+---------------+ + * | PASID | Y | Y | Y | + * +--------------+---------------+---------------+---------------+ + * | ADDR | Y | Y | N/A | + * +--------------+---------------+---------------+---------------+ + * + * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than + * @version and @cache. + * + * If multiple cache types are invalidated simultaneously, they all + * must support the used granularity. + */ +struct iommu_cache_invalidate_info { + __u32 argsz; +#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 + __u32 version; +/* IOMMU paging structure cache */ +#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ +#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ +#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ +#define IOMMU_CACHE_INV_TYPE_NR (3) + __u8 cache; + __u8 granularity; + __u8 padding[2]; + union { + struct iommu_inv_pasid_info pasid_info; + struct iommu_inv_addr_info addr_info; + } granu; +}; + +/** + * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest + * SVA binding. + * + * @flags: VT-d PASID table entry attributes + * @pat: Page attribute table data to compute effective memory type + * @emt: Extended memory type + * + * Only guest vIOMMU selectable and effective options are passed down to + * the host IOMMU. + */ +struct iommu_gpasid_bind_data_vtd { +#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ +#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ +#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ +#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ +#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ +#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ + __u64 flags; + __u32 pat; + __u32 emt; +}; + +#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ + IOMMU_SVA_VTD_GPASID_EMTE | \ + IOMMU_SVA_VTD_GPASID_PCD | \ + IOMMU_SVA_VTD_GPASID_PWT) + +/** + * struct iommu_gpasid_bind_data - Information about device and guest PASID binding + * @argsz: User filled size of this data + * @version: Version of this data structure + * @format: PASID table entry format + * @flags: Additional information on guest bind request + * @gpgd: Guest page directory base of the guest mm to bind + * @hpasid: Process address space ID used for the guest mm in host IOMMU + * @gpasid: Process address space ID used for the guest mm in guest IOMMU + * @addr_width: Guest virtual address width + * @padding: Reserved for future use (should be zero) + * @vtd: Intel VT-d specific data + * + * Guest to host PASID mapping can be an identity or non-identity, where guest + * has its own PASID space. For non-identify mapping, guest to host PASID lookup + * is needed when VM programs guest PASID into an assigned device. VMM may + * trap such PASID programming then request host IOMMU driver to convert guest + * PASID to host PASID based on this bind data. + */ +struct iommu_gpasid_bind_data { + __u32 argsz; +#define IOMMU_GPASID_BIND_VERSION_1 1 + __u32 version; +#define IOMMU_PASID_FORMAT_INTEL_VTD 1 + __u32 format; +#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ + __u64 flags; + __u64 gpgd; + __u64 hpasid; + __u64 gpasid; + __u32 addr_width; + __u8 padding[12]; + /* Vendor specific data */ + union { + struct iommu_gpasid_bind_data_vtd vtd; + } vendor; +}; + +/* + * struct iommu_nesting_info - Information for nesting-capable IOMMU. + * user space should check it before using + * nesting capability. + * + * @size: size of the whole structure + * @format: PASID table entry format, the same definition with + * @format of struct iommu_gpasid_bind_data. + * @features: supported nesting features. + * @flags: currently reserved for future extension. + * @addr_width: The output addr width of first level/stage translation + * @pasid_bits: Maximum supported PASID bits, 0 represents no PASID + * support. + * @data: vendor specific cap info. data[] structure type can be deduced + * from @format field. + * + * +===============+======================================================+ + * | feature | Notes | + * +===============+======================================================+ + * | SYSWIDE_PASID | PASIDs are managed in system-wide, instead of per | + * | | device. When a device is assigned to userspace or | + * | | VM, proper uAPI (userspace driver framework uAPI, | + * | | e.g. VFIO) must be used to allocate/free PASIDs for | + * | | the assigned device. | + * +---------------+------------------------------------------------------+ + * | BIND_PGTBL | The owner of the first level/stage page table must | + * | | explicitly bind the page table to associated PASID | + * | | (either the one specified in bind request or the | + * | | default PASID of iommu domain), through userspace | + * | | driver framework uAPI (e.g. VFIO_IOMMU_NESTING_OP). | + * +---------------+------------------------------------------------------+ + * | CACHE_INVLD | The owner of the first level/stage page table must | + * | | explicitly invalidate the IOMMU cache through uAPI | + * | | provided by userspace driver framework (e.g. VFIO) | + * | | according to vendor-specific requirement when | + * | | changing the page table. | + * +---------------+------------------------------------------------------+ + * + * @data[] types defined for @format: + * +================================+=====================================+ + * | @format | @data[] | + * +================================+=====================================+ + * | IOMMU_PASID_FORMAT_INTEL_VTD | struct iommu_nesting_info_vtd | + * +--------------------------------+-------------------------------------+ + * + */ +struct iommu_nesting_info { + __u32 size; + __u32 format; + __u32 features; +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID (1 << 0) +#define IOMMU_NESTING_FEAT_BIND_PGTBL (1 << 1) +#define IOMMU_NESTING_FEAT_CACHE_INVLD (1 << 2) + __u32 flags; + __u16 addr_width; + __u16 pasid_bits; + __u32 padding; + __u8 data[]; +}; + +/* + * struct iommu_nesting_info_vtd - Intel VT-d specific nesting info + * + * + * @flags: VT-d specific flags. Currently reserved for future + * extension. + * @cap_reg: Describe basic capabilities as defined in VT-d capability + * register. + * @ecap_reg: Describe the extended capabilities as defined in VT-d + * extended capability register. + */ +struct iommu_nesting_info_vtd { + __u32 flags; + __u32 padding; + __u64 cap_reg; + __u64 ecap_reg; +}; + +#endif /* _IOMMU_H */ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index f09df26..e6609d1 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -1030,7 +1030,7 @@ struct vfio_iommu_type1_info_cap_iova_range { * size in bytes that can be used by user applications when getting the dirty * bitmap. */ -#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 struct vfio_iommu_type1_info_cap_migration { struct vfio_info_cap_header header; @@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration { __u64 max_dirty_bitmap_size; /* in bytes */ }; +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3 + +/* + * Reporting nesting info to user space. + * + * @info: the nesting info provided by IOMMU driver. Today + * it is expected to be a struct iommu_nesting_info + * data. + */ +struct vfio_iommu_type1_info_cap_nesting { + struct vfio_info_cap_header header; + __u32 flags; + __u32 padding; + __u8 info[]; +}; + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /** @@ -1153,6 +1169,75 @@ struct vfio_iommu_type1_dirty_bitmap_get { #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) +/** + * VFIO_IOMMU_PASID_REQUEST - _IOWR(VFIO_TYPE, VFIO_BASE + 18, + * struct vfio_iommu_type1_pasid_request) + * + * PASID (Processor Address Space ID) is a PCIe concept for tagging + * address spaces in DMA requests. When system-wide PASID allocation + * is required by underlying iommu driver (e.g. Intel VT-d), this + * provides an interface for userspace to request pasid alloc/free + * for its assigned devices. Userspace should check the availability + * of this API through VFIO_IOMMU_GET_INFO. + * + * @flags=VFIO_IOMMU_FLAG_ALLOC_PASID, allocate a single PASID within @range. + * @flags=VFIO_IOMMU_FLAG_FREE_PASID, free the PASIDs within @range. + * @range is [min, max], which means both @min and @max are inclusive. + * ALLOC_PASID and FREE_PASID are mutually exclusive. + * + * returns: allocated PASID value on success, -errno on failure for + * ALLOC_PASID; + * 0 for FREE_PASID operation; + */ +struct vfio_iommu_type1_pasid_request { + __u32 argsz; +#define VFIO_IOMMU_FLAG_ALLOC_PASID (1 << 0) +#define VFIO_IOMMU_FLAG_FREE_PASID (1 << 1) + __u32 flags; + struct { + __u32 min; + __u32 max; + } range; +}; + +#define VFIO_PASID_REQUEST_MASK (VFIO_IOMMU_FLAG_ALLOC_PASID | \ + VFIO_IOMMU_FLAG_FREE_PASID) + +#define VFIO_IOMMU_PASID_REQUEST _IO(VFIO_TYPE, VFIO_BASE + 18) + +/** + * VFIO_IOMMU_NESTING_OP - _IOW(VFIO_TYPE, VFIO_BASE + 19, + * struct vfio_iommu_type1_nesting_op) + * + * This interface allows user space to utilize the nesting IOMMU + * capabilities as reported through VFIO_IOMMU_GET_INFO. + * + * @data[] types defined for each op: + * +=================+===============================================+ + * | NESTING OP | @data[] | + * +=================+===============================================+ + * | BIND_PGTBL | struct iommu_gpasid_bind_data | + * +-----------------+-----------------------------------------------+ + * | UNBIND_PGTBL | struct iommu_gpasid_bind_data | + * +-----------------+-----------------------------------------------+ + * | CACHE_INVLD | struct iommu_cache_invalidate_info | + * +-----------------+-----------------------------------------------+ + * + * returns: 0 on success, -errno on failure. + */ +struct vfio_iommu_type1_nesting_op { + __u32 argsz; + __u32 flags; +#define VFIO_NESTING_OP_MASK (0xffff) /* lower 16-bits for op */ + __u8 data[]; +}; + +#define VFIO_IOMMU_NESTING_OP_BIND_PGTBL (0) +#define VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL (1) +#define VFIO_IOMMU_NESTING_OP_CACHE_INVLD (2) + +#define VFIO_IOMMU_NESTING_OP _IO(VFIO_TYPE, VFIO_BASE + 19) + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* -- 2.7.4