In VFIO, common functions that used by VFIO variant drivers are managed in a set of "core" functions. E.g. the vfio-pci-core provides the common functions used by VFIO variant drviers to support PCI device passhthrough. Although the CXL type-2 device has a PCI-compatible interface for device configuration and programming, they still needs special handlings when initialize the device: - Probing the CXL DVSECs in the configuration. - Probing the CXL register groups implemented by the device. - Configuring the CXL device state required by the kernel CXL core. - Create the CXL region. - Special handlings of the CXL MMIO BAR. Introduce vfio-cxl core predules to hold all the common functions used by VFIO variant drivers to support CXL device passthrough. Signed-off-by: Zhi Wang <zhiw@xxxxxxxxxx> --- drivers/vfio/pci/Kconfig | 4 + drivers/vfio/pci/Makefile | 3 + drivers/vfio/pci/vfio_cxl_core.c | 264 +++++++++++++++++++++++++++++++ include/linux/vfio_pci_core.h | 37 +++++ 4 files changed, 308 insertions(+) create mode 100644 drivers/vfio/pci/vfio_cxl_core.c diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index bf50ffa10bde..2196e79b132b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -7,6 +7,10 @@ config VFIO_PCI_CORE select VFIO_VIRQFD select IRQ_BYPASS_MANAGER +config VFIO_CXL_CORE + tristate + select VFIO_PCI_CORE + config VFIO_PCI_MMAP def_bool y if !S390 depends on VFIO_PCI_CORE diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index cf00c0a7e55c..b51221b94b0b 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -8,6 +8,9 @@ vfio-pci-y := vfio_pci.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o +vfio-cxl-core-y := vfio_cxl_core.o +obj-$(CONFIG_VFIO_CXL_CORE) += vfio-cxl-core.o + obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/ obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/ diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c new file mode 100644 index 000000000000..6a7859333f67 --- /dev/null +++ b/drivers/vfio/pci/vfio_cxl_core.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/device.h> +#include <linux/eventfd.h> +#include <linux/file.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/uaccess.h> + +#include "vfio_pci_priv.h" + +#define DRIVER_AUTHOR "Zhi Wang <zhiw@xxxxxxxxxx>" +#define DRIVER_DESC "core driver for VFIO based CXL devices" + +static int get_hpa_and_request_dpa(struct vfio_pci_core_device *core_dev) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + struct pci_dev *pdev = core_dev->pdev; + u64 max; + + cxl->cxlrd = cxl_get_hpa_freespace(cxl->endpoint, 1, + CXL_DECODER_F_RAM | + CXL_DECODER_F_TYPE2, + &max); + if (IS_ERR(cxl->cxlrd)) { + pci_err(pdev, "Fail to get HPA space.\n"); + return PTR_ERR(cxl->cxlrd); + } + + if (max < cxl->region.size) { + pci_err(pdev, "No enough free HPA space %llu < %llu\n", + max, cxl->region.size); + return -ENOSPC; + } + + cxl->cxled = cxl_request_dpa(cxl->endpoint, true, cxl->region.size, + cxl->region.size); + if (IS_ERR(cxl->cxled)) { + pci_err(pdev, "Fail to request DPA\n"); + return PTR_ERR(cxl->cxled); + } + + return 0; +} + +static int create_cxl_region(struct vfio_pci_core_device *core_dev) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + struct pci_dev *pdev = core_dev->pdev; + resource_size_t start, end; + int ret; + + ret = cxl_accel_request_resource(cxl->cxlds, true); + if (ret) { + pci_err(pdev, "Fail to request CXL resource\n"); + return ret; + } + + if (!cxl_await_media_ready(cxl->cxlds)) { + cxl_accel_set_media_ready(cxl->cxlds); + } else { + pci_err(pdev, "CXL media is not active\n"); + return ret; + } + + cxl->cxlmd = devm_cxl_add_memdev(&pdev->dev, cxl->cxlds); + if (IS_ERR(cxl->cxlmd)) { + pci_err(pdev, "Fail to create CXL memdev\n"); + return PTR_ERR(cxl->cxlmd); + } + + cxl->endpoint = cxl_acquire_endpoint(cxl->cxlmd); + if (IS_ERR(cxl->endpoint)) { + pci_err(pdev, "Fail to acquire CXL endpoint\n"); + return PTR_ERR(cxl->endpoint); + } + + ret = get_hpa_and_request_dpa(core_dev); + if (ret) + goto out; + + cxl->region.region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1); + if (IS_ERR(cxl->region.region)) { + ret = PTR_ERR(cxl->region.region); + pci_err(pdev, "Fail to create CXL region\n"); + cxl_dpa_free(cxl->cxled); + goto out; + } + + cxl_accel_get_region_params(cxl->region.region, &start, &end); + + cxl->region.addr = start; +out: + cxl_release_endpoint(cxl->cxlmd, cxl->endpoint); + return ret; +} + +/* Standard CXL-type 2 driver initialization sequence */ +static int enable_cxl(struct vfio_pci_core_device *core_dev, u16 dvsec) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + struct pci_dev *pdev = core_dev->pdev; + u32 count; + u64 offset, size; + int ret; + + cxl->cxlds = cxl_accel_state_create(&pdev->dev, cxl->caps); + if (IS_ERR(cxl->cxlds)) + return PTR_ERR(cxl->cxlds); + + cxl_accel_set_dvsec(cxl->cxlds, dvsec); + cxl_accel_set_serial(cxl->cxlds, pdev->dev.id); + + cxl_accel_set_resource(cxl->cxlds, cxl->dpa_res, CXL_ACCEL_RES_DPA); + cxl_accel_set_resource(cxl->cxlds, cxl->ram_res, CXL_ACCEL_RES_RAM); + + ret = cxl_pci_accel_setup_regs(pdev, cxl->cxlds); + if (ret) { + pci_err(pdev, "Fail to setup CXL accel regs\n"); + return ret; + } + + ret = cxl_get_hdm_info(cxl->cxlds, &count, &offset, &size); + if (ret) + return ret; + + if (!count || !size) { + pci_err(pdev, "Fail to find CXL HDM reg offset\n"); + return -ENODEV; + } + + cxl->hdm_count = count; + cxl->hdm_reg_offset = offset; + cxl->hdm_reg_size = size; + + return create_cxl_region(core_dev); +} + +static void disable_cxl(struct vfio_pci_core_device *core_dev) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + + if (cxl->region.region) + cxl_region_detach(cxl->cxled); + + if (cxl->cxled) + cxl_dpa_free(cxl->cxled); +} + +int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + struct pci_dev *pdev = core_dev->pdev; + u16 dvsec; + int ret; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + CXL_DVSEC_PCIE_DEVICE); + if (!dvsec) + return -ENODEV; + + if (!cxl->region.size) + return -EINVAL; + + ret = vfio_pci_core_enable(core_dev); + if (ret) + return ret; + + ret = enable_cxl(core_dev, dvsec); + if (ret) + goto err_enable_cxl_device; + + return 0; + +err_enable_cxl_device: + vfio_pci_core_disable(core_dev); + return ret; +} +EXPORT_SYMBOL(vfio_cxl_core_enable); + +void vfio_cxl_core_finish_enable(struct vfio_pci_core_device *core_dev) +{ + vfio_pci_core_finish_enable(core_dev); +} +EXPORT_SYMBOL(vfio_cxl_core_finish_enable); + +void vfio_cxl_core_close_device(struct vfio_device *vdev) +{ + struct vfio_pci_core_device *core_dev = + container_of(vdev, struct vfio_pci_core_device, vdev); + + disable_cxl(core_dev); + vfio_pci_core_close_device(vdev); +} +EXPORT_SYMBOL(vfio_cxl_core_close_device); + +/* + * Configure the resource required by the kernel CXL core: + * device DPA and device RAM size + */ +void vfio_cxl_core_set_resource(struct vfio_pci_core_device *core_dev, + struct resource res, + enum accel_resource type) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + + switch (type) { + case CXL_ACCEL_RES_DPA: + cxl->dpa_size = res.end - res.start + 1; + cxl->dpa_res = res; + break; + + case CXL_ACCEL_RES_RAM: + cxl->ram_res = res; + break; + + default: + WARN(1, "invalid resource type: %d\n", type); + break; + } +} +EXPORT_SYMBOL(vfio_cxl_core_set_resource); + +/* Configure the expected CXL region size to be created */ +void vfio_cxl_core_set_region_size(struct vfio_pci_core_device *core_dev, + u64 size) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + + if (WARN_ON(size > cxl->dpa_size)) + return; + + if (WARN_ON(cxl->region.region)) + return; + + cxl->region.size = size; +} +EXPORT_SYMBOL(vfio_cxl_core_set_region_size); + +/* Configure the driver cap required by the kernel CXL core */ +void vfio_cxl_core_set_driver_hdm_cap(struct vfio_pci_core_device *core_dev) +{ + struct vfio_cxl *cxl = &core_dev->cxl; + + cxl->caps |= CXL_ACCEL_DRIVER_CAP_HDM; +} +EXPORT_SYMBOL(vfio_cxl_core_set_driver_hdm_cap); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_IMPORT_NS(CXL); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b3..7762d4a3e825 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -15,6 +15,8 @@ #include <linux/types.h> #include <linux/uuid.h> #include <linux/notifier.h> +#include <linux/cxl_accel_mem.h> +#include <linux/cxl_accel_pci.h> #ifndef VFIO_PCI_CORE_H #define VFIO_PCI_CORE_H @@ -49,6 +51,31 @@ struct vfio_pci_region { u32 flags; }; +struct vfio_cxl_region { + u64 size; + u64 addr; + struct cxl_region *region; +}; + +struct vfio_cxl { + u8 caps; + u64 dpa_size; + + u32 hdm_count; + u64 hdm_reg_offset; + u64 hdm_reg_size; + + struct cxl_dev_state *cxlds; + struct cxl_memdev *cxlmd; + struct cxl_root_decoder *cxlrd; + struct cxl_port *endpoint; + struct cxl_endpoint_decoder *cxled; + struct resource dpa_res; + struct resource ram_res; + + struct vfio_cxl_region region; +}; + struct vfio_pci_core_device { struct vfio_device vdev; struct pci_dev *pdev; @@ -94,6 +121,7 @@ struct vfio_pci_core_device { struct vfio_pci_core_device *sriov_pf_core_dev; struct notifier_block nb; struct rw_semaphore memory_lock; + struct vfio_cxl cxl; }; /* Will be exported for vfio pci drivers usage */ @@ -159,4 +187,13 @@ VFIO_IOREAD_DECLARATION(32) VFIO_IOREAD_DECLARATION(64) #endif +int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev); +void vfio_cxl_core_finish_enable(struct vfio_pci_core_device *core_dev); +void vfio_cxl_core_close_device(struct vfio_device *vdev); +void vfio_cxl_core_set_resource(struct vfio_pci_core_device *core_dev, + struct resource res, + enum accel_resource type); +void vfio_cxl_core_set_region_size(struct vfio_pci_core_device *core_dev, + u64 size); +void vfio_cxl_core_set_driver_hdm_cap(struct vfio_pci_core_device *core_dev); #endif /* VFIO_PCI_CORE_H */ -- 2.34.1