CXL Memory Device SW Guide rev1.0 2.11.2 provides instructions on how to calculate latency and bandwidth for a CXL memory device. Calculate minimum bandwidth and total latency for the path from the CXL device to the root port. The calculated values are stored in the cached DSMAS entries attached to the cxl_port of the CXL device. For example, for a device that is directly attached to a host bus: Total Latency = Device Latency (from CDAT) + Dev to Host Bus (HB) Link Latency; Min Bandwidth = Link Bandwidth between Host Bus and CXL device. For a device that has a switch in between host bus and CXL device: Total Latency = Device (CDAT) Latency + Dev to Switch Link Latency + Switch (CDAT) Latency + Switch to HB Link Latency; Min Bandwidth = min(dev to switch bandwidth, switch to HB bandwidth). Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> The internal latency for a switch can be retrieved from the CDAT of the switch PCI device. However, since there's no easy way to retrieve that right now on Linux, a guesstimated constant is used per switch to simplify the driver code. 
Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> --- drivers/cxl/core/port.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 9 +++++++ drivers/cxl/port.c | 42 +++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2b27319cfd42..aa260361ba7d 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1899,6 +1899,66 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); +int cxl_port_get_downstream_qos(struct cxl_port *port, long *bw, long *lat) +{ + long total_lat = 0, latency; + long min_bw = INT_MAX; + struct pci_dev *pdev; + struct cxl_port *p; + struct device *dev; + int devices = 0; + + /* Grab the device that is the PCI device for CXL memdev */ + dev = port->uport->parent; + /* Skip if it's not PCI, most likely a cxl_test device */ + if (!dev_is_pci(dev)) + return 0; + + pdev = to_pci_dev(dev); + min_bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); + if (min_bw == 0) + return -ENXIO; + + /* convert to MB/s from Mb/s */ + min_bw >>= 3; + + p = port; + do { + struct cxl_dport *dport; + + latency = cxl_pci_get_latency(pdev); + if (latency < 0) + return latency; + + total_lat += latency; + devices++; + + dport = p->parent_dport; + if (!dport) + break; + + p = dport->port; + dev = p->uport; + if (!dev_is_pci(dev)) + break; + pdev = to_pci_dev(dev); + } while (1); + + /* + * Add an approximate latency to the switch. Currently there + * is no easy mechanism to read the CDAT for switches. 'devices' + * should account for all the PCI devices encountered minus the + * root device. So the number of switches would be 'devices - 1' + * to account for the CXL device. 
+ */ + total_lat += CXL_SWITCH_APPROX_LAT * (devices - 1); + + *bw = min_bw; + *lat = total_lat; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_port_get_downstream_qos, CXL); + /* for user tooling to ensure port disable work has completed */ static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ac6ea550ab0a..86668fab6e91 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -480,6 +480,13 @@ struct cxl_pmem_region { struct cxl_pmem_region_mapping mapping[]; }; +/* + * Set in picoseconds per ACPI spec 6.5 Table 5.148 Entry Base Unit. + * This is an approximate constant to use for switch latency calculation + * until there's a way to access switch CDAT. + */ +#define CXL_SWITCH_APPROX_LAT 5000 + /** * struct cxl_port - logical collection of upstream port devices and * downstream port devices to construct a CXL memory @@ -706,6 +713,7 @@ struct dsmas_entry { struct range dpa_range; u16 handle; u64 qos[ACPI_HMAT_WRITE_BANDWIDTH + 1]; + int qtg_id; }; typedef int (*cdat_tbl_entry_handler)(struct acpi_cdat_header *header, void *arg); @@ -734,6 +742,7 @@ struct qtg_dsm_output { struct qtg_dsm_output *cxl_acpi_evaluate_qtg_dsm(acpi_handle handle, struct qtg_dsm_input *input); acpi_handle cxl_acpi_get_rootdev_handle(struct device *dev); +int cxl_port_get_downstream_qos(struct cxl_port *port, long *bw, long *lat); /* * Unit test builds overrides this to __weak, find the 'strong' version diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 8de311208b37..d72e38f9ae44 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -30,6 +30,44 @@ static void schedule_detach(void *cxlmd) schedule_cxl_memdev_detach(cxlmd); } +static int cxl_port_qos_calculate(struct cxl_port *port) +{ + struct qtg_dsm_output *output; + struct qtg_dsm_input input; + struct dsmas_entry *dent; + long min_bw, total_lat; + acpi_handle handle; + int rc; + + rc = cxl_port_get_downstream_qos(port, &min_bw, &total_lat); + 
if (rc) + return rc; + + handle = cxl_acpi_get_rootdev_handle(&port->dev); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + mutex_lock(&port->cdat.dsmas_lock); + list_for_each_entry(dent, &port->cdat.dsmas_list, list) { + input.rd_lat = dent->qos[ACPI_HMAT_READ_LATENCY] + total_lat; + input.wr_lat = dent->qos[ACPI_HMAT_WRITE_LATENCY] + total_lat; + input.rd_bw = min_t(int, min_bw, + dent->qos[ACPI_HMAT_READ_BANDWIDTH]); + input.wr_bw = min_t(int, min_bw, + dent->qos[ACPI_HMAT_WRITE_BANDWIDTH]); + + output = cxl_acpi_evaluate_qtg_dsm(handle, &input); + if (IS_ERR(output)) + continue; + + dent->qtg_id = output->qtg_ids[0]; + kfree(output); + } + mutex_unlock(&port->cdat.dsmas_lock); + + return 0; +} + static int cxl_port_probe(struct device *dev) { struct cxl_port *port = to_cxl_port(dev); @@ -74,6 +112,10 @@ static int cxl_port_probe(struct device *dev) } else { dev_dbg(dev, "Failed to parse DSMAS: %d\n", rc); } + + rc = cxl_port_qos_calculate(port); + if (rc) + dev_dbg(dev, "Failed to do QoS calculations\n"); } rc = cxl_hdm_decode_init(cxlds, cxlhdm);