Re: [PATCH v3 01/16] cxl/pci: Fix CDAT retrieval on big endian

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/2/23 07:25, Lukas Wunner wrote:
The CDAT exposed in sysfs differs between little endian and big endian
arches:  On big endian, every 4 bytes are byte-swapped.


hexdump prints different byte streams on LE and BE? Does not seem right.


PCI Configuration Space is little endian (PCI r3.0 sec 6.1).  Accessors
such as pci_read_config_dword() implicitly swap bytes on big endian.
That way, the macros in include/uapi/linux/pci_regs.h work regardless of
the arch's endianness.  For an example of implicit byte-swapping, see
ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx
(Load Word Byte-Reverse Indexed).

DOE Read/Write Data Mailbox Registers are unlike other registers in
Configuration Space in that they contain or receive a 4 byte portion of
an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f).
They need to be copied to or from the request/response buffer verbatim.
So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit
byte-swapping.

The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume
implicit byte-swapping.  Byte-swap requests after constructing them with
those macros and byte-swap responses before parsing them.

Change the request and response type to __le32 to avoid sparse warnings.

Fixes: c97006046c79 ("cxl/port: Read CDAT table")
Tested-by: Ira Weiny <ira.weiny@xxxxxxxxx>
Signed-off-by: Lukas Wunner <lukas@xxxxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx # v6.0+
---
  Changes v2 -> v3:
  * Newly added patch in v3

  drivers/cxl/core/pci.c  | 12 ++++++------
  drivers/pci/doe.c       | 13 ++++++++-----
  include/linux/pci-doe.h |  8 ++++++--
  3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 57764e9cd19d..d3cf1d9d67d4 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -480,7 +480,7 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport)
  	return NULL;
  }
-#define CDAT_DOE_REQ(entry_handle) \
+#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
  	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
  		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
  	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
@@ -493,8 +493,8 @@ static void cxl_doe_task_complete(struct pci_doe_task *task)
  }
struct cdat_doe_task {
-	u32 request_pl;
-	u32 response_pl[32];
+	__le32 request_pl;
+	__le32 response_pl[32];

This is ok as it is a binary format of DOE message (is it?)...

  	struct completion c;
  	struct pci_doe_task task;
  };
@@ -531,7 +531,7 @@ static int cxl_cdat_get_length(struct device *dev,
  	if (t.task.rv < sizeof(u32))
  		return -EIO;
- *length = t.response_pl[1];
+	*length = le32_to_cpu(t.response_pl[1]);
  	dev_dbg(dev, "CDAT length %zu\n", *length);
return 0;
@@ -548,7 +548,7 @@ static int cxl_cdat_read_table(struct device *dev,
  	do {
  		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
  		size_t entry_dw;
-		u32 *entry;
+		__le32 *entry;
  		int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
@@ -563,7 +563,7 @@ static int cxl_cdat_read_table(struct device *dev,
/* Get the CXL table access header entry handle */
  		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
-					 t.response_pl[0]);
+					 le32_to_cpu(t.response_pl[0]));
  		entry = t.response_pl + 1;
  		entry_dw = t.task.rv / sizeof(u32);
  		/* Skip Header */
diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index 66d9ab288646..69efa9a250b9 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -143,7 +143,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
  					  length));
  	for (i = 0; i < task->request_pl_sz / sizeof(u32); i++)
  		pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
-				       task->request_pl[i]);
+				       le32_to_cpu(task->request_pl[i]));

Does it really work on BE? My little brain explodes on all these convertions :)

char buf[] = { 1, 2, 3, 4 }
u32 *request_pl = buf;

request_pl[0] will be 0x01020304.
le32_to_cpu(request_pl[0]) will be 0x04030201
And then pci_write_config_dword() will do another swap.

Did I miss something? (/me is gone bringing up a BE system).

pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO); @@ -198,8 +198,8 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
  	payload_length = min(length, task->response_pl_sz / sizeof(u32));
  	/* Read the rest of the response payload */
  	for (i = 0; i < payload_length; i++) {
-		pci_read_config_dword(pdev, offset + PCI_DOE_READ,
-				      &task->response_pl[i]);
+		pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+		task->response_pl[i] = cpu_to_le32(val);
  		/* Prior to the last ack, ensure Data Object Ready */
  		if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb))
  			return -EIO;
@@ -322,15 +322,17 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
  	struct pci_doe_task task = {
  		.prot.vid = PCI_VENDOR_ID_PCI_SIG,
  		.prot.type = PCI_DOE_PROTOCOL_DISCOVERY,
-		.request_pl = &request_pl,
+		.request_pl = (__le32 *)&request_pl,
  		.request_pl_sz = sizeof(request_pl),
-		.response_pl = &response_pl,
+		.response_pl = (__le32 *)&response_pl,
  		.response_pl_sz = sizeof(response_pl),
  		.complete = pci_doe_task_complete,
  		.private = &c,
  	};
  	int rc;
+ cpu_to_le32s(&request_pl);
+
  	rc = pci_doe_submit_task(doe_mb, &task);
  	if (rc < 0)
  		return rc;
@@ -340,6 +342,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
  	if (task.rv != sizeof(response_pl))
  		return -EIO;
+ le32_to_cpus(&response_pl);
  	*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
  	*protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
  			      response_pl);
diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
index ed9b4df792b8..43765eaf2342 100644
--- a/include/linux/pci-doe.h
+++ b/include/linux/pci-doe.h
@@ -34,6 +34,10 @@ struct pci_doe_mb;
   * @work: Used internally by the mailbox
   * @doe_mb: Used internally by the mailbox
   *
+ * Payloads are treated as opaque byte streams which are transmitted verbatim,
+ * without byte-swapping.  If payloads contain little-endian register values,
+ * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
+ *
   * The payload sizes and rv are specified in bytes with the following
   * restrictions concerning the protocol.
   *
@@ -45,9 +49,9 @@ struct pci_doe_mb;
   */
  struct pci_doe_task {
  	struct pci_doe_protocol prot;
-	u32 *request_pl;
+	__le32 *request_pl;
  	size_t request_pl_sz;
-	u32 *response_pl;
+	__le32 *response_pl;


This does not look right. Either:
- pci_doe() should also take __le32* or
- pci_doe() should do cpu_to_le32() for the request, and pci_doe_task_complete() - for the response.


Thanks,


  	size_t response_pl_sz;
  	int rv;
  	void (*complete)(struct pci_doe_task *task);

--
Alexey




[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux