This patch adds IOCTLs to allow userspace to request & fetch dumps
of the internal controller state.

This is useful during debugging or when a fatal error on the controller
has occurred.

The expected flow of operations is:
1. IOCTL_OCXLPMEM_CONTROLLER_DUMP to request the controller to take a
   dump. This IOCTL will complete after the dump is available for
   collection.
2. IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA called repeatedly to fetch
   chunks from the buffer
3. IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE to notify the controller
   that it can free any internal resources used for the dump

Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx>
---
 drivers/nvdimm/ocxl/main.c     | 207 +++++++++++++++++++++++++++++++++
 include/uapi/nvdimm/ocxlpmem.h |  16 +++
 2 files changed, 223 insertions(+)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index e6be0029f658..d0db358ded43 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -566,6 +566,199 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
 	return 0;
 }
 
+/**
+ * controller_dump_header_parse() - Parse the first 64 bits of the controller dump command response
+ * @ocxlpmem: the device metadata
+ * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
+ *
+ * Return: 0 on success, -EINVAL if the data identifier is not 'CD', or the
+ * error returned by the MMIO read
+ */
+static int controller_dump_header_parse(struct ocxlpmem *ocxlpmem, u16 *length)
+{
+	int rc;
+	u64 val;
+	u16 data_identifier;
+	u32 data_length;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		return rc;
+
+	data_identifier = val >> 48;
+	data_length = val & 0xFFFF;
+
+	if (data_identifier != 0x4344) { // 'CD'
+		/*
+		 * data_identifier is 2 raw bytes with no NUL terminator, so
+		 * bound the string read with a precision rather than a width.
+		 */
+		dev_err(&ocxlpmem->dev,
+			"Bad data identifier for controller dump data, expected 'CD', got '%.2s' (%#x), data_length=%u\n",
+			(char *)&data_identifier,
+			(unsigned int)data_identifier, data_length);
+		return -EINVAL;
+	}
+
+	*length = data_length;
+	return 0;
+}
+
+/**
+ * ioctl_controller_dump_data() - Copy a chunk of the controller dump to userspace
+ * @ocxlpmem: the device metadata
+ * @uarg: in/out, the userspace request: buffer pointer, buffer size & offset
+ *
+ * Issues a controller dump admin command for the requested offset & size,
+ * then copies the response (including its 64 bit header) to the user buffer.
+ * On success, buf_size in @uarg is updated to the number of bytes copied.
+ *
+ * Return: 0 on success, -EFAULT if userspace memory could not be accessed,
+ * -EINVAL on a bad buffer size, otherwise the failing helper's return value
+ */
+static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
+				      struct ioctl_ocxlpmem_controller_dump_data __user *uarg)
+{
+	struct ioctl_ocxlpmem_controller_dump_data args;
+	u64 __user *buf;
+	u16 i, buf_size;
+	u64 val;
+	int rc;
+
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	if (args.buf_size % sizeof(u64))
+		return -EINVAL;
+
+	if (args.buf_size > ocxlpmem->admin_command.data_size)
+		return -EINVAL;
+
+	buf = u64_to_user_ptr(args.buf_ptr);
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	/* Request word: dump offset in the upper 32 bits, length in the lower */
+	val = ((u64)args.offset) << 32;
+	val |= args.buf_size;
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+				      ocxlpmem->admin_command.request_offset + 0x08,
+				      OCXL_LITTLE_ENDIAN, val);
+	if (rc)
+		goto out;
+
+	rc = admin_command_execute(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
+	/*
+	 * NOTE(review): presumably a negative errno on failure, otherwise the
+	 * controller's status code - confirm against admin_command_execute().
+	 */
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		warn_status(ocxlpmem,
+			    "Unexpected status from controller dump",
+			    rc);
+		goto out;
+	}
+
+	rc = controller_dump_header_parse(ocxlpmem, &buf_size);
+	if (rc)
+		goto out;
+
+	/*
+	 * Include the header, but never exceed the caller's buffer. The sum
+	 * is computed as a size_t so it cannot wrap a u16.
+	 */
+	buf_size = min_t(size_t, buf_size + sizeof(u64), args.buf_size);
+
+	for (i = 0; i < buf_size / sizeof(u64); i++) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset +
+						i * sizeof(u64),
+					     OCXL_HOST_ENDIAN, &val);
+		if (rc)
+			goto out;
+
+		if (copy_to_user(&buf[i], &val, sizeof(u64))) {
+			rc = -EFAULT;
+			goto out;
+		}
+	}
+
+	args.buf_size = buf_size;
+
+	if (copy_to_user(uarg, &args, sizeof(args))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
+/**
+ * request_controller_dump() - Ask the controller to take a dump & wait for it
+ * @ocxlpmem: the device metadata
+ *
+ * Sets GLOBAL_MMIO_CHI_CDA in the CHIC register, sets
+ * GLOBAL_MMIO_HCI_CONTROLLER_DUMP in the HCI register, then polls HCI until
+ * that bit reads back as clear.
+ *
+ * NOTE(review): the poll has no timeout, so this will not return if the
+ * controller never clears the bit.
+ *
+ * Return: 0 on success, otherwise the error from the failing MMIO access
+ */
+int request_controller_dump(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+	u64 busy = 1;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_CHI_CDA);
+	if (rc)
+		return rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
+	if (rc)
+		return rc;
+
+	while (busy) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     GLOBAL_MMIO_HCI,
+					     OCXL_LITTLE_ENDIAN, &busy);
+		if (rc)
+			return rc;
+
+		busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
+		cond_resched();
+	}
+
+	return 0;
+}
+
+/**
+ * ioctl_controller_dump_complete() - Tell the controller the dump was collected
+ * @ocxlpmem: the device metadata
+ *
+ * Return: the result of setting GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED in
+ * the HCI register
+ */
+static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
+{
+	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				      OCXL_LITTLE_ENDIAN,
+				      GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
+}
+
 static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 {
 	struct ocxlpmem *ocxlpmem = file->private_data;
@@ -576,7 +769,21 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 		rc = ioctl_error_log(ocxlpmem,
 				     (struct ioctl_ocxlpmem_error_log __user *)args);
 		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP:
+		rc = request_controller_dump(ocxlpmem);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA:
+		rc = ioctl_controller_dump_data(ocxlpmem,
+						(struct ioctl_ocxlpmem_controller_dump_data __user *)args);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE:
+		rc = ioctl_controller_dump_complete(ocxlpmem);
+		break;
 	}
+
 	return rc;
 }
 
diff --git a/include/uapi/nvdimm/ocxlpmem.h b/include/uapi/nvdimm/ocxlpmem.h
index 5d3a03ea1e08..05e2b3f7b27c 100644
--- a/include/uapi/nvdimm/ocxlpmem.h
+++ b/include/uapi/nvdimm/ocxlpmem.h
@@ -38,9 +38,25 @@ struct ioctl_ocxlpmem_error_log {
 	__u64 reserved2[2];
 };
 
+struct ioctl_ocxlpmem_controller_dump_data {
+	__u64 buf_ptr; /* coerced pointer to output buffer */
+	__u16 buf_size; /* in/out, buffer size provided/required.
+			 * If required is greater than provided, the data
+			 * will be truncated to the amount provided. If it is
+			 * less, then only the required bytes will be populated.
+			 * If it is 0, then there is no more dump data available.
+			 */
+	__u16 reserved0;
+	__u32 offset; /* in, Offset within the dump */
+	__u64 reserved[8];
+};
+
 /* ioctl numbers */
 #define OCXLPMEM_MAGIC 0xCA
 /* OpenCAPI Persistent memory devices */
 #define IOCTL_OCXLPMEM_ERROR_LOG			_IOWR(OCXLPMEM_MAGIC, 0x30, struct ioctl_ocxlpmem_error_log)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP			_IO(OCXLPMEM_MAGIC, 0x31)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA		_IOWR(OCXLPMEM_MAGIC, 0x32, struct ioctl_ocxlpmem_controller_dump_data)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE		_IO(OCXLPMEM_MAGIC, 0x33)
 
 #endif /* _UAPI_OCXL_SCM_H */
-- 
2.24.1