On Tue, 2020-03-03 at 19:04 +0100, Frederic Barrat wrote: > > Le 21/02/2020 à 04:27, Alastair D'Silva a écrit : > > From: Alastair D'Silva <alastair@xxxxxxxxxxx> > > > > This patch adds IOCTLs to allow userspace to request & fetch dumps > > of the internal controller state. > > > > This is useful during debugging or when a fatal error on the > > controller > > has occurred. > > > > Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx> > > --- > > arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 > > +++++++++++++++++++++ > > include/uapi/nvdimm/ocxl-pmem.h | 15 +++ > > 2 files changed, 147 insertions(+) > > > > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > index 2b64504f9129..2cabafe1fc58 100644 > > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem > > *ocxlpmem, > > return 0; > > } > > > > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem, > > + struct ioctl_ocxl_pmem_controller_dump_data __user > > *uarg) > > +{ > > + struct ioctl_ocxl_pmem_controller_dump_data args; > > + u16 i; > > + u64 val; > > + int rc; > > + > > + if (copy_from_user(&args, uarg, sizeof(args))) > > + return -EFAULT; > > + > > + if (args.buf_size % 8) > > + return -EINVAL; > > + > > + if (args.buf_size > ocxlpmem->admin_command.data_size) > > + return -EINVAL; > > + > > + mutex_lock(&ocxlpmem->admin_command.lock); > > + > > + rc = admin_command_request(ocxlpmem, > > ADMIN_COMMAND_CONTROLLER_DUMP); > > + if (rc) > > + goto out; > > + > > + val = ((u64)args.offset) << 32; > > + val |= args.buf_size; > > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, > > + ocxlpmem- > > >admin_command.request_offset + 0x08, > > + OCXL_LITTLE_ENDIAN, val); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_execute(ocxlpmem); > > + if (rc) > > + goto out; > > + > > + rc = 
admin_command_complete_timeout(ocxlpmem, > > + ADMIN_COMMAND_CONTROLLER_DU > > MP); > > + if (rc < 0) { > > + dev_warn(&ocxlpmem->dev, "Controller dump timed > > out\n"); > > + goto out; > > + } > > + > > + rc = admin_response(ocxlpmem); > > + if (rc < 0) > > + goto out; > > + if (rc != STATUS_SUCCESS) { > > + warn_status(ocxlpmem, > > + "Unexpected status from retrieve error > > log", > > + rc); > > + goto out; > > + } > > > It would help if there was a comment indicating how the 3 ioctls are > used. My understanding is that the userland is: > - requesting the controller to prepare a state dump > - then one or more ioctls to fetch the data. The number of calls > required to get the full state really depends on the size of the > buffer > passed by user > - a last ioctl to tell the controller that we're done, presumably to > let > it free some resources. > Ok, will add it to the blurb. > > > + > > + for (i = 0; i < args.buf_size; i += 8) { > > + u64 val; > > + > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > + ocxlpmem- > > >admin_command.data_offset + i, > > + OCXL_HOST_ENDIAN, &val); > > + if (rc) > > + goto out; > > + > > + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + } > > + > > + if (copy_to_user(uarg, &args, sizeof(args))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + > > + rc = admin_response_handled(ocxlpmem); > > + if (rc) > > + goto out; > > + > > +out: > > + mutex_unlock(&ocxlpmem->admin_command.lock); > > + return rc; > > +} > > + > > +int request_controller_dump(struct ocxlpmem *ocxlpmem) > > +{ > > + int rc; > > + u64 busy = 1; > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_CHIC, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_CHI_CDA); > > + > > rc is not checked here. 
Whoops > > > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_HCI, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP); > > + if (rc) > > + return rc; > > + > > + while (busy) { > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > + GLOBAL_MMIO_HCI, > > + OCXL_LITTLE_ENDIAN, > > &busy); > > + if (rc) > > + return rc; > > + > > + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP; > > Setting 'busy' doesn't hurt, but it's not really useful, is it? > > We should add some kind of timeout so that if the controller hits an > issue, we don't spin in kernel space endlessly. > > Here we are polling the controller dump bit of the HCI register until the controller clears it; that line masks off the bits we don't care about. I'll talk to the firmware team about adding a timeout for that to the spec so we know how long to wait before giving up. > > > + cond_resched(); > > + } > > + > > + return 0; > > +} > > + > > +static int ioctl_controller_dump_complete(struct ocxlpmem > > *ocxlpmem) > > +{ > > + return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_HCI, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COL > > LECTED); > > +} > > + > > static long file_ioctl(struct file *file, unsigned int cmd, > > unsigned long args) > > { > > struct ocxlpmem *ocxlpmem = file->private_data; > > @@ -650,7 +768,21 @@ static long file_ioctl(struct file *file, > > unsigned int cmd, unsigned long args) > > rc = ioctl_error_log(ocxlpmem, > > (struct ioctl_ocxl_pmem_error_log > > __user *)args); > > break; > > + > > + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP: > > + rc = request_controller_dump(ocxlpmem); > > + break; > > + > > + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA: > > + rc = ioctl_controller_dump_data(ocxlpmem, > > + (struct > > ioctl_ocxl_pmem_controller_dump_data __user *)args); > > + break; > > + > > + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE: > > + rc = ioctl_controller_dump_complete(ocxlpmem); > > + break; > > } > 
> + > > return rc; > > } > > > > diff --git a/include/uapi/nvdimm/ocxl-pmem.h > > b/include/uapi/nvdimm/ocxl-pmem.h > > index b10f8ac0c20f..d4d8512d03f7 100644 > > --- a/include/uapi/nvdimm/ocxl-pmem.h > > +++ b/include/uapi/nvdimm/ocxl-pmem.h > > @@ -38,9 +38,24 @@ struct ioctl_ocxl_pmem_error_log { > > __u8 *buf; /* pointer to output buffer */ > > }; > > > > +struct ioctl_ocxl_pmem_controller_dump_data { > > + __u8 *buf; /* pointer to output buffer */ > > We only support 64-bit user apps on powerpc, but using a pointer type > in > a kernel ABI is unusual. We should use a known size like __u64. > (also applies to buf pointer in struct ioctl_ocxl_pmem_error_log > from > previous patch too) > > The rest of the structure will also be padded by the compiler, which > we > should avoid. > > Fred > Ok, I'll coerce the pointers into a __u64. > > > > + __u16 buf_size; /* in/out, buffer size provided/required. > > + * If required is greater than provided, the > > buffer > > + * will be truncated to the amount provided. If > > its > > + * less, then only the required bytes will be > > populated. > > + * If it is 0, then there is no more dump data > > available. > > + */ > > + __u32 offset; /* in, Offset within the dump */ > > + __u64 reserved[8]; > > +}; > > + > > /* ioctl numbers */ > > #define OCXL_PMEM_MAGIC 0x5C > > /* SCM devices */ > > #define IOCTL_OCXL_PMEM_ERROR_LOG _IOWR(OCXL_PMEM > > _MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log) > > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCX > > L_PMEM_MAGIC, 0x02) > > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(O > > CXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data) > > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_M > > AGIC, 0x04) > > > > #endif /* _UAPI_OCXL_SCM_H */ > > -- Alastair D'Silva Open Source Developer Linux Technology Centre, IBM Australia mob: 0423 762 819