Hi, On 12/3/20 10:26 PM, Maximilian Luz wrote: > This commit adds error injection hooks to the Surface Serial Hub > communication protocol implementation, to: > > - simulate simple serial transmission errors, > > - drop packets, requests, and responses, simulating communication > failures and potentially trigger retransmission timeouts, as well as > > - inject invalid data into submitted and received packets. > > Together with the trace points introduced in the previous commit, these > facilities are intended to aid in testing, validation, and debugging of > the Surface Aggregator communication layer. > > Signed-off-by: Maximilian Luz <luzmaximilian@xxxxxxxxx> Thanks, patch looks good to me: Reviewed-by: Hans de Goede <hdegoede@xxxxxxxxxx> Regards, Hans > --- > > Changes in v1 (from RFC): > - remove unnecessary default in Kconfig entry > > Changes in v2: > - use dedicated trace event class for data error injection > - spell check comments and strings, fix typos > - unify comment style > - run checkpatch --strict, fix warnings and style issues > > --- > drivers/platform/surface/aggregator/Kconfig | 14 + > .../surface/aggregator/ssh_packet_layer.c | 296 +++++++++++++++++- > .../surface/aggregator/ssh_request_layer.c | 35 +++ > drivers/platform/surface/aggregator/trace.h | 31 ++ > 4 files changed, 375 insertions(+), 1 deletion(-) > > diff --git a/drivers/platform/surface/aggregator/Kconfig b/drivers/platform/surface/aggregator/Kconfig > index ce34941ef91b..48f40c345e29 100644 > --- a/drivers/platform/surface/aggregator/Kconfig > +++ b/drivers/platform/surface/aggregator/Kconfig > @@ -41,3 +41,17 @@ menuconfig SURFACE_AGGREGATOR > Choose m if you want to build the SAM subsystem core and SSH driver as > module, y if you want to build it into the kernel and n if you don't > want it at all. > + > +config SURFACE_AGGREGATOR_ERROR_INJECTION > + bool "Surface System Aggregator Module Error Injection Capabilities" > + depends on SURFACE_AGGREGATOR > + depends on FUNCTION_ERROR_INJECTION > + help > + Provides error-injection capabilities for the Surface System > + Aggregator Module subsystem and Surface Serial Hub driver. > + > + Specifically, exports error injection hooks to be used with the > + kernel's function error injection capabilities to simulate underlying > + transport and communication problems, such as invalid data sent to or > + received from the EC, dropped data, and communication timeouts. > + Intended for development and debugging. > diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.c b/drivers/platform/surface/aggregator/ssh_packet_layer.c > index f5ec58a1faa2..0205be9380bf 100644 > --- a/drivers/platform/surface/aggregator/ssh_packet_layer.c > +++ b/drivers/platform/surface/aggregator/ssh_packet_layer.c > @@ -7,6 +7,7 @@ > > #include <asm/unaligned.h> > #include <linux/atomic.h> > +#include <linux/error-injection.h> > #include <linux/jiffies.h> > #include <linux/kfifo.h> > #include <linux/kref.h> > @@ -225,6 +226,286 @@ > */ > #define SSH_PTL_RX_FIFO_LEN 4096 > > +#ifdef CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION > + > +/** > + * ssh_ptl_should_drop_ack_packet() - Error injection hook to drop ACK packets. > + * > + * Useful to test detection and handling of automated re-transmits by the EC. > + * Specifically of packets that the EC considers not-ACKed but the driver > + * already considers ACKed (due to dropped ACK). In this case, the EC > + * re-transmits the packet-to-be-ACKed and the driver should detect it as > + * duplicate/already handled. Note that the driver should still send an ACK > + * for the re-transmitted packet. > + */ > +static noinline bool ssh_ptl_should_drop_ack_packet(void) > +{ > + return false; > +} > +ALLOW_ERROR_INJECTION(ssh_ptl_should_drop_ack_packet, TRUE); > + > +/** > + * ssh_ptl_should_drop_nak_packet() - Error injection hook to drop NAK packets. > + * > + * Useful to test/force automated (timeout-based) re-transmit by the EC. > + * Specifically, packets that have not reached the driver completely/with valid > + * checksums. Only useful in combination with receival of (injected) bad data. > + */ > +static noinline bool ssh_ptl_should_drop_nak_packet(void) > +{ > + return false; > +} > +ALLOW_ERROR_INJECTION(ssh_ptl_should_drop_nak_packet, TRUE); > + > +/** > + * ssh_ptl_should_drop_dsq_packet() - Error injection hook to drop sequenced > + * data packet. > + * > + * Useful to test re-transmit timeout of the driver. If the data packet has not > + * been ACKed after a certain time, the driver should re-transmit the packet up > + * to limited number of times defined in SSH_PTL_MAX_PACKET_TRIES. > + */ > +static noinline bool ssh_ptl_should_drop_dsq_packet(void) > +{ > + return false; > +} > +ALLOW_ERROR_INJECTION(ssh_ptl_should_drop_dsq_packet, TRUE); > + > +/** > + * ssh_ptl_should_fail_write() - Error injection hook to make > + * serdev_device_write() fail. > + * > + * Hook to simulate errors in serdev_device_write when transmitting packets. > + */ > +static noinline int ssh_ptl_should_fail_write(void) > +{ > + return 0; > +} > +ALLOW_ERROR_INJECTION(ssh_ptl_should_fail_write, ERRNO); > + > +/** > + * ssh_ptl_should_corrupt_tx_data() - Error injection hook to simulate invalid > + * data being sent to the EC. > + * > + * Hook to simulate corrupt/invalid data being sent from host (driver) to EC. > + * Causes the packet data to be actively corrupted by overwriting it with > + * pre-defined values, such that it becomes invalid, causing the EC to respond > + * with a NAK packet. Useful to test handling of NAK packets received by the > + * driver. > + */ > +static noinline bool ssh_ptl_should_corrupt_tx_data(void) > +{ > + return false; > +} > +ALLOW_ERROR_INJECTION(ssh_ptl_should_corrupt_tx_data, TRUE); > + > +/** > + * ssh_ptl_should_corrupt_rx_syn() - Error injection hook to simulate invalid > + * data being sent by the EC. > + * > + * Hook to simulate invalid SYN bytes, i.e. an invalid start of messages and > + * test handling thereof in the driver. > + */ > +static noinline bool ssh_ptl_should_corrupt_rx_syn(void) > +{ > + return false; > +} > +ALLOW_ERROR_INJECTION(ssh_ptl_should_corrupt_rx_syn, TRUE); > + > +/** > + * ssh_ptl_should_corrupt_rx_data() - Error injection hook to simulate invalid > + * data being sent by the EC. > + * > + * Hook to simulate invalid data/checksum of the message frame and test handling > + * thereof in the driver. > + */ > +static noinline bool ssh_ptl_should_corrupt_rx_data(void) > +{ > + return false; > +} > +ALLOW_ERROR_INJECTION(ssh_ptl_should_corrupt_rx_data, TRUE); > + > +static bool __ssh_ptl_should_drop_ack_packet(struct ssh_packet *packet) > +{ > + if (likely(!ssh_ptl_should_drop_ack_packet())) > + return false; > + > + trace_ssam_ei_tx_drop_ack_packet(packet); > + ptl_info(packet->ptl, "packet error injection: dropping ACK packet %p\n", > + packet); > + > + return true; > +} > + > +static bool __ssh_ptl_should_drop_nak_packet(struct ssh_packet *packet) > +{ > + if (likely(!ssh_ptl_should_drop_nak_packet())) > + return false; > + > + trace_ssam_ei_tx_drop_nak_packet(packet); > + ptl_info(packet->ptl, "packet error injection: dropping NAK packet %p\n", > + packet); > + > + return true; > +} > + > +static bool __ssh_ptl_should_drop_dsq_packet(struct ssh_packet *packet) > +{ > + if (likely(!ssh_ptl_should_drop_dsq_packet())) > + return false; > + > + trace_ssam_ei_tx_drop_dsq_packet(packet); > + ptl_info(packet->ptl, > + "packet error injection: dropping sequenced data packet %p\n", > + packet); > + > + return true; > +} > + > +static bool ssh_ptl_should_drop_packet(struct ssh_packet *packet) > +{ > + /* Ignore packets that don't carry any data (i.e. flush). */ > + if (!packet->data.ptr || !packet->data.len) > + return false; > + > + switch (packet->data.ptr[SSH_MSGOFFSET_FRAME(type)]) { > + case SSH_FRAME_TYPE_ACK: > + return __ssh_ptl_should_drop_ack_packet(packet); > + > + case SSH_FRAME_TYPE_NAK: > + return __ssh_ptl_should_drop_nak_packet(packet); > + > + case SSH_FRAME_TYPE_DATA_SEQ: > + return __ssh_ptl_should_drop_dsq_packet(packet); > + > + default: > + return false; > + } > +} > + > +static int ssh_ptl_write_buf(struct ssh_ptl *ptl, struct ssh_packet *packet, > + const unsigned char *buf, size_t count) > +{ > + int status; > + > + status = ssh_ptl_should_fail_write(); > + if (unlikely(status)) { > + trace_ssam_ei_tx_fail_write(packet, status); > + ptl_info(packet->ptl, > + "packet error injection: simulating transmit error %d, packet %p\n", > + status, packet); > + > + return status; > + } > + > + return serdev_device_write_buf(ptl->serdev, buf, count); > +} > + > +static void ssh_ptl_tx_inject_invalid_data(struct ssh_packet *packet) > +{ > + /* Ignore packets that don't carry any data (i.e. flush). */ > + if (!packet->data.ptr || !packet->data.len) > + return; > + > + /* Only allow sequenced data packets to be modified. */ > + if (packet->data.ptr[SSH_MSGOFFSET_FRAME(type)] != SSH_FRAME_TYPE_DATA_SEQ) > + return; > + > + if (likely(!ssh_ptl_should_corrupt_tx_data())) > + return; > + > + trace_ssam_ei_tx_corrupt_data(packet); > + ptl_info(packet->ptl, > + "packet error injection: simulating invalid transmit data on packet %p\n", > + packet); > + > + /* > + * NB: The value 0xb3 has been chosen more or less randomly so that it > + * doesn't have any (major) overlap with the SYN bytes (aa 55) and is > + * non-trivial (i.e. non-zero, non-0xff). > + */ > + memset(packet->data.ptr, 0xb3, packet->data.len); > +} > + > +static void ssh_ptl_rx_inject_invalid_syn(struct ssh_ptl *ptl, > + struct ssam_span *data) > +{ > + struct ssam_span frame; > + > + /* Check if there actually is something to corrupt. */ > + if (!sshp_find_syn(data, &frame)) > + return; > + > + if (likely(!ssh_ptl_should_corrupt_rx_syn())) > + return; > + > + trace_ssam_ei_rx_corrupt_syn(data->len); > + > + data->ptr[1] = 0xb3; /* Set second byte of SYN to "random" value. */ > +} > + > +static void ssh_ptl_rx_inject_invalid_data(struct ssh_ptl *ptl, > + struct ssam_span *frame) > +{ > + size_t payload_len, message_len; > + struct ssh_frame *sshf; > + > + /* Ignore incomplete messages, will get handled once it's complete. */ > + if (frame->len < SSH_MESSAGE_LENGTH(0)) > + return; > + > + /* Ignore incomplete messages, part 2. */ > + payload_len = get_unaligned_le16(&frame->ptr[SSH_MSGOFFSET_FRAME(len)]); > + message_len = SSH_MESSAGE_LENGTH(payload_len); > + if (frame->len < message_len) > + return; > + > + if (likely(!ssh_ptl_should_corrupt_rx_data())) > + return; > + > + sshf = (struct ssh_frame *)&frame->ptr[SSH_MSGOFFSET_FRAME(type)]; > + trace_ssam_ei_rx_corrupt_data(sshf); > + > + /* > + * Flip bits in first byte of payload checksum. This is basically > + * equivalent to a payload/frame data error without us having to worry > + * about (the, arguably pretty small, probability of) accidental > + * checksum collisions. > + */ > + frame->ptr[frame->len - 2] = ~frame->ptr[frame->len - 2]; > +} > + > +#else /* CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION */ > + > +static inline bool ssh_ptl_should_drop_packet(struct ssh_packet *packet) > +{ > + return false; > +} > + > +static inline int ssh_ptl_write_buf(struct ssh_ptl *ptl, > + struct ssh_packet *packet, > + const unsigned char *buf, > + size_t count) > +{ > + return serdev_device_write_buf(ptl->serdev, buf, count); > +} > + > +static inline void ssh_ptl_tx_inject_invalid_data(struct ssh_packet *packet) > +{ > +} > + > +static inline void ssh_ptl_rx_inject_invalid_syn(struct ssh_ptl *ptl, > + struct ssam_span *data) > +{ > +} > + > +static inline void ssh_ptl_rx_inject_invalid_data(struct ssh_ptl *ptl, > + struct ssam_span *frame) > +{ > +} > + > +#endif /* CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION */ > + > static void __ssh_ptl_packet_release(struct kref *kref) > { > struct ssh_packet *p = container_of(kref, struct ssh_packet, refcnt); > @@ -769,6 +1050,13 @@ static int ssh_ptl_tx_packet(struct ssh_ptl *ptl, struct ssh_packet *packet) > if (unlikely(!packet->data.ptr)) > return 0; > > + /* Error injection: drop packet to simulate transmission problem. */ > + if (ssh_ptl_should_drop_packet(packet)) > + return 0; > + > + /* Error injection: simulate invalid packet data. */ > + ssh_ptl_tx_inject_invalid_data(packet); > + > ptl_dbg(ptl, "tx: sending data (length: %zu)\n", packet->data.len); > print_hex_dump_debug("tx: ", DUMP_PREFIX_OFFSET, 16, 1, > packet->data.ptr, packet->data.len, false); > @@ -780,7 +1068,7 @@ static int ssh_ptl_tx_packet(struct ssh_ptl *ptl, struct ssh_packet *packet) > buf = packet->data.ptr + offset; > len = packet->data.len - offset; > > - status = serdev_device_write_buf(ptl->serdev, buf, len); > + status = ssh_ptl_write_buf(ptl, packet, buf, len); > if (status < 0) > return status; > > @@ -1389,6 +1677,9 @@ static size_t ssh_ptl_rx_eval(struct ssh_ptl *ptl, struct ssam_span *source) > bool syn_found; > int status; > > + /* Error injection: Modify data to simulate corrupt SYN bytes. */ > + ssh_ptl_rx_inject_invalid_syn(ptl, source); > + > /* Find SYN. */ > syn_found = sshp_find_syn(source, &aligned); > > @@ -1419,6 +1710,9 @@ static size_t ssh_ptl_rx_eval(struct ssh_ptl *ptl, struct ssam_span *source) > if (unlikely(!syn_found)) > return aligned.ptr - source->ptr; > > + /* Error injection: Modify data to simulate corruption. */ > + ssh_ptl_rx_inject_invalid_data(ptl, &aligned); > + > /* Parse and validate frame. */ > status = sshp_parse_frame(&ptl->serdev->dev, &aligned, &frame, &payload, > SSH_PTL_RX_BUF_LEN); > diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c > index e91d2ed4c173..bf1782c85063 100644 > --- a/drivers/platform/surface/aggregator/ssh_request_layer.c > +++ b/drivers/platform/surface/aggregator/ssh_request_layer.c > @@ -8,6 +8,7 @@ > #include <asm/unaligned.h> > #include <linux/atomic.h> > #include <linux/completion.h> > +#include <linux/error-injection.h> > #include <linux/ktime.h> > #include <linux/limits.h> > #include <linux/list.h> > @@ -58,6 +59,30 @@ > */ > #define SSH_RTL_TX_BATCH 10 > > +#ifdef CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION > + > +/** > + * ssh_rtl_should_drop_response() - Error injection hook to drop request > + * responses. > + * > + * Useful to cause request transmission timeouts in the driver by dropping the > + * response to a request. > + */ > +static noinline bool ssh_rtl_should_drop_response(void) > +{ > + return false; > +} > +ALLOW_ERROR_INJECTION(ssh_rtl_should_drop_response, TRUE); > + > +#else > + > +static inline bool ssh_rtl_should_drop_response(void) > +{ > + return false; > +} > + > +#endif > + > static u16 ssh_request_get_rqid(struct ssh_request *rqst) > { > return get_unaligned_le16(rqst->packet.data.ptr > @@ -459,6 +484,16 @@ static void ssh_rtl_complete(struct ssh_rtl *rtl, > if (unlikely(ssh_request_get_rqid(p) != rqid)) > continue; > > + /* Simulate response timeout. */ > + if (ssh_rtl_should_drop_response()) { > + spin_unlock(&rtl->pending.lock); > + > + trace_ssam_ei_rx_drop_response(p); > + rtl_info(rtl, "request error injection: dropping response for request %p\n", > + &p->packet); > + return; > + } > + > /* > * Mark as "response received" and "locked" as we're going to > * complete it. > diff --git a/drivers/platform/surface/aggregator/trace.h b/drivers/platform/surface/aggregator/trace.h > index dcca8007d876..eb332bb53ae4 100644 > --- a/drivers/platform/surface/aggregator/trace.h > +++ b/drivers/platform/surface/aggregator/trace.h > @@ -565,6 +565,28 @@ DECLARE_EVENT_CLASS(ssam_pending_class, > TP_ARGS(pending) \ > ) > > +DECLARE_EVENT_CLASS(ssam_data_class, > + TP_PROTO(size_t length), > + > + TP_ARGS(length), > + > + TP_STRUCT__entry( > + __field(size_t, length) > + ), > + > + TP_fast_assign( > + __entry->length = length; > + ), > + > + TP_printk("length=%zu", __entry->length) > +); > + > +#define DEFINE_SSAM_DATA_EVENT(name) \ > + DEFINE_EVENT(ssam_data_class, ssam_##name, \ > + TP_PROTO(size_t length), \ > + TP_ARGS(length) \ > + ) > + > DEFINE_SSAM_FRAME_EVENT(rx_frame_received); > DEFINE_SSAM_COMMAND_EVENT(rx_response_received); > DEFINE_SSAM_COMMAND_EVENT(rx_event_received); > @@ -583,6 +605,15 @@ DEFINE_SSAM_REQUEST_EVENT(request_cancel); > DEFINE_SSAM_REQUEST_STATUS_EVENT(request_complete); > DEFINE_SSAM_PENDING_EVENT(rtl_timeout_reap); > > +DEFINE_SSAM_PACKET_EVENT(ei_tx_drop_ack_packet); > +DEFINE_SSAM_PACKET_EVENT(ei_tx_drop_nak_packet); > +DEFINE_SSAM_PACKET_EVENT(ei_tx_drop_dsq_packet); > +DEFINE_SSAM_PACKET_STATUS_EVENT(ei_tx_fail_write); > +DEFINE_SSAM_PACKET_EVENT(ei_tx_corrupt_data); > +DEFINE_SSAM_DATA_EVENT(ei_rx_corrupt_syn); > +DEFINE_SSAM_FRAME_EVENT(ei_rx_corrupt_data); > +DEFINE_SSAM_REQUEST_EVENT(ei_rx_drop_response); > + > DEFINE_SSAM_ALLOC_EVENT(ctrl_packet_alloc); > DEFINE_SSAM_FREE_EVENT(ctrl_packet_free); > > -- > 2.29.2 >