On 8/22/24 15:02, Mathias Nyman wrote: > Don't flush all pending DbC data requests when an endpoint halts. > > An endpoint may halt and xHC DbC trigger a STALL error event if there's an trigger -> triggers > issue with a bulk data transfer. The transfer should restart once xHC DbC > receives a ClearFeature(ENDPOINT_HALT) request from the host. > > Once xHC DbC restarts it will start from the TRB pointed to by dequeue > field in the endpoint context, which might be the same TRB we got the > STALL event for. Turn the TRB to a no-op in this case to make sure xHC > DbC doesn't reuse and tries to retransmit this same TRB after we already > handled it, and gave its corresponding data request back. > > Other STALL events might be completely bogus. > Lukasz Bartosik discovered that xHC DbC might issue spurious STALL events > if hosts sends a ClearFeature(ENDPOINT_HALT) request to non-halted > endpoints even without any active bulk tranfers. > tranfers - > transfers > Assume STALL event is spurious if it reports 0 bytes tranferred, and tranferred -> transferred > the endpoint stopped on the STALLED TRB. > Don't give back the data request corresponding to the TRB in this case. > > The halted status is per endpoint. Track it with a per endpoint flag > instead of the driver invented DbC wide DS_STALLED state. > DbC remains in DbC-Configured state even if endpoints halt. There is no > Stalled state in the DbC Port state Machine (xhci section 7.6.6) > > Reported-by: Łukasz Bartosik <ukaszb@xxxxxxxxxxxx> > Closes: https://lore.kernel.org/linux-usb/20240725074857.623299-1-ukaszb@xxxxxxxxxxxx/ > Signed-off-by: Mathias Nyman <mathias.nyman@xxxxxxxxxxxxxxx> > --- Tested-by: Łukasz Bartosik <ukaszb@xxxxxxxxxxxx> In the trace below stall errors happen and even though the short packet is processed instead of being dropped as it happened previously without this fix. # tracer: nop # # entries-in-buffer/entries-written: 23/23 #P:12 # # _-----=> irqs-off/BH-disabled # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / _-=> migrate-disable # |||| / delay # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | kworker/6:1-460 [006] d..1. 394.400680: xhci_dbc_handle_event: EVENT: TRB 000000018451a000 status 'Stall Error' len 0 slot 1 ep 2 type 'Transfer Event' flags e:C kworker/6:1-460 [006] d..1. 394.406346: xhci_dbc_handle_event: EVENT: TRB 000000018451e000 status 'Stall Error' len 1024 slot 1 ep 3 type 'Transfer Event' flags e:C kworker/6:1-460 [006] d..1. 394.406346: xhci_dbc_handle_transfer: BULK: Buffer 00000002663a4400 length 1024 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:C kworker/6:1-460 [006] d..1. 394.423119: xhci_dbc_handle_event: EVENT: TRB 000000018451e000 status 'Short Packet' len 1000 slot 1 ep 3 type 'Transfer Event' flags e:C kworker/6:1-460 [006] d..1. 394.423120: xhci_dbc_handle_transfer: BULK: Buffer 00000002663a4400 length 1024 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:C kworker/6:1-460 [006] d..1. 394.423121: xhci_dbc_giveback_request: bulk-in: req 000000008837f33e length 24/1024 ==> -6 kworker/6:1-460 [006] d..1. 394.423123: xhci_dbc_gadget_ep_queue: BULK: Buffer 00000002663a4400 length 1024 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:c kworker/6:1-460 [006] d.... 394.423124: xhci_dbc_queue_request: bulk-in: req 000000008837f33e length 0/1024 ==> -115 kworker/6:1-460 [006] d..1. 394.423124: xhci_dbc_handle_event: EVENT: TRB 000000018451e010 status 'Short Packet' len 733 slot 1 ep 3 type 'Transfer Event' flags e:C kworker/6:1-460 [006] d..1. 394.423124: xhci_dbc_handle_transfer: BULK: Buffer 00000002663a4800 length 1024 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:C kworker/6:1-460 [006] d..1. 394.423124: xhci_dbc_giveback_request: bulk-in: req 000000004ed9eda1 length 291/1024 ==> 0 kworker/6:1-460 [006] d..1. 394.423125: xhci_dbc_gadget_ep_queue: BULK: Buffer 00000002663a4800 length 1024 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:c kworker/6:1-460 [006] d.... 394.423125: xhci_dbc_queue_request: bulk-in: req 000000004ed9eda1 length 0/1024 ==> -115 sock->usb-8395 [004] d..1. 394.423930: xhci_dbc_gadget_ep_queue: BULK: Buffer 00000002663a3400 length 24 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:c sock->usb-8395 [004] d.... 394.423931: xhci_dbc_queue_request: bulk-out: req 0000000092869359 length 0/24 ==> -115 kworker/6:1-460 [006] d..1. 394.423939: xhci_dbc_handle_event: EVENT: TRB 000000018451a000 status 'Success' len 0 slot 1 ep 2 type 'Transfer Event' flags e:C kworker/6:1-460 [006] d..1. 394.423939: xhci_dbc_handle_transfer: BULK: Buffer 00000002663a3400 length 24 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:C kworker/6:1-460 [006] d..1. 394.423940: xhci_dbc_giveback_request: bulk-out: req 0000000092869359 length 24/24 ==> 0 sock->usb-8395 [004] d..1. 394.423951: xhci_dbc_gadget_ep_queue: BULK: Buffer 00000002663a3400 length 371 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:c sock->usb-8395 [004] d.... 394.423951: xhci_dbc_queue_request: bulk-out: req 0000000092869359 length 0/371 ==> -115 kworker/6:1-460 [006] d..1. 394.424094: xhci_dbc_handle_event: EVENT: TRB 000000018451a010 status 'Success' len 0 slot 1 ep 2 type 'Transfer Event' flags e:C kworker/6:1-460 [006] d..1. 394.424094: xhci_dbc_handle_transfer: BULK: Buffer 00000002663a3400 length 371 TD size 0 intr 0 type 'Normal' flags b:i:I:c:s:i:e:C kworker/6:1-460 [006] d..1. 394.424094: xhci_dbc_giveback_request: bulk-out: req 0000000092869359 length 371/371 ==> 0 > drivers/usb/host/xhci-dbgcap.c | 133 ++++++++++++++++++++------------- > drivers/usb/host/xhci-dbgcap.h | 2 +- > 2 files changed, 83 insertions(+), 52 deletions(-) > > diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c > index 161c09953c4e..241d7aa1fbc2 100644 > --- a/drivers/usb/host/xhci-dbgcap.c > +++ b/drivers/usb/host/xhci-dbgcap.c > @@ -173,16 +173,18 @@ static void xhci_dbc_giveback(struct dbc_request *req, int status) > spin_lock(&dbc->lock); > } > > -static void xhci_dbc_flush_single_request(struct dbc_request *req) > +static void trb_to_noop(union xhci_trb *trb) > { > - union xhci_trb *trb = req->trb; > - > trb->generic.field[0] = 0; > trb->generic.field[1] = 0; > trb->generic.field[2] = 0; > trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE); > trb->generic.field[3] |= cpu_to_le32(TRB_TYPE(TRB_TR_NOOP)); > +} > > +static void xhci_dbc_flush_single_request(struct dbc_request *req) > +{ > + trb_to_noop(req->trb); > xhci_dbc_giveback(req, -ESHUTDOWN); > } > > @@ -649,7 +651,6 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc) > case DS_DISABLED: > return; > case DS_CONFIGURED: > - case DS_STALLED: > if (dbc->driver->disconnect) > dbc->driver->disconnect(dbc); > break; > @@ -669,6 +670,23 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc) > pm_runtime_put_sync(dbc->dev); /* note, was self.controller */ > } > > +static void > +handle_ep_halt_changes(struct xhci_dbc *dbc, struct dbc_ep *dep, bool halted) > +{ > + if (halted) { > + dev_info(dbc->dev, "DbC Endpoint halted\n"); > + dep->halted = 1; > + > + } else if (dep->halted) { > + dev_info(dbc->dev, "DbC Endpoint halt cleared\n"); > + dep->halted = 0; > + > + if (!list_empty(&dep->list_pending)) > + writel(DBC_DOOR_BELL_TARGET(dep->direction), > + &dbc->regs->doorbell); > + } > +} > + > static void > dbc_handle_port_status(struct xhci_dbc *dbc, union xhci_trb *event) > { > @@ -697,6 +715,7 @@ static void dbc_handle_xfer_event(struct xhci_dbc *dbc, union xhci_trb *event) > struct xhci_ring *ring; > int ep_id; > int status; > + struct xhci_ep_ctx *ep_ctx; > u32 comp_code; > size_t remain_length; > struct dbc_request *req = NULL, *r; > @@ -706,8 +725,30 @@ static void dbc_handle_xfer_event(struct xhci_dbc *dbc, union xhci_trb *event) > ep_id = TRB_TO_EP_ID(le32_to_cpu(event->generic.field[3])); > dep = (ep_id == EPID_OUT) ? > get_out_ep(dbc) : get_in_ep(dbc); > + ep_ctx = (ep_id == EPID_OUT) ? > + dbc_bulkout_ctx(dbc) : dbc_bulkin_ctx(dbc); > ring = dep->ring; > > + /* Match the pending request: */ > + list_for_each_entry(r, &dep->list_pending, list_pending) { > + if (r->trb_dma == event->trans_event.buffer) { > + req = r; > + break; > + } > + if (r->status == -COMP_STALL_ERROR) { > + dev_warn(dbc->dev, "Give back stale stalled req\n"); > + ring->num_trbs_free++; > + xhci_dbc_giveback(r, 0); > + } > + } > + > + if (!req) { > + dev_warn(dbc->dev, "no matched request\n"); > + return; > + } > + > + trace_xhci_dbc_handle_transfer(ring, &req->trb->generic); > + > switch (comp_code) { > case COMP_SUCCESS: > remain_length = 0; > @@ -718,31 +759,49 @@ static void dbc_handle_xfer_event(struct xhci_dbc *dbc, union xhci_trb *event) > case COMP_TRB_ERROR: > case COMP_BABBLE_DETECTED_ERROR: > case COMP_USB_TRANSACTION_ERROR: > - case COMP_STALL_ERROR: > dev_warn(dbc->dev, "tx error %d detected\n", comp_code); > status = -comp_code; > break; > + case COMP_STALL_ERROR: > + dev_warn(dbc->dev, "Stall error at bulk TRB %llx, remaining %zu, ep deq %llx\n", > + event->trans_event.buffer, remain_length, ep_ctx->deq); > + status = 0; > + dep->halted = 1; > + > + /* > + * xHC DbC may trigger a STALL bulk xfer event when host sends a > + * ClearFeature(ENDPOINT_HALT) request even if there wasn't an > + * active bulk transfer. > + * > + * Don't give back this transfer request as hardware will later > + * start processing TRBs starting from this 'STALLED' TRB, > + * causing TRBs and requests to be out of sync. > + * > + * If STALL event shows some bytes were transferred then assume > + * it's an actual transfer issue and give back the request. > + * In this case mark the TRB as No-Op to avoid hw from using the > + * TRB again. > + */ > + > + if ((ep_ctx->deq & ~TRB_CYCLE) == event->trans_event.buffer) { > + dev_dbg(dbc->dev, "Ep stopped on Stalled TRB\n"); > + if (remain_length == req->length) { > + dev_dbg(dbc->dev, "Spurious stall event, keep req\n"); > + req->status = -COMP_STALL_ERROR; > + req->actual = 0; > + return; > + } > + dev_dbg(dbc->dev, "Give back stalled req, but turn TRB to No-op\n"); > + trb_to_noop(req->trb); > + } > + break; > + > default: > dev_err(dbc->dev, "unknown tx error %d\n", comp_code); > status = -comp_code; > break; > } > > - /* Match the pending request: */ > - list_for_each_entry(r, &dep->list_pending, list_pending) { > - if (r->trb_dma == event->trans_event.buffer) { > - req = r; > - break; > - } > - } > - > - if (!req) { > - dev_warn(dbc->dev, "no matched request\n"); > - return; > - } > - > - trace_xhci_dbc_handle_transfer(ring, &req->trb->generic); > - > ring->num_trbs_free++; > req->actual = req->length - remain_length; > xhci_dbc_giveback(req, status); > @@ -762,7 +821,6 @@ static void inc_evt_deq(struct xhci_ring *ring) > static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) > { > dma_addr_t deq; > - struct dbc_ep *dep; > union xhci_trb *evt; > u32 ctrl, portsc; > bool update_erdp = false; > @@ -814,43 +872,17 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) > return EVT_DISC; > } > > - /* Handle endpoint stall event: */ > + /* Check and handle changes in endpoint halt status */ > ctrl = readl(&dbc->regs->control); > - if ((ctrl & DBC_CTRL_HALT_IN_TR) || > - (ctrl & DBC_CTRL_HALT_OUT_TR)) { > - dev_info(dbc->dev, "DbC Endpoint stall\n"); > - dbc->state = DS_STALLED; > - > - if (ctrl & DBC_CTRL_HALT_IN_TR) { > - dep = get_in_ep(dbc); > - xhci_dbc_flush_endpoint_requests(dep); > - } > - > - if (ctrl & DBC_CTRL_HALT_OUT_TR) { > - dep = get_out_ep(dbc); > - xhci_dbc_flush_endpoint_requests(dep); > - } > - > - return EVT_DONE; > - } > + handle_ep_halt_changes(dbc, get_in_ep(dbc), ctrl & DBC_CTRL_HALT_IN_TR); > + handle_ep_halt_changes(dbc, get_out_ep(dbc), ctrl & DBC_CTRL_HALT_OUT_TR); > > /* Clear DbC run change bit: */ > if (ctrl & DBC_CTRL_DBC_RUN_CHANGE) { > writel(ctrl, &dbc->regs->control); > ctrl = readl(&dbc->regs->control); > } > - > break; > - case DS_STALLED: > - ctrl = readl(&dbc->regs->control); > - if (!(ctrl & DBC_CTRL_HALT_IN_TR) && > - !(ctrl & DBC_CTRL_HALT_OUT_TR) && > - (ctrl & DBC_CTRL_DBC_RUN)) { > - dbc->state = DS_CONFIGURED; > - break; > - } > - > - return EVT_DONE; > default: > dev_err(dbc->dev, "Unknown DbC state %d\n", dbc->state); > break; > @@ -939,7 +971,6 @@ static const char * const dbc_state_strings[DS_MAX] = { > [DS_ENABLED] = "enabled", > [DS_CONNECTED] = "connected", > [DS_CONFIGURED] = "configured", > - [DS_STALLED] = "stalled", > }; > > static ssize_t dbc_show(struct device *dev, > diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h > index 0118c6288a3c..97c5dc290138 100644 > --- a/drivers/usb/host/xhci-dbgcap.h > +++ b/drivers/usb/host/xhci-dbgcap.h > @@ -81,7 +81,6 @@ enum dbc_state { > DS_ENABLED, > DS_CONNECTED, > DS_CONFIGURED, > - DS_STALLED, > DS_MAX > }; > > @@ -90,6 +89,7 @@ struct dbc_ep { > struct list_head list_pending; > struct xhci_ring *ring; > unsigned int direction:1; > + unsigned int halted:1; > }; > > #define DBC_QUEUE_SIZE 16