This is a note to let you know that I've just added the patch titled thunderbolt: Restart XDomain discovery handshake after failure to the 6.5-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: thunderbolt-restart-xdomain-discovery-handshake-after-failure.patch and it can be found in the queue-6.5 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 308092d080852f8997126e5b3507536162416f4a Mon Sep 17 00:00:00 2001 From: Mika Westerberg <mika.westerberg@xxxxxxxxxxxxxxx> Date: Thu, 7 Sep 2023 16:02:30 +0300 Subject: thunderbolt: Restart XDomain discovery handshake after failure From: Mika Westerberg <mika.westerberg@xxxxxxxxxxxxxxx> commit 308092d080852f8997126e5b3507536162416f4a upstream. Alex reported that after rebooting the other host the peer-to-peer link does not come up anymore. The reason for this is that the host that was not rebooted tries to send the UUID request only 10 times according to the USB4 Inter-Domain spec and gives up if it does not get reply. Then when the other side is actually ready it cannot get the link established anymore. The USB4 Inter-Domain spec requires that the discovery protocol is restarted in that case so implement this now. Reported-by: Alex Balcanquall <alex@xxxxxxxxxxx> Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding") Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Mika Westerberg <mika.westerberg@xxxxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- drivers/thunderbolt/xdomain.c | 58 +++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 17 deletions(-) --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -703,6 +703,27 @@ out_unlock: mutex_unlock(&xdomain_lock); } +static void start_handshake(struct tb_xdomain *xd) +{ + xd->state = XDOMAIN_STATE_INIT; + queue_delayed_work(xd->tb->wq, &xd->state_work, + msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); +} + +/* Can be called from state_work */ +static void __stop_handshake(struct tb_xdomain *xd) +{ + cancel_delayed_work_sync(&xd->properties_changed_work); + xd->properties_changed_retries = 0; + xd->state_retries = 0; +} + +static void stop_handshake(struct tb_xdomain *xd) +{ + cancel_delayed_work_sync(&xd->state_work); + __stop_handshake(xd); +} + static void tb_xdp_handle_request(struct work_struct *work) { struct xdomain_request_work *xw = container_of(work, typeof(*xw), work); @@ -765,6 +786,15 @@ static void tb_xdp_handle_request(struct case UUID_REQUEST: tb_dbg(tb, "%llx: received XDomain UUID request\n", route); ret = tb_xdp_uuid_response(ctl, route, sequence, uuid); + /* + * If we've stopped the discovery with an error such as + * timing out, we will restart the handshake now that we + * received UUID request from the remote host. + */ + if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) { + dev_dbg(&xd->dev, "restarting handshake\n"); + start_handshake(xd); + } break; case LINK_STATE_STATUS_REQUEST: @@ -1521,6 +1551,13 @@ static void tb_xdomain_queue_properties_ msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); } +static void tb_xdomain_failed(struct tb_xdomain *xd) +{ + xd->state = XDOMAIN_STATE_ERROR; + queue_delayed_work(xd->tb->wq, &xd->state_work, + msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT)); +} + static void tb_xdomain_state_work(struct work_struct *work) { struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work); @@ -1547,7 +1584,7 @@ static void tb_xdomain_state_work(struct if (ret) { if (ret == -EAGAIN) goto retry_state; - xd->state = XDOMAIN_STATE_ERROR; + tb_xdomain_failed(xd); } else { tb_xdomain_queue_properties_changed(xd); if (xd->bonding_possible) @@ -1612,7 +1649,7 @@ static void tb_xdomain_state_work(struct if (ret) { if (ret == -EAGAIN) goto retry_state; - xd->state = XDOMAIN_STATE_ERROR; + tb_xdomain_failed(xd); } else { xd->state = XDOMAIN_STATE_ENUMERATED; } @@ -1623,6 +1660,8 @@ static void tb_xdomain_state_work(struct break; case XDOMAIN_STATE_ERROR: + dev_dbg(&xd->dev, "discovery failed, stopping handshake\n"); + __stop_handshake(xd); break; default: @@ -1833,21 +1872,6 @@ static void tb_xdomain_release(struct de kfree(xd); } -static void start_handshake(struct tb_xdomain *xd) -{ - xd->state = XDOMAIN_STATE_INIT; - queue_delayed_work(xd->tb->wq, &xd->state_work, - msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); -} - -static void stop_handshake(struct tb_xdomain *xd) -{ - cancel_delayed_work_sync(&xd->properties_changed_work); - cancel_delayed_work_sync(&xd->state_work); - xd->properties_changed_retries = 0; - xd->state_retries = 0; -} - static int __maybe_unused tb_xdomain_suspend(struct device *dev) { stop_handshake(tb_to_xdomain(dev)); Patches currently in stable-queue which might be from mika.westerberg@xxxxxxxxxxxxxxx are queue-6.5/thunderbolt-workaround-an-iommu-fault-on-certain-systems-with-intel-maple-ridge.patch queue-6.5/thunderbolt-restart-xdomain-discovery-handshake-after-failure.patch queue-6.5/thunderbolt-correct-tmu-mode-initialization-from-hardware.patch queue-6.5/thunderbolt-check-that-lane-1-is-in-cl0-before-enabling-lane-bonding.patch