Re: [PATCH V16 08/23] mmc: core: Support UHS-II Auto Command Error Recovery

Adrian Hunter <adrian.hunter@xxxxxxxxx> · Fri, 31 May 2024 14:23:09 +0300

On 31/05/24 13:31, Victor Shih wrote:
> On Fri, May 24, 2024 at 2:54 PM Adrian Hunter <adrian.hunter@xxxxxxxxx> wrote:
>>
>> On 22/05/24 14:08, Victor Shih wrote:
>>> From: Victor Shih <victor.shih@xxxxxxxxxxxxxxxxxxx>
>>>
>>> Add UHS-II Auto Command Error Recovery functionality
>>> into the MMC request processing flow.
>>
>> Not sure what "auto" means here, but the commit message
>> should outline what the spec. requires for error recovery.
>>
> 
> Hi, Adrian
> 
>      I will add instructions in the v17 version.
> 
> Thanks, Victor Shih
> 
>>>
>>> Signed-off-by: Ben Chuang <ben.chuang@xxxxxxxxxxxxxxxxxxx>
>>> Signed-off-by: Victor Shih <victor.shih@xxxxxxxxxxxxxxxxxxx>
>>> ---
>>>
>>> Updates in V16:
>>>  - Separate the Error Recovery mechanism from patch#7 to patch#8.
>>>
>>> ---
>>>
>>>  drivers/mmc/core/core.c    |  4 ++
>>>  drivers/mmc/core/core.h    |  1 +
>>>  drivers/mmc/core/sd_uhs2.c | 80 ++++++++++++++++++++++++++++++++++++++
>>>  include/linux/mmc/host.h   |  6 +++
>>>  4 files changed, 91 insertions(+)
>>>
>>> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
>>> index 68496c51a521..18642afc405f 100644
>>> --- a/drivers/mmc/core/core.c
>>> +++ b/drivers/mmc/core/core.c
>>> @@ -403,6 +403,10 @@ void mmc_wait_for_req_done(struct mmc_host *host, struct mmc_request *mrq)
>>>       while (1) {
>>>               wait_for_completion(&mrq->completion);
>>>
>>> +             if (host->ops->get_cd(host))
>>> +                     if (mrq->cmd->error || (mrq->data && mrq->data->error))
>>> +                             mmc_sd_uhs2_error_recovery(host, mrq);
>>
>> There are several issues with this:
>>
>> 1. It is not OK to start a request from within the request path
>> because it is recursive:
>>
>>    mmc_wait_for_req_done()                      <--
>>       mmc_sd_uhs2_error_recovery()
>>          sd_uhs2_abort_trans()
>>             mmc_wait_for_cmd()
>>                mmc_wait_for_req()
>>                   mmc_wait_for_req_done()       <--
>>
>> 2. The mmc block driver does not use this path
>>
>> 3. No need to always call ->get_cd() if there is no error
>>
>> It is worth considering whether the host controller could
>> send the abort command as part of the original request, as
>> is done with the stop command.
>>
> 
> Hi, Adrian
> 
>      1. It looks like just issuing a command in
> mmc_wait_for_req_done() will cause a recursion.
>          I will drop sd_uhs2_abort_trans() and
> sd_uhs2_abort_status_read() in the v17 version.
>      2. I have no idea about this part, could you please give me some advice?

The mmc block driver sets the ->done() callback and so
mmc_wait_for_req_done() is never called for data transfers.

That won't matter if the host controller handles sending
the abort command, as was suggested elsewhere.

>      3. I will try to modify this part in the v17 version.
> 
> Thanks, Victor Shih
> 
>>> +
>>>               cmd = mrq->cmd;
>>>
>>>               if (!cmd->error || !cmd->retries ||
>>> diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
>>> index 920323faa834..259d47c8bb19 100644
>>> --- a/drivers/mmc/core/core.h
>>> +++ b/drivers/mmc/core/core.h
>>> @@ -82,6 +82,7 @@ int mmc_attach_mmc(struct mmc_host *host);
>>>  int mmc_attach_sd(struct mmc_host *host);
>>>  int mmc_attach_sdio(struct mmc_host *host);
>>>  int mmc_attach_sd_uhs2(struct mmc_host *host);
>>> +void mmc_sd_uhs2_error_recovery(struct mmc_host *mmc, struct mmc_request *mrq);
>>>
>>>  /* Module parameters */
>>>  extern bool use_spi_crc;
>>> diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c
>>> index 85939a2582dc..d5acb4e6ccac 100644
>>> --- a/drivers/mmc/core/sd_uhs2.c
>>> +++ b/drivers/mmc/core/sd_uhs2.c
>>> @@ -1324,3 +1324,83 @@ int mmc_attach_sd_uhs2(struct mmc_host *host)
>>>
>>>       return err;
>>>  }
>>> +
>>> +static void sd_uhs2_abort_trans(struct mmc_host *mmc)
>>> +{
>>> +     struct mmc_request mrq = {};
>>> +     struct mmc_command cmd = {0};
>>> +     struct uhs2_command uhs2_cmd = {};
>>> +     int err;
>>> +
>>> +     mrq.cmd = &cmd;
>>> +     mmc->ongoing_mrq = &mrq;
>>> +
>>> +     uhs2_cmd.header = UHS2_NATIVE_PACKET | UHS2_PACKET_TYPE_CCMD |
>>> +                       mmc->card->uhs2_config.node_id;
>>> +     uhs2_cmd.arg = ((UHS2_DEV_CMD_TRANS_ABORT & 0xFF) << 8) |
>>> +                     UHS2_NATIVE_CMD_WRITE |
>>> +                     (UHS2_DEV_CMD_TRANS_ABORT >> 8);
>>> +
>>> +     sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, 0, 0);
>>> +     err = mmc_wait_for_cmd(mmc, &cmd, 0);
>>> +
>>> +     if (err)
>>> +             pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n",
>>> +                    mmc_hostname(mmc), __func__, err);
>>> +}
>>> +
>>> +static void sd_uhs2_abort_status_read(struct mmc_host *mmc)
>>> +{
>>> +     struct mmc_request mrq = {};
>>> +     struct mmc_command cmd = {0};
>>> +     struct uhs2_command uhs2_cmd = {};
>>> +     int err;
>>> +
>>> +     mrq.cmd = &cmd;
>>> +     mmc->ongoing_mrq = &mrq;
>>> +
>>> +     uhs2_cmd.header = UHS2_NATIVE_PACKET |
>>> +                       UHS2_PACKET_TYPE_CCMD |
>>> +                       mmc->card->uhs2_config.node_id;
>>> +     uhs2_cmd.arg = ((UHS2_DEV_STATUS_REG & 0xFF) << 8) |
>>> +                     UHS2_NATIVE_CMD_READ |
>>> +                     UHS2_NATIVE_CMD_PLEN_4B |
>>> +                     (UHS2_DEV_STATUS_REG >> 8);
>>> +
>>> +     sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, 0, 0);
>>> +     err = mmc_wait_for_cmd(mmc, &cmd, 0);
>>> +
>>> +     if (err)
>>> +             pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n",
>>> +                    mmc_hostname(mmc), __func__, err);
>>> +}
>>> +
>>> +void mmc_sd_uhs2_error_recovery(struct mmc_host *mmc, struct mmc_request *mrq)
>>> +{
>>> +     mmc->ops->uhs2_reset_cmd_data(mmc);
>>
>> The host controller should already have done any resets needed.
>> sdhci already has support for doing that - see host->pending_reset
>>
> 
> Hi, Adrian
> 
>      I'm not sure what this means. Could you please give me more information?

sdhci_uhs2_request_done() checks sdhci_needs_reset() and does
sdhci_uhs2_reset().

sdhci_needs_reset() does not cater for data errors because
the reset for data errors is done directly in what becomes
__sdhci_finish_data_common().

You may need to:
 1. add a parameter to __sdhci_finish_data_common() to
 skip doing the sdhci reset and instead set
 host->pending_reset
 2. amend sdhci_uhs2_request_done() to check for data error
 also to decide if a reset is needed

> 
> Thanks, Victor Shih
> 
>>> +
>>> +     if (mrq->data) {
>>> +             if (mrq->data->error && mmc_card_uhs2(mmc)) {
>>> +                     if (mrq->cmd) {
>>> +                             switch (mrq->cmd->error) {
>>> +                             case ETIMEDOUT:
>>> +                             case EILSEQ:
>>> +                             case EIO:
>>> +                                     sd_uhs2_abort_trans(mmc);
>>> +                                     sd_uhs2_abort_status_read(mmc);
>>
>> What is the purpose of sd_uhs2_abort_status_read() here?
>> It is not obvious it does anything.
>>
> 
> Hi, Adrian
> 
>      sd_uhs2_abort_status_read() seems to only read the status,
>      so I will drop it in the v17 version.
> 
> Thanks, Victor Shih
> 
>>> +                                     break;
>>> +                             default:
>>> +                                     break;
>>> +                             }
>>> +                     }
>>> +             }
>>> +     } else {
>>> +             if (mrq->cmd) {
>>> +                     switch (mrq->cmd->error) {
>>> +                     case ETIMEDOUT:
>>> +                             sd_uhs2_abort_trans(mmc);
>>> +                             break;
>>> +                     }
>>> +             }
>>> +     }
>>> +}
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>> index fc9520b3bfa4..c914a58f7e1e 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -271,6 +271,12 @@ struct mmc_host_ops {
>>>        * negative errno in case of a failure or zero for success.
>>>        */
>>>       int     (*uhs2_control)(struct mmc_host *host, enum sd_uhs2_operation op);
>>> +
>>> +     /*
>>> +      * The uhs2_reset_cmd_data callback is used to execute reset
>>> +      * when an auto command error occurs.
>>> +      */
>>> +     void    (*uhs2_reset_cmd_data)(struct mmc_host *host);
>>>  };
>>>
>>>  struct mmc_cqe_ops {
>>