On Fri, 2023-11-24 at 15:08 +0800, Axe Yang wrote: > Previously, during the MSDC calibration process, a full clock cycle > actually not be covered, which in some cases didn't yield the best > results and could cause CRC errors. This problem is particularly > evident when MSDC is used as an SDIO host. To solve this issue, the > tuning step should be extended from 32 to 64. By using 64-steps > tuning, a complete clock cycle can be covered. > > To illustrate, when tuning 32-steps, if the obtained window has a > hole > near the middle, like this: 0xffc07ff (hex), then the selected delay > will be the 6 (counting from right to left). > > (32 <- 1) > 1111 1111 1100 0000 0000 0111 11(1)1 1111 > > However, if we tune 64-steps, the window obtained may look like this: > 0xfffffffffffc07ff. The final selected delay will be 44, which is > safer as it is further away from the hole: > > (64 <- 1) > 1111 ... (1)111 1111 1111 1111 1111 1100 0000 0000 0111 1111 1111 > > In this case, delay 6 selected through 32-steps tuning is obviously > not optimal, and this delay is closer to the hole, using it would > easily cause CRC problems. > > You will need to add property "mediatek,tune-64-steps" in MSDC dts > node to enable the feature. > > Signed-off-by: Axe Yang <axe.yang@xxxxxxxxxxxx> > --- > drivers/mmc/host/mtk-sd.c | 133 +++++++++++++++++++++++++++--------- > -- > 1 file changed, 97 insertions(+), 36 deletions(-) > > diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c > index 97f7c3d4be6e..aa955240e441 100644 > --- a/drivers/mmc/host/mtk-sd.c > +++ b/drivers/mmc/host/mtk-sd.c > @@ -252,12 +252,16 @@ > > #define MSDC_PAD_TUNE_DATWRDLY GENMASK(4, 0) /* RW > */ > #define MSDC_PAD_TUNE_DATRRDLY GENMASK(12, 8) /* RW */ > +#define MSDC_PAD_TUNE_DATRRDLY2 GENMASK(12, 8) /* RW */ > #define MSDC_PAD_TUNE_CMDRDLY GENMASK(20, 16) /* RW */ > +#define MSDC_PAD_TUNE_CMDRDLY2 GENMASK(20, 16) /* RW */ > #define MSDC_PAD_TUNE_CMDRRDLY GENMASK(26, 22) /* RW */ > #define MSDC_PAD_TUNE_CLKTDLY GENMASK(31, 27) /* RW */ > #define MSDC_PAD_TUNE_RXDLYSEL BIT(15) /* RW */ > #define MSDC_PAD_TUNE_RD_SEL BIT(13) /* RW */ > #define MSDC_PAD_TUNE_CMD_SEL BIT(21) /* RW */ > +#define MSDC_PAD_TUNE_RD2_SEL BIT(13) /* RW */ > +#define MSDC_PAD_TUNE_CMD2_SEL BIT(21) /* RW */ > > #define PAD_DS_TUNE_DLY_SEL BIT(0) /* RW */ > #define PAD_DS_TUNE_DLY1 GENMASK(6, 2) /* RW */ > @@ -325,7 +329,8 @@ > > #define DEFAULT_DEBOUNCE (8) /* 8 cycles CD debounce */ > > -#define PAD_DELAY_MAX 32 /* PAD delay cells */ > +#define PAD_DELAY_HALF 32 /* PAD delay cells */ > +#define PAD_DELAY_FULL 64 > /*---------------------------------------------------------------- > ----------*/ > /* Descriptor > Structure */ > /*---------------------------------------------------------------- > ----------*/ > @@ -465,6 +470,7 @@ struct msdc_host { > /* cmd response sample selection for > HS400 */ > bool hs400_mode; /* current eMMC will run at hs400 mode */ > bool hs400_tuning; /* hs400 mode online tuning */ > + bool tune_64steps; > bool internal_cd; /* Use internal card-detect logic */ > bool cqhci; /* support eMMC hw cmdq */ > struct msdc_save_para save_para; /* used when gate HCLK */ > @@ -1615,7 +1621,7 @@ static irqreturn_t msdc_cmdq_irq(struct > msdc_host *host, u32 intsts) > } > > if (cmd_err || dat_err) { > - dev_err(host->dev, "cmd_err = %d, dat_err =%d, intsts = > 0x%x", > + dev_err(host->dev, "cmd_err = %d, dat_err = %d, intsts > = 0x%x", > cmd_err, dat_err, intsts); > } > > @@ -1780,10 +1786,20 @@ static void msdc_init_hw(struct msdc_host > *host) > DATA_K_VALUE_SEL); > sdr_set_bits(host->top_base + EMMC_TOP_CMD, > PAD_CMD_RD_RXDLY_SEL); > + if (host->tune_64steps) { > + sdr_set_bits(host->top_base + > EMMC_TOP_CONTROL, > + PAD_DAT_RD_RXDLY2_SEL); > + sdr_set_bits(host->top_base + > EMMC_TOP_CMD, > + PAD_CMD_RD_RXDLY2_SEL); > + } > } else { > sdr_set_bits(host->base + tune_reg, > MSDC_PAD_TUNE_RD_SEL | > MSDC_PAD_TUNE_CMD_SEL); > + if (host->tune_64steps) > + sdr_set_bits(host->base + tune_reg + 4, > + MSDC_PAD_TUNE_RD2_SEL | > + MSDC_PAD_TUNE_CMD2_SEL); > } > } else { > /* choose clock tune */ > @@ -1925,24 +1941,24 @@ static void msdc_ops_set_ios(struct mmc_host > *mmc, struct mmc_ios *ios) > msdc_set_mclk(host, ios->timing, ios->clock); > } > > -static u32 test_delay_bit(u32 delay, u32 bit) > +static u64 test_delay_bit(u64 delay, u32 bit) > { > - bit %= PAD_DELAY_MAX; > + bit %= PAD_DELAY_FULL; > return delay & BIT(bit); Since you are testing > 32 bits, you should use BIT_ULL() to instead of BIT(). > } > > -static int get_delay_len(u32 delay, u32 start_bit) > +static int get_delay_len(u64 delay, u32 start_bit) > { > int i; > > - for (i = 0; i < (PAD_DELAY_MAX - start_bit); i++) { > + for (i = 0; i < (PAD_DELAY_FULL - start_bit); i++) { > if (test_delay_bit(delay, start_bit + i) == 0) > return i; > } > - return PAD_DELAY_MAX - start_bit; > + return PAD_DELAY_FULL - start_bit; > } > > -static struct msdc_delay_phase get_best_delay(struct msdc_host > *host, u32 delay) > +static struct msdc_delay_phase get_best_delay(struct msdc_host > *host, u64 delay) > { > int start = 0, len = 0; > int start_final = 0, len_final = 0; > @@ -1950,28 +1966,28 @@ static struct msdc_delay_phase > get_best_delay(struct msdc_host *host, u32 delay) > struct msdc_delay_phase delay_phase = { 0, }; > > if (delay == 0) { > - dev_err(host->dev, "phase error: [map:%x]\n", delay); > + dev_err(host->dev, "phase error: [map:%llx]\n", delay); > delay_phase.final_phase = final_phase; > return delay_phase; > } > > - while (start < PAD_DELAY_MAX) { > + while (start < PAD_DELAY_FULL) { > len = get_delay_len(delay, start); > if (len_final < len) { > start_final = start; > len_final = len; > } > start += len ? len : 1; > - if (len >= 12 && start_final < 4) > + if (!upper_32_bits(delay) && len >= 12 && start_final < > 4) > break; > } > > /* The rule is that to find the smallest delay cell */ > if (start_final == 0) > - final_phase = (start_final + len_final / 3) % > PAD_DELAY_MAX; > + final_phase = (start_final + len_final / 3) % > PAD_DELAY_FULL; > else > - final_phase = (start_final + len_final / 2) % > PAD_DELAY_MAX; > - dev_dbg(host->dev, "phase: [map:%x] [maxlen:%d] [final:%d]\n", > + final_phase = (start_final + len_final / 2) % > PAD_DELAY_FULL; > + dev_dbg(host->dev, "phase: [map:%llx] [maxlen:%d] > [final:%d]\n", > delay, len_final, final_phase); > > delay_phase.maxlen = len_final; > @@ -1984,24 +2000,62 @@ static inline void msdc_set_cmd_delay(struct > msdc_host *host, u32 value) > { > u32 tune_reg = host->dev_comp->pad_tune_reg; > > - if (host->top_base) > - sdr_set_field(host->top_base + EMMC_TOP_CMD, > PAD_CMD_RXDLY, > - value); > - else > - sdr_set_field(host->base + tune_reg, > MSDC_PAD_TUNE_CMDRDLY, > - value); > + if (host->top_base) { > + if (value < PAD_DELAY_HALF) { > + sdr_set_field(host->top_base + EMMC_TOP_CMD, > PAD_CMD_RXDLY, > + value); > + sdr_set_field(host->top_base + EMMC_TOP_CMD, > PAD_CMD_RXDLY2, > + 0); > + } else { > + sdr_set_field(host->top_base + EMMC_TOP_CMD, > PAD_CMD_RXDLY, > + PAD_DELAY_HALF - 1); > + sdr_set_field(host->top_base + EMMC_TOP_CMD, > PAD_CMD_RXDLY2, > + value - PAD_DELAY_HALF); > + } > + } else { > + if (value < PAD_DELAY_HALF) { > + sdr_set_field(host->base + tune_reg, > MSDC_PAD_TUNE_CMDRDLY, > + value); > + sdr_set_field(host->base + tune_reg + 4, > MSDC_PAD_TUNE_CMDRDLY2, > + 0); > + } else { > + sdr_set_field(host->base + tune_reg, > MSDC_PAD_TUNE_CMDRDLY, > + PAD_DELAY_HALF - 1); > + sdr_set_field(host->base + tune_reg + 4, > MSDC_PAD_TUNE_CMDRDLY2, > + value - PAD_DELAY_HALF); > + } > + } > } > > static inline void msdc_set_data_delay(struct msdc_host *host, u32 > value) > { > u32 tune_reg = host->dev_comp->pad_tune_reg; > > - if (host->top_base) > - sdr_set_field(host->top_base + EMMC_TOP_CONTROL, > - PAD_DAT_RD_RXDLY, value); > - else > - sdr_set_field(host->base + tune_reg, > MSDC_PAD_TUNE_DATRRDLY, > - value); > + if (host->top_base) { > + if (value < PAD_DELAY_HALF) { > + sdr_set_field(host->top_base + > EMMC_TOP_CONTROL, > + PAD_DAT_RD_RXDLY, value); > + sdr_set_field(host->top_base + > EMMC_TOP_CONTROL, > + PAD_DAT_RD_RXDLY2, 0); > + } else { > + sdr_set_field(host->top_base + > EMMC_TOP_CONTROL, > + PAD_DAT_RD_RXDLY, PAD_DELAY_HALF > - 1); > + sdr_set_field(host->top_base + > EMMC_TOP_CONTROL, > + PAD_DAT_RD_RXDLY2, value - > PAD_DELAY_HALF); > + } > + } else { > + if (value < PAD_DELAY_HALF) { > + sdr_set_field(host->base + tune_reg, > MSDC_PAD_TUNE_DATRRDLY, > + value); > + sdr_set_field(host->base + tune_reg + 4, > MSDC_PAD_TUNE_DATRRDLY2, > + 0); > + } else { > + sdr_set_field(host->base + tune_reg, > MSDC_PAD_TUNE_DATRRDLY, > + PAD_DELAY_HALF - 1); > + sdr_set_field(host->base + tune_reg + 4, > MSDC_PAD_TUNE_DATRRDLY2, > + value - PAD_DELAY_HALF); > + } > + } > } > > static int msdc_tune_response(struct mmc_host *mmc, u32 opcode) > @@ -2023,7 +2077,7 @@ static int msdc_tune_response(struct mmc_host > *mmc, u32 opcode) > host->hs200_cmd_int_delay); > > sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL); > - for (i = 0 ; i < PAD_DELAY_MAX; i++) { > + for (i = 0 ; i < PAD_DELAY_HALF; i++) { > msdc_set_cmd_delay(host, i); > /* > * Using the same parameters, it may sometimes pass the > test, > @@ -2047,7 +2101,7 @@ static int msdc_tune_response(struct mmc_host > *mmc, u32 opcode) > goto skip_fall; > > sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL); > - for (i = 0; i < PAD_DELAY_MAX; i++) { > + for (i = 0; i < PAD_DELAY_HALF; i++) { > msdc_set_cmd_delay(host, i); > /* > * Using the same parameters, it may sometimes pass the > test, > @@ -2082,7 +2136,7 @@ static int msdc_tune_response(struct mmc_host > *mmc, u32 opcode) > if (host->dev_comp->async_fifo || host->hs200_cmd_int_delay) > goto skip_internal; > > - for (i = 0; i < PAD_DELAY_MAX; i++) { > + for (i = 0; i < PAD_DELAY_HALF; i++) { > sdr_set_field(host->base + tune_reg, > MSDC_PAD_TUNE_CMDRRDLY, i); > mmc_send_tuning(mmc, opcode, &cmd_err); > @@ -2121,7 +2175,7 @@ static int hs400_tune_response(struct mmc_host > *mmc, u32 opcode) > sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL); > else > sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL); > - for (i = 0 ; i < PAD_DELAY_MAX; i++) { > + for (i = 0 ; i < PAD_DELAY_HALF; i++) { > sdr_set_field(host->base + PAD_CMD_TUNE, > PAD_CMD_TUNE_RX_DLY3, i); > /* > @@ -2160,7 +2214,7 @@ static int msdc_tune_data(struct mmc_host *mmc, > u32 opcode) > host->latch_ck); > sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_DSPL); > sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_W_DSPL); > - for (i = 0 ; i < PAD_DELAY_MAX; i++) { > + for (i = 0 ; i < PAD_DELAY_HALF; i++) { > msdc_set_data_delay(host, i); > ret = mmc_send_tuning(mmc, opcode, NULL); > if (!ret) > @@ -2174,7 +2228,7 @@ static int msdc_tune_data(struct mmc_host *mmc, > u32 opcode) > > sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_DSPL); > sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_W_DSPL); > - for (i = 0; i < PAD_DELAY_MAX; i++) { > + for (i = 0; i < PAD_DELAY_HALF; i++) { > msdc_set_data_delay(host, i); > ret = mmc_send_tuning(mmc, opcode, NULL); > if (!ret) > @@ -2206,10 +2260,11 @@ static int msdc_tune_data(struct mmc_host > *mmc, u32 opcode) > static int msdc_tune_together(struct mmc_host *mmc, u32 opcode) > { > struct msdc_host *host = mmc_priv(mmc); > - u32 rise_delay = 0, fall_delay = 0; > + u64 rise_delay = 0, fall_delay = 0; > struct msdc_delay_phase final_rise_delay, final_fall_delay = { > 0,}; > u8 final_delay, final_maxlen; > int i, ret; > + int tune_steps = host->tune_64steps ? PAD_DELAY_FULL : > PAD_DELAY_HALF; > > sdr_set_field(host->base + MSDC_PATCH_BIT, > MSDC_INT_DAT_LATCH_CK_SEL, > host->latch_ck); > @@ -2217,7 +2272,7 @@ static int msdc_tune_together(struct mmc_host > *mmc, u32 opcode) > sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL); > sdr_clr_bits(host->base + MSDC_IOCON, > MSDC_IOCON_DSPL | MSDC_IOCON_W_DSPL); > - for (i = 0 ; i < PAD_DELAY_MAX; i++) { > + for (i = 0 ; i < tune_steps; i++) { > msdc_set_cmd_delay(host, i); > msdc_set_data_delay(host, i); > ret = mmc_send_tuning(mmc, opcode, NULL); > @@ -2233,7 +2288,7 @@ static int msdc_tune_together(struct mmc_host > *mmc, u32 opcode) > sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL); > sdr_set_bits(host->base + MSDC_IOCON, > MSDC_IOCON_DSPL | MSDC_IOCON_W_DSPL); > - for (i = 0; i < PAD_DELAY_MAX; i++) { > + for (i = 0; i < tune_steps; i++) { > msdc_set_cmd_delay(host, i); > msdc_set_data_delay(host, i); > ret = mmc_send_tuning(mmc, opcode, NULL); > @@ -2346,7 +2401,7 @@ static int msdc_execute_hs400_tuning(struct > mmc_host *mmc, struct mmc_card *card > } > > host->hs400_tuning = true; > - for (i = 0; i < PAD_DELAY_MAX; i++) { > + for (i = 0; i < PAD_DELAY_HALF; i++) { > if (host->top_base) > sdr_set_field(host->top_base + > EMMC50_PAD_DS_TUNE, > PAD_DS_DLY1, i); > @@ -2601,6 +2656,12 @@ static void msdc_of_property_parse(struct > platform_device *pdev, > else > host->hs400_cmd_resp_sel_rising = false; > > + if (of_property_read_bool(pdev->dev.of_node, > + "mediatek,tune-64-steps")) > + host->tune_64steps = true; > + else > + host->tune_64steps = false; > + > if (of_property_read_bool(pdev->dev.of_node, > "supports-cqe")) > host->cqhci = true;