Hi Cédric, > -----Original Message----- > From: Cédric Le Goater <clg@xxxxxxxx> > Sent: Wednesday, March 30, 2022 8:15 PM > To: Chin-Ting Kuo <chin-ting_kuo@xxxxxxxxxxxxxx>; linux-spi@xxxxxxxxxxxxxxx; > linux-mtd@xxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH v4 08/11] spi: aspeed: Calibrate read timings > > On 3/30/22 13:53, Chin-Ting Kuo wrote: > > Hi Cédric Le, > > > >> -----Original Message----- > >> From: Cédric Le Goater <clg@xxxxxxxx> > >> Sent: Friday, March 25, 2022 6:09 PM > >> To: linux-spi@xxxxxxxxxxxxxxx; linux-mtd@xxxxxxxxxxxxxxxxxxx > >> Subject: [PATCH v4 08/11] spi: aspeed: Calibrate read timings > >> > >> To accommodate the different response time of SPI transfers on > >> different boards and different SPI NOR devices, the Aspeed > >> controllers provide a set of Read Timing Compensation registers to > >> tune the timing delays depending on the frequency being used. The > >> AST2600 SoC has one of these registers per device. On the AST2500 and > >> AST2400 SoCs, the timing register is shared by all devices which is > problematic to get good results other than for one device. > >> > >> The algorithm first reads a golden buffer at low speed and then > >> performs reads with different clocks and delay cycle settings to find > >> a breaking point. This selects a default good frequency for the CEx control > register. > >> The current settings are a bit optimistic as we pick the first delay > >> giving good results. A safer approach would be to determine an > >> interval and choose the middle value. > >> > >> Calibration is performed when the direct mapping for reads is created. > >> Since the underlying spi-nor object needs to be initialized to create > >> the spi_mem operation for direct mapping, we should be fine. Having a > >> specific API would clarify the requirements though. > >> > >> Cc: Pratyush Yadav <p.yadav@xxxxxx> > >> Reviewed-by: Joel Stanley <joel@xxxxxxxxx> > >> Tested-by: Joel Stanley <joel@xxxxxxxxx> > >> Tested-by: Tao Ren <rentao.bupt@xxxxxxxxx> > >> Signed-off-by: Cédric Le Goater <clg@xxxxxxxx> > >> --- > >> drivers/spi/spi-aspeed-smc.c | 281 > >> +++++++++++++++++++++++++++++++++++ > >> 1 file changed, 281 insertions(+) > >> > >> diff --git a/drivers/spi/spi-aspeed-smc.c > >> b/drivers/spi/spi-aspeed-smc.c index > >> 7f306da7c44e..660451667a39 100644 > >> --- a/drivers/spi/spi-aspeed-smc.c > >> +++ b/drivers/spi/spi-aspeed-smc.c > >> @@ -33,6 +33,8 @@ > >> #define CTRL_IO_ADDRESS_4B BIT(13) /* AST2400 SPI only > */ > >> #define CTRL_IO_DUMMY_SET(dummy) \ > >> (((((dummy) >> 2) & 0x1) << 14) | (((dummy) & 0x3) << 6)) > >> +#define CTRL_FREQ_SEL_SHIFT 8 > >> +#define CTRL_FREQ_SEL_MASK GENMASK(11, > >> CTRL_FREQ_SEL_SHIFT) > >> #define CTRL_CE_STOP_ACTIVE BIT(2) > >> #define CTRL_IO_MODE_CMD_MASK GENMASK(1, 0) > >> #define CTRL_IO_MODE_NORMAL 0x0 > >> @@ -45,6 +47,9 @@ > >> /* CEx Address Decoding Range Register */ > >> #define CE0_SEGMENT_ADDR_REG 0x30 > >> > >> +/* CEx Read timing compensation register */ > >> +#define CE0_TIMING_COMPENSATION_REG 0x94 > >> + > >> enum aspeed_spi_ctl_reg_value { > >> ASPEED_SPI_BASE, > >> ASPEED_SPI_READ, > >> @@ -70,10 +75,15 @@ struct aspeed_spi_data { > >> bool hastype; > >> u32 mode_bits; > >> u32 we0; > >> + u32 timing; > >> + u32 hclk_mask; > >> + u32 hdiv_max; > >> > >> u32 (*segment_start)(struct aspeed_spi *aspi, u32 reg); > >> u32 (*segment_end)(struct aspeed_spi *aspi, u32 reg); > >> u32 (*segment_reg)(struct aspeed_spi *aspi, u32 start, u32 end); > >> + int (*calibrate)(struct aspeed_spi_chip *chip, u32 hdiv, > >> + const u8 *golden_buf, u8 *test_buf); > >> }; > >> > >> #define ASPEED_SPI_MAX_NUM_CS 5 > >> @@ -517,6 +527,8 @@ static int aspeed_spi_chip_adjust_window(struct > >> aspeed_spi_chip *chip, > >> return 0; > >> } > >> > >> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip); > >> + > >> static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc > *desc) { > >> struct aspeed_spi *aspi = > >> spi_controller_get_devdata(desc->mem->spi->master); > >> @@ -565,6 +577,8 @@ static int aspeed_spi_dirmap_create(struct > >> spi_mem_dirmap_desc *desc) > >> chip->ctl_val[ASPEED_SPI_READ] = ctl_val; > >> writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl); > >> > >> + ret = aspeed_spi_do_calibration(chip); > >> + > >> dev_info(aspi->dev, "CE%d read buswidth:%d [0x%08x]\n", > >> chip->cs, op->data.buswidth, > chip->ctl_val[ASPEED_SPI_READ]); > >> > >> @@ -812,6 +826,249 @@ static u32 > >> aspeed_spi_segment_ast2600_reg(struct > >> aspeed_spi *aspi, > >> ((end - 1) & AST2600_SEG_ADDR_MASK); > >> } > >> > >> +/* > >> + * Read timing compensation sequences */ > >> + > >> +#define CALIBRATE_BUF_SIZE SZ_16K > >> + > >> +static bool aspeed_spi_check_reads(struct aspeed_spi_chip *chip, > >> + const u8 *golden_buf, u8 *test_buf) { > >> + int i; > >> + > >> + for (i = 0; i < 10; i++) { > >> + memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE); > >> + if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0) { #if > >> +defined(VERBOSE_DEBUG) > >> + print_hex_dump_bytes(DEVICE_NAME " fail: ", > >> DUMP_PREFIX_NONE, > >> + test_buf, 0x100); > >> +#endif > >> + return false; > >> + } > >> + } > >> + return true; > >> +} > >> + > >> +#define FREAD_TPASS(i) (((i) / 2) | (((i) & 1) ? 0 : 8)) > >> + > >> +/* > >> + * The timing register is shared by all devices. Only update for CE0. > >> + */ > >> +static int aspeed_spi_calibrate(struct aspeed_spi_chip *chip, u32 hdiv, > >> + const u8 *golden_buf, u8 *test_buf) { > >> + struct aspeed_spi *aspi = chip->aspi; > >> + const struct aspeed_spi_data *data = aspi->data; > >> + int i; > >> + int good_pass = -1, pass_count = 0; > >> + u32 shift = (hdiv - 1) << 2; > >> + u32 mask = ~(0xfu << shift); > >> + u32 fread_timing_val = 0; > >> + > >> + /* Try HCLK delay 0..5, each one with/without delay and look for a > >> + * good pair. > >> + */ > >> + for (i = 0; i < 12; i++) { > >> + bool pass; > >> + > >> + if (chip->cs == 0) { > >> + fread_timing_val &= mask; > >> + fread_timing_val |= FREAD_TPASS(i) << shift; > >> + writel(fread_timing_val, aspi->regs + data->timing); > >> + } > >> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf); > >> + dev_dbg(aspi->dev, > >> + " * [%08x] %d HCLK delay, %dns DI delay : %s", > >> + fread_timing_val, i / 2, (i & 1) ? 0 : 4, > >> + pass ? "PASS" : "FAIL"); > >> + if (pass) { > >> + pass_count++; > >> + if (pass_count == 3) { > >> + good_pass = i - 1; > >> + break; > >> + } > >> + } else { > >> + pass_count = 0; > >> + } > >> + } > >> + > >> + /* No good setting for this frequency */ > >> + if (good_pass < 0) > >> + return -1; > >> + > >> + /* We have at least one pass of margin, let's use first pass */ > >> + if (chip->cs == 0) { > >> + fread_timing_val &= mask; > >> + fread_timing_val |= FREAD_TPASS(good_pass) << shift; > >> + writel(fread_timing_val, aspi->regs + data->timing); > >> + } > >> + dev_dbg(aspi->dev, " * -> good is pass %d [0x%08x]", > >> + good_pass, fread_timing_val); > >> + return 0; > >> +} > >> + > >> +static bool aspeed_spi_check_calib_data(const u8 *test_buf, u32 size) { > >> + const u32 *tb32 = (const u32 *)test_buf; > >> + u32 i, cnt = 0; > >> + > >> + /* We check if we have enough words that are neither all 0 > >> + * nor all 1's so the calibration can be considered valid. > >> + * > >> + * I use an arbitrary threshold for now of 64 > >> + */ > >> + size >>= 2; > >> + for (i = 0; i < size; i++) { > >> + if (tb32[i] != 0 && tb32[i] != 0xffffffff) > >> + cnt++; > >> + } > >> + return cnt >= 64; > >> +} > >> + > >> +static const u32 aspeed_spi_hclk_divs[] = { > >> + 0xf, /* HCLK */ > >> + 0x7, /* HCLK/2 */ > >> + 0xe, /* HCLK/3 */ > >> + 0x6, /* HCLK/4 */ > >> + 0xd, /* HCLK/5 */ > >> +}; > >> + > >> +#define ASPEED_SPI_HCLK_DIV(i) \ > >> + (aspeed_spi_hclk_divs[(i) - 1] << CTRL_FREQ_SEL_SHIFT) > >> + > >> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip) { > >> + struct aspeed_spi *aspi = chip->aspi; > >> + const struct aspeed_spi_data *data = aspi->data; > >> + u32 ahb_freq = aspi->clk_freq; > >> + u32 max_freq = chip->clk_freq; > >> + u32 ctl_val; > >> + u8 *golden_buf = NULL; > >> + u8 *test_buf = NULL; > >> + int i, rc, best_div = -1; > >> + > >> + dev_dbg(aspi->dev, "calculate timing compensation - AHB freq: %d MHz", > >> + ahb_freq / 1000000); > >> + > >> + /* > >> + * use the related low frequency to get check calibration data > >> + * and get golden data. > >> + */ > >> + ctl_val = chip->ctl_val[ASPEED_SPI_READ] & data->hclk_mask; > >> + writel(ctl_val, chip->ctl); > >> + > >> + test_buf = kzalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL); > >> + if (!test_buf) > >> + return -ENOMEM; > >> + > >> + golden_buf = test_buf + CALIBRATE_BUF_SIZE; > >> + > >> + memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE); > >> + if (!aspeed_spi_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) { > >> + dev_info(aspi->dev, "Calibration area too uniform, using low speed"); > >> + goto no_calib; > >> + } > >> + > >> +#if defined(VERBOSE_DEBUG) > >> + print_hex_dump_bytes(DEVICE_NAME " good: ", DUMP_PREFIX_NONE, > >> + golden_buf, 0x100); > >> +#endif > >> + > >> + /* Now we iterate the HCLK dividers until we find our breaking point */ > >> + for (i = ARRAY_SIZE(aspeed_spi_hclk_divs); i > data->hdiv_max - 1; i--) { > >> + u32 tv, freq; > >> + > >> + freq = ahb_freq / i; > >> + if (freq > max_freq) > >> + continue; > >> + > >> + /* Set the timing */ > >> + tv = chip->ctl_val[ASPEED_SPI_READ] | ASPEED_SPI_HCLK_DIV(i); > >> + writel(tv, chip->ctl); > >> + dev_dbg(aspi->dev, "Trying HCLK/%d [%08x] ...", i, tv); > >> + rc = data->calibrate(chip, i, golden_buf, test_buf); > >> + if (rc == 0) > >> + best_div = i; > >> + } > >> + > >> + /* Nothing found ? */ > >> + if (best_div < 0) { > >> + dev_warn(aspi->dev, "No good frequency, using dumb slow"); > >> + } else { > >> + dev_dbg(aspi->dev, "Found good read timings at HCLK/%d", > >> best_div); > >> + > >> + /* Record the freq */ > >> + for (i = 0; i < ASPEED_SPI_MAX; i++) > >> + chip->ctl_val[i] = (chip->ctl_val[i] & data->hclk_mask) | > >> + ASPEED_SPI_HCLK_DIV(best_div); > >> + } > >> + > >> +no_calib: > > > > - Maybe, if the calibration process is not executed, the frequency setting > calculated from max_frequency in the device tree can be filled in FMC10 > instead of using dumb slow one, 12.5MHz, always. > > Indeed. > > > For example, except for uniform content case, the calibration process will > be ignored when SPI clock frequency in the device tree is smaller than 40MHz. > > - The function, aspeed_2600_spi_clk_basic_setting, in [2] can be added to > support lower SPI clock frequency, e.g., 4MHz. > > For AST2600, SPI clock frequency can be calculated by > HCLK/(FMC10[27:24] + FMC10[11:8]). > > Could you please send patches on top of this series ? Here are the branches : > Of course. How do I provide you the patch? By private mail or send a PR? Besides, I may add a new callback function for this part due to difference between AST2500 and AST2600. Thanks. Chin-Ting > https://github.com/legoater/linux/commits/openbmc-5.15 > https://github.com/legoater/linux/commits/aspeed (mainline) > > I can include them when I resend a v5. > > Thanks, > > C. > > > > > >> + writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl); > >> + kfree(test_buf); > >> + return 0; > >> +} > >> + > >> +#define TIMING_DELAY_DI BIT(3) > >> +#define TIMING_DELAY_HCYCLE_MAX 5 > >> +#define TIMING_REG_AST2600(chip) \ > >> + ((chip)->aspi->regs + (chip)->aspi->data->timing + \ > >> + (chip)->cs * 4) > >> + > >> +static int aspeed_spi_ast2600_calibrate(struct aspeed_spi_chip > >> +*chip, u32 > >> hdiv, > >> + const u8 *golden_buf, u8 *test_buf) { > >> + struct aspeed_spi *aspi = chip->aspi; > >> + int hcycle; > >> + u32 shift = (hdiv - 2) << 3; > >> + u32 mask = ~(0xfu << shift); > >> + u32 fread_timing_val = 0; > >> + > >> + for (hcycle = 0; hcycle <= TIMING_DELAY_HCYCLE_MAX; hcycle++) { > >> + int delay_ns; > >> + bool pass = false; > >> + > >> + fread_timing_val &= mask; > >> + fread_timing_val |= hcycle << shift; > >> + > >> + /* no DI input delay first */ > >> + writel(fread_timing_val, TIMING_REG_AST2600(chip)); > >> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf); > >> + dev_dbg(aspi->dev, > >> + " * [%08x] %d HCLK delay, DI delay none : %s", > >> + fread_timing_val, hcycle, pass ? "PASS" : "FAIL"); > >> + if (pass) > >> + return 0; > >> + > >> + /* Add DI input delays */ > >> + fread_timing_val &= mask; > >> + fread_timing_val |= (TIMING_DELAY_DI | hcycle) << shift; > >> + > >> + for (delay_ns = 0; delay_ns < 0x10; delay_ns++) { > >> + fread_timing_val &= ~(0xf << (4 + shift)); > >> + fread_timing_val |= delay_ns << (4 + shift); > >> + > >> + writel(fread_timing_val, TIMING_REG_AST2600(chip)); > >> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf); > >> + dev_dbg(aspi->dev, > >> + " * [%08x] %d HCLK delay, DI delay %d.%dns : %s", > >> + fread_timing_val, hcycle, (delay_ns + 1) / 2, > >> + (delay_ns + 1) & 1 ? 5 : 5, pass ? "PASS" : "FAIL"); > >> + /* > >> + * TODO: This is optimistic. We should look > >> + * for a working interval and save the middle > >> + * value in the read timing register. > >> + */ > >> + if (pass) > >> + return 0; > >> + } > >> + } > >> + > >> + /* No good setting for this frequency */ > >> + return -1; > >> +} > >> + > >> /* > >> * Platform definitions > >> */ > >> @@ -820,6 +1077,10 @@ static const struct aspeed_spi_data > >> ast2400_fmc_data = { > >> .hastype = true, > >> .we0 = 16, > >> .ctl0 = CE0_CTRL_REG, > >> + .timing = CE0_TIMING_COMPENSATION_REG, > >> + .hclk_mask = 0xfffff0ff, > >> + .hdiv_max = 1, > >> + .calibrate = aspeed_spi_calibrate, > >> .segment_start = aspeed_spi_segment_start, > >> .segment_end = aspeed_spi_segment_end, > >> .segment_reg = aspeed_spi_segment_reg, > >> @@ -830,6 +1091,10 @@ static const struct aspeed_spi_data > >> ast2400_spi_data = { > >> .hastype = false, > >> .we0 = 0, > >> .ctl0 = 0x04, > >> + .timing = 0x14, > >> + .hclk_mask = 0xfffff0ff, > >> + .hdiv_max = 1, > >> + .calibrate = aspeed_spi_calibrate, > >> /* No segment registers */ > >> }; > >> > >> @@ -838,6 +1103,10 @@ static const struct aspeed_spi_data > >> ast2500_fmc_data = { > >> .hastype = true, > >> .we0 = 16, > >> .ctl0 = CE0_CTRL_REG, > >> + .timing = CE0_TIMING_COMPENSATION_REG, > >> + .hclk_mask = 0xfffff0ff, > >> + .hdiv_max = 1, > >> + .calibrate = aspeed_spi_calibrate, > >> .segment_start = aspeed_spi_segment_start, > >> .segment_end = aspeed_spi_segment_end, > >> .segment_reg = aspeed_spi_segment_reg, > >> @@ -848,6 +1117,10 @@ static const struct aspeed_spi_data > >> ast2500_spi_data = { > >> .hastype = false, > >> .we0 = 16, > >> .ctl0 = CE0_CTRL_REG, > >> + .timing = CE0_TIMING_COMPENSATION_REG, > >> + .hclk_mask = 0xfffff0ff, > >> + .hdiv_max = 1, > >> + .calibrate = aspeed_spi_calibrate, > >> .segment_start = aspeed_spi_segment_start, > >> .segment_end = aspeed_spi_segment_end, > >> .segment_reg = aspeed_spi_segment_reg, > >> @@ -859,6 +1132,10 @@ static const struct aspeed_spi_data > >> ast2600_fmc_data = { > >> .mode_bits = SPI_RX_QUAD | SPI_RX_QUAD, > >> .we0 = 16, > >> .ctl0 = CE0_CTRL_REG, > >> + .timing = CE0_TIMING_COMPENSATION_REG, > >> + .hclk_mask = 0xf0fff0ff, > >> + .hdiv_max = 2, > >> + .calibrate = aspeed_spi_ast2600_calibrate, > >> .segment_start = aspeed_spi_segment_ast2600_start, > >> .segment_end = aspeed_spi_segment_ast2600_end, > >> .segment_reg = aspeed_spi_segment_ast2600_reg, > >> @@ -870,6 +1147,10 @@ static const struct aspeed_spi_data > >> ast2600_spi_data = { > >> .mode_bits = SPI_RX_QUAD | SPI_RX_QUAD, > >> .we0 = 16, > >> .ctl0 = CE0_CTRL_REG, > >> + .timing = CE0_TIMING_COMPENSATION_REG, > >> + .hclk_mask = 0xf0fff0ff, > >> + .hdiv_max = 2, > >> + .calibrate = aspeed_spi_ast2600_calibrate, > >> .segment_start = aspeed_spi_segment_ast2600_start, > >> .segment_end = aspeed_spi_segment_ast2600_end, > >> .segment_reg = aspeed_spi_segment_ast2600_reg, > >> -- > >> 2.34.1 > > > > > > Best Wishes, > > Chin-Ting