On 16/10/23 14:42, Sergey Khimich wrote:
> Hello Adrian!
>
> Thanks for the review and comments! There are a few questions I'd like to clarify:
>
> On 09.10.2023 18:39, Adrian Hunter wrote:
>> On 2/10/23 14:33, Sergey Khimich wrote:
>>> From: Sergey Khimich <serghox@xxxxxxxxx>
>>>
>>> For enabling CQE support just set 'supports-cqe' in your DevTree file
>>> for appropriate mmc node.
>>>
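
For reference, a hypothetical device tree fragment doing just that might look
roughly like the one below; the node name, unit address and the omitted
required properties (clocks, interrupts, etc.) are placeholders only:

	mmc@fe000000 {
		compatible = "snps,dwcmshc-sdhci";
		reg = <0xfe000000 0x10000>;
		bus-width = <8>;
		supports-cqe;
	};
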
>>> Signed-off-by: Sergey Khimich <serghox@xxxxxxxxx>
>>> ---
>>>  drivers/mmc/host/Kconfig            |   1 +
>>>  drivers/mmc/host/sdhci-of-dwcmshc.c | 233 +++++++++++++++++++++++++++-
>>>  2 files changed, 232 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
>>> index 554e67103c1a..f3380b014ca9 100644
>>> --- a/drivers/mmc/host/Kconfig
>>> +++ b/drivers/mmc/host/Kconfig
>>> @@ -233,6 +233,7 @@ config MMC_SDHCI_OF_DWCMSHC
>>>  	depends on MMC_SDHCI_PLTFM
>>>  	depends on OF
>>>  	depends on COMMON_CLK
>>> +	select MMC_CQHCI
>>>  	help
>>>  	  This selects Synopsys DesignWare Cores Mobile Storage Controller
>>>  	  support.
>>> diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> index 3a3bae6948a8..7d43ae011811 100644
>>> --- a/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> @@ -20,6 +20,7 @@
>>>  #include <linux/sizes.h>
>>>  #include "sdhci-pltfm.h"
>>> +#include "cqhci.h"
>>>  #define SDHCI_DWCMSHC_ARG2_STUFF	GENMASK(31, 16)
>>> @@ -36,6 +37,9 @@
>>>  #define DWCMSHC_ENHANCED_STROBE	BIT(8)
>>>  #define DWCMSHC_EMMC_ATCTRL	0x40
>>> +/* DWC IP vendor area 2 pointer */
>>> +#define DWCMSHC_P_VENDOR_AREA2	0xea
>>> +
>>>  /* Rockchip specific Registers */
>>>  #define DWCMSHC_EMMC_DLL_CTRL	0x800
>>>  #define DWCMSHC_EMMC_DLL_RXCLK	0x804
>>> @@ -75,6 +79,10 @@
>>>  #define BOUNDARY_OK(addr, len) \
>>>  	((addr | (SZ_128M - 1)) == ((addr + len - 1) | (SZ_128M - 1)))
>>> +#define DWCMSHC_SDHCI_CQE_TRNS_MODE	(SDHCI_TRNS_MULTI | \
>>> +					 SDHCI_TRNS_BLK_CNT_EN | \
>>> +					 SDHCI_TRNS_DMA)
>>> +
>>>  enum dwcmshc_rk_type {
>>>  	DWCMSHC_RK3568,
>>>  	DWCMSHC_RK3588,
>>> @@ -90,7 +98,8 @@ struct rk35xx_priv {
>>>  struct dwcmshc_priv {
>>>  	struct clk	*bus_clk;
>>> -	int vendor_specific_area1; /* P_VENDOR_SPECIFIC_AREA reg */
>>> +	int vendor_specific_area1; /* P_VENDOR_SPECIFIC_AREA1 reg */
>>> +	int vendor_specific_area2; /* P_VENDOR_SPECIFIC_AREA2 reg */
>>>  	void	*priv; /* pointer to SoC private stuff */
>>>  };
>>> @@ -210,6 +219,147 @@ static void dwcmshc_hs400_enhanced_strobe(struct mmc_host *mmc,
>>>  	sdhci_writel(host, vendor, reg);
>>>  }
>>> +static u32 dwcmshc_cqe_irq_handler(struct sdhci_host *host, u32 intmask)
>>> +{
>>> +	int cmd_error = 0;
>>> +	int data_error = 0;
>>> +
>>> +	if (!sdhci_cqe_irq(host, intmask, &cmd_error, &data_error))
>>> +		return intmask;
>>> +
>>> +	cqhci_irq(host->mmc, intmask, cmd_error, data_error);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void dwcmshc_sdhci_cqe_enable(struct mmc_host *mmc)
>>> +{
>>> +	struct sdhci_host *host = mmc_priv(mmc);
>>> +	u32 pstate;
>>> +	u8 ctrl;
>>> +	int count = 10;
>>> +
>>> +	/*
>>> +	 * CQE gets stuck if it sees Buffer Read Enable bit set, which can be
>>> +	 * the case after tuning, so ensure the buffer is drained.
>>> +	 */
>>> +	pstate = sdhci_readl(host, SDHCI_PRESENT_STATE);
>>> +	while (pstate & SDHCI_DATA_AVAILABLE) {
>>> +		sdhci_readl(host, SDHCI_BUFFER);
>>> +		pstate = sdhci_readl(host, SDHCI_PRESENT_STATE);
>>> +		if (count-- == 0) {
>>> +			dev_warn(mmc_dev(host->mmc),
>>> +				 "CQE may get stuck because the Buffer Read Enable bit is set\n");
>>> +			break;
>>> +		}
>>> +		mdelay(1);
>>> +	}
>> An alternative, which might be easier, is to do a
>> data reset which may also help allow the device to
>> subsequently enter low power states.
>> Refer commit f8870ae6e2d6be75b1accc2db981169fdfbea7ab
>> and commit 7b7d57fd1b773d25d8358c6017592b4928bf76ce
>
> Thanks, I'll fix it in the next version of the patch.
>
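
For illustration only, the reset-based variant being suggested might be shaped
roughly like this (a sketch that assumes sdhci_reset() with SDHCI_RESET_DATA is
suitable here; see the referenced commits for the actual details, and the ADMA2
selection below would still be needed):

static void dwcmshc_sdhci_cqe_enable(struct mmc_host *mmc)
{
	struct sdhci_host *host = mmc_priv(mmc);

	/*
	 * Instead of draining SDHCI_BUFFER in a loop, reset the data
	 * circuit so a stale Buffer Read Enable bit cannot stall CQE.
	 */
	if (sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_DATA_AVAILABLE)
		sdhci_reset(host, SDHCI_RESET_DATA);

	sdhci_writew(host, DWCMSHC_SDHCI_CQE_TRNS_MODE, SDHCI_TRANSFER_MODE);
	sdhci_cqe_enable(mmc);
}
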
>>
>>> +
>>> +	sdhci_writew(host, DWCMSHC_SDHCI_CQE_TRNS_MODE, SDHCI_TRANSFER_MODE);
>>> +
>>> +	sdhci_cqe_enable(mmc);
>>> +
>>> +	/*
>>> +	 * The "DesignWare Cores Mobile Storage Host Controller
>>> +	 * DWC_mshc / DWC_mshc_lite Databook" says:
>>> +	 * when Host Version 4 Enable" is 1 in Host Control 2 register,
>>> +	 * SDHCI_CTRL_ADMA32 bit means ADMA2 is selected.
>>> +	 * Selection of 32-bit/64-bit System Addressing:
>>> +	 * either 32-bit or 64-bit system addressing is selected by
>>> +	 * 64-bit Addressing bit in Host Control 2 register.
>>> +	 *
>>> +	 * On the other hand the "DesignWare Cores Mobile Storage Host
>>> +	 * Controller DWC_mshc / DWC_mshc_lite User Guide" says, that we have to
>>> +	 * set DMA_SEL to ADMA2 _only_ mode in the Host Control 2 register.
>>> +	 */
>>> +	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
>>> +	ctrl &= ~SDHCI_CTRL_DMA_MASK;
>>> +	ctrl |= SDHCI_CTRL_ADMA32;
>>> +	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
>>> +}
>>> +
>>> +static void dwcmshc_sdhci_cqe_disable(struct mmc_host *mmc, bool recovery)
>>> +{
>>> +	/*
>>> +	 * If an ioctl was issued, cqe_disable will be called.
>>> +	 * For CQE of sdhci-of-dwcmshc, the previous in-flight cmd will be lost quietly.
>>> +	 * So wait for mmc idle state.
>> This sounds like it should be fixed in the mmc block driver.
>> Can you provide an example of when this happens?
> Unfortunately I can't provide an example.
> But this part of the patch was updated by me on the advice of Shawn Lin after his review of V1 of the patch.
> Please find his message here:
> https://patchwork.kernel.org/project/linux-mmc/patch/20230825143525.869906-2-serghox@xxxxxxxxx/
>
> Just in case, here I also quote the part of his comment that refers to this part of the patch:
> "And another issue was found when Rockchip added CQE support for
> sdhci-of-dwcmshc internally, is that if a ioctl was issued, cqe_disable
> will be called. For CQE of sdhci-of-dwcmshc, the previous in-flight cmd
> will be lost quietly. So a mmc->cqe_ops->cqe_wait_for_idle(mmc) should
> be added before sdhci_cqe_disable(), so you need a dwcmshc specified
> cqe_disable hook in sdhci-of-dwcmshc."

The mmc block driver already does a wait for idle before ioctl commands;
refer to mmc_blk_mq_issue_rq(), case MMC_ISSUE_SYNC.

Without more information we cannot assume the upstream kernel has a
problem with this.
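
Roughly, and simplified from memory of drivers/mmc/core/block.c (treat this as
a paraphrase rather than a verbatim quote), that path looks like:

	switch (issue_type) {
	case MMC_ISSUE_SYNC:
		ret = mmc_blk_wait_for_idle(mq, host);
		if (ret)
			return MMC_REQ_BUSY;
		...

i.e. a synchronous (e.g. ioctl) request is only issued once the queue has gone
idle.
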
>
>>
>>> +	 */
>>> +	mmc->cqe_ops->cqe_wait_for_idle(mmc);
>>> +
>>> +	return sdhci_cqe_disable(mmc, recovery);
>>> +}
>>> +
>>> +static void dwcmshc_cqhci_set_tran_desc(u8 *desc, dma_addr_t addr, int len, bool end,
>>> +					bool dma64)
>>> +{
>>> +	__le32 *attr = (__le32 __force *)desc;
>>> +
>>> +	*attr = (CQHCI_VALID(1) |
>>> +		 CQHCI_END(end ? 1 : 0) |
>>> +		 CQHCI_INT(0) |
>>> +		 CQHCI_ACT(0x4) |
>>> +		 CQHCI_DAT_LENGTH(len));
>>> +
>>> +	if (dma64) {
>>> +		__le64 *dataddr = (__le64 __force *)(desc + 4);
>>> +
>>> +		dataddr[0] = cpu_to_le64(addr);
>>> +	} else {
>>> +		__le32 *dataddr = (__le32 __force *)(desc + 4);
>>> +
>>> +		dataddr[0] = cpu_to_le32(addr);
>>> +	}
>>> +}
>> This is the same as cqhci_set_tran_desc(). Might as well export that
>> instead.
> Thanks, I'll fix it in the next version of the patch.
>>> +
>>> +static void dwcmshc_cqhci_prep_tran_desc(struct mmc_data *data,
>>> +					 struct cqhci_host *cq_host,
>>> +					 u8 *desc, int sg_count)
>>> +{
>>> +	int i, len, tmplen, offset;
>>> +	bool end = false;
>>> +	bool dma64 = cq_host->dma64;
>>> +	dma_addr_t addr;
>>> +	struct scatterlist *sg;
>>> +
>>> +	for_each_sg(data->sg, sg, sg_count, i) {
>>> +		addr = sg_dma_address(sg);
>>> +		len = sg_dma_len(sg);
>>> +
>>> +		/*
>>> +		 * According to the "DesignWare Cores Mobile Storage Host Controller
>>> +		 * DWC_mshc / DWC_mshc_lite Databook" the host memory data buffer size
>>> +		 * and start address must not exceed 128 Mb. If it exceeds,
>>> +		 * the data buffer must be split using two descritors.
>>> +		 */
>>> +
>>> +		if (likely(BOUNDARY_OK(addr, len))) {
>>> +			if ((i + 1) == sg_count)
>>> +				end = true;
>>> +			dwcmshc_cqhci_set_tran_desc(desc, addr, len, end, dma64);
>>> +			desc += cq_host->trans_desc_len;
>>> +		} else {
>>> +			offset = addr & (SZ_128M - 1);
>>> +			tmplen = SZ_128M - offset;
>>> +			dwcmshc_cqhci_set_tran_desc(desc, addr, tmplen, end, dma64);
>>> +			desc += cq_host->trans_desc_len;
>>> +
>>> +			if ((i + 1) == sg_count)
>>> +				end = true;
>>> +
>>> +			addr += tmplen;
>>> +			len -= tmplen;
>>> +			dwcmshc_cqhci_set_tran_desc(desc, addr, len, end, dma64);
>>> +			desc += cq_host->trans_desc_len;
>>> +		}
>>> +	}
>>> +}
>> Could this be done more like dwcmshc_adma_write_desc()
> Actually I'm not sure what you mean. I reused the boundary-checking construction with
> the "BOUNDARY_OK" macro and used the same variable names. I would appreciate it if you
> could clarify what "more like dwcmshc_adma_write_desc()" means.

Provide a hook for cqhci_set_tran_desc() instead of cqhci_prep_tran_desc().

You'll need to check the details, but something like:

diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index b3d7d6d8d654..98e7e9d3030d 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -522,7 +522,10 @@ static int cqhci_prep_tran_desc(struct mmc_request *mrq,
 		if ((i+1) == sg_count)
 			end = true;
 
-		cqhci_set_tran_desc(desc, addr, len, end, dma64);
+		if (cq_host->ops->set_tran_desc)
+			cq_host->ops->set_tran_desc(cq_host, &desc, addr, len, end, dma64);
+		else
+			cqhci_set_tran_desc(desc, addr, len, end, dma64);
 		desc += cq_host->trans_desc_len;
 	}

And:

static void dwcmshc_set_tran_desc(struct cqhci_host *cq_host, u8 **desc,
				  dma_addr_t addr, int len, bool end, bool dma64)
{
	int tmplen, offset;

	if (likely(!len || BOUNDARY_OK(addr, len))) {
		cqhci_set_tran_desc(*desc, addr, len, end, dma64);
		return;
	}

	offset = addr & (SZ_128M - 1);
	tmplen = SZ_128M - offset;
	cqhci_set_tran_desc(*desc, addr, tmplen, false, dma64);

	addr += tmplen;
	len -= tmplen;
	*desc += cq_host->trans_desc_len;
	cqhci_set_tran_desc(*desc, addr, len, end, dma64);
}
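
The matching hook declaration in struct cqhci_host_ops (cqhci.h) would then be
something along these lines; the name and exact signature are only a suggestion
to keep the sketch above self-consistent:

	void (*set_tran_desc)(struct cqhci_host *cq_host, u8 **desc,
			      dma_addr_t addr, int len, bool end, bool dma64);
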
>>
>>> +
>>> +static void dwcmshc_cqhci_dumpregs(struct mmc_host *mmc)
>>> +{
>>> +	sdhci_dumpregs(mmc_priv(mmc));
>>> +}
>>> +
>>>  static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock)
>>>  {
>>>  	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>> @@ -345,6 +495,7 @@ static const struct sdhci_ops sdhci_dwcmshc_ops = {
>>>  	.get_max_clock		= dwcmshc_get_max_clock,
>>>  	.reset			= sdhci_reset,
>>>  	.adma_write_desc	= dwcmshc_adma_write_desc,
>>> +	.irq			= dwcmshc_cqe_irq_handler,
>>>  };
>>>  static const struct sdhci_ops sdhci_dwcmshc_rk35xx_ops = {
>>> @@ -379,6 +530,70 @@ static const struct sdhci_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
>>>  			  SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
>>>  };
>>> +static const struct cqhci_host_ops dwcmshc_cqhci_ops = {
>>> +	.enable		= dwcmshc_sdhci_cqe_enable,
>>> +	.disable	= dwcmshc_sdhci_cqe_disable,
>>> +	.dumpregs	= dwcmshc_cqhci_dumpregs,
>>> +	.prep_tran_desc	= dwcmshc_cqhci_prep_tran_desc,
>>> +};
>>> +
>>> +static void dwcmshc_cqhci_init(struct sdhci_host *host, struct platform_device *pdev)
>>> +{
>>> +	struct cqhci_host *cq_host;
>>> +	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>> +	struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
>>> +	bool dma64 = false;
>>> +	u16 clk;
>>> +	int err;
>>> +
>>> +	host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD;
>>> +	cq_host = devm_kzalloc(&pdev->dev, sizeof(*cq_host), GFP_KERNEL);
>>> +	if (!cq_host) {
>>> +		dev_err(mmc_dev(host->mmc), "Unable to setup CQE: not enough memory\n");
>>> +		return;
>>> +	}
>>> +
>>> +	/*
>>> +	 * For dwcmshc host controller we have to enable internal clock
>>> +	 * before access to some registers from Vendor Specific Aria 2.
>>> +	 */
>>> +	clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> +	clk |= SDHCI_CLOCK_INT_EN;
>>> +	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
>>> +	clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> +	if (!(clk & SDHCI_CLOCK_INT_EN)) {
>>> +		dev_err(mmc_dev(host->mmc), "Unable to setup CQE: internal clock enable error\n");
>>> +		goto free_cq_host;
>>> +	}
>>> +
>>> +	cq_host->mmio = host->ioaddr + priv->vendor_specific_area2;
>>> +	cq_host->ops = &dwcmshc_cqhci_ops;
>>> +
>>> +	/* Enable using of 128-bit task descriptors */
>>> +	dma64 = host->flags & SDHCI_USE_64_BIT_DMA;
>>> +	if (dma64) {
>>> +		dev_dbg(mmc_dev(host->mmc), "128-bit task descriptors\n");
>>> +		cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
>>> +	}
>>> +	err = cqhci_init(cq_host, host->mmc, dma64);
>>> +	if (err) {
>>> +		dev_err(mmc_dev(host->mmc), "Unable to setup CQE: error %d\n", err);
>>> +		goto int_clok_disable;
>>> +	}
>>> +
>>> +	dev_dbg(mmc_dev(host->mmc), "CQE init done\n");
>>> +
>>> +	return;
>>> +
>>> +int_clok_disable:
>> 'clok' is an odd abbreviation of 'clock'. Perhaps 'clk' or just 'clock'
> Thanks, I'll fix it in the next version of the patch.
>>
>>> +	clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> +	clk &= ~SDHCI_CLOCK_INT_EN;
>>> +	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
>>> +
>>> +free_cq_host:
>>> +	devm_kfree(&pdev->dev, cq_host);
>>> +}
>>> +
>>>  static int dwcmshc_rk35xx_init(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
>>>  {
>>>  	int err;
>>> @@ -471,7 +686,7 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>  	struct rk35xx_priv *rk_priv = NULL;
>>>  	const struct sdhci_pltfm_data *pltfm_data;
>>>  	int err;
>>> -	u32 extra;
>>> +	u32 extra, caps;
>>>  	pltfm_data = device_get_match_data(&pdev->dev);
>>>  	if (!pltfm_data) {
>>> @@ -519,6 +734,8 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>  	priv->vendor_specific_area1 =
>>>  		sdhci_readl(host, DWCMSHC_P_VENDOR_AREA1) & DWCMSHC_AREA1_MASK;
>>> +	priv->vendor_specific_area2 =
>>> +		sdhci_readw(host, DWCMSHC_P_VENDOR_AREA2);
>>>  	host->mmc_host_ops.request = dwcmshc_request;
>>>  	host->mmc_host_ops.hs400_enhanced_strobe = dwcmshc_hs400_enhanced_strobe;
>>> @@ -547,6 +764,10 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>  	sdhci_enable_v4_mode(host);
>>>  #endif
>>> +	caps = sdhci_readl(host, SDHCI_CAPABILITIES);
>>> +	if (caps & SDHCI_CAN_64BIT_V4)
>>> +		sdhci_enable_v4_mode(host);
>>> +
>>>  	host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
>>>  	pm_runtime_get_noresume(dev);
>>> @@ -557,6 +778,14 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>  	if (err)
>>>  		goto err_rpm;
>>> +	/* Setup Command Queue Engine if enabled */
>>> +	if (device_property_read_bool(&pdev->dev, "supports-cqe")) {
>>> +		if (caps & SDHCI_CAN_64BIT_V4)
>>> +			dwcmshc_cqhci_init(host, pdev);
>>> +		else
>>> +			dev_warn(dev, "Cannot enable CQE without V4 mode support\n");
>>> +	}
>>> +
>>>  	if (rk_priv)
>>>  		dwcmshc_rk35xx_postinit(host, priv);
>>>