In the AXI SPI Engine driver, compiling the message is an expensive operation. Previously, it was done per message transfer in the prepare_message hook. This patch moves the message compile to the optimize_message hook so that it is only done once per message in cases where the peripheral driver calls spi_optimize_message(). This can be a significant performance improvement for some peripherals. For example, the ad7380 driver saw a 13% improvement in throughput when using the AXI SPI Engine driver with this patch. Since we now need two message states, one for the optimization stage that doesn't change for the lifetime of the message and one that is reset on each transfer for managing the current transfer state, the old msg->state is split into msg->opt_state and spi_engine->msg_state. The latter is included in the driver struct now since there is only one current message at a time that can ever use it and it is in a hot path so avoiding allocating a new one on each message transfer saves a few cpu cycles and lets us get rid of the prepare_message callback. Signed-off-by: David Lechner <dlechner@xxxxxxxxxxxx> --- drivers/spi/spi-axi-spi-engine.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index ca66d202f0e2..6177c1a8d56e 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -109,6 +109,7 @@ struct spi_engine { spinlock_t lock; void __iomem *base; + struct spi_engine_message_state msg_state; struct completion msg_complete; unsigned int int_enable; }; @@ -499,17 +500,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid) return IRQ_HANDLED; } -static int spi_engine_prepare_message(struct spi_controller *host, - struct spi_message *msg) +static int spi_engine_optimize_message(struct spi_message *msg) { struct spi_engine_program p_dry, *p; - struct spi_engine_message_state *st; size_t size; - st = kzalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - spi_engine_precompile_message(msg); p_dry.length = 0; @@ -517,31 +512,22 @@ static int spi_engine_prepare_message(struct spi_controller *host, size = sizeof(*p->instructions) * (p_dry.length + 1); p = kzalloc(sizeof(*p) + size, GFP_KERNEL); - if (!p) { - kfree(st); + if (!p) return -ENOMEM; - } spi_engine_compile_message(msg, false, p); spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC( AXI_SPI_ENGINE_CUR_MSG_SYNC_ID)); - st->p = p; - st->cmd_buf = p->instructions; - st->cmd_length = p->length; - msg->state = st; + msg->opt_state = p; return 0; } -static int spi_engine_unprepare_message(struct spi_controller *host, - struct spi_message *msg) +static int spi_engine_unoptimize_message(struct spi_message *msg) { - struct spi_engine_message_state *st = msg->state; - - kfree(st->p); - kfree(st); + kfree(msg->opt_state); return 0; } @@ -550,10 +536,18 @@ static int spi_engine_transfer_one_message(struct spi_controller *host, struct spi_message *msg) { struct spi_engine *spi_engine = spi_controller_get_devdata(host); - struct spi_engine_message_state *st = msg->state; + struct spi_engine_message_state *st = &spi_engine->msg_state; + struct spi_engine_program *p = msg->opt_state; unsigned int int_enable = 0; unsigned long flags; + /* reinitialize message state for this transfer */ + memset(st, 0, sizeof(*st)); + st->p = p; + st->cmd_buf = p->instructions; + st->cmd_length = p->length; + msg->state = st; + reinit_completion(&spi_engine->msg_complete); spin_lock_irqsave(&spi_engine->lock, flags); @@ -658,8 +652,8 @@ static int spi_engine_probe(struct platform_device *pdev) host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2; host->transfer_one_message = spi_engine_transfer_one_message; - host->prepare_message = spi_engine_prepare_message; - host->unprepare_message = spi_engine_unprepare_message; + host->optimize_message = spi_engine_optimize_message; + host->unoptimize_message = spi_engine_unoptimize_message; host->num_chipselect = 8; if (host->max_speed_hz == 0) -- 2.43.0