On Mon, 2024-02-19 at 16:33 -0600, David Lechner wrote: > In the AXI SPI Engine driver, compiling the message is an expensive > operation. Previously, it was done per message transfer in the > prepare_message hook. This patch moves the message compile to the > optimize_message hook so that it is only done once per message in > cases where the peripheral driver calls spi_optimize_message(). > > This can be a significant performance improvement for some peripherals. > For example, the ad7380 driver saw a 13% improvement in throughput > when using the AXI SPI Engine driver with this patch. > > Since we now need two message states, one for the optimization stage > that doesn't change for the lifetime of the message and one that is > reset on each transfer for managing the current transfer state, the old > msg->state is split into msg->opt_state and spi_engine->msg_state. The > latter is included in the driver struct now since there is only one > current message at a time that can ever use it. It is also in a hot path, > so avoiding allocating a new one on each message transfer saves a few > CPU cycles and lets us get rid of the prepare_message callback. 
> > Signed-off-by: David Lechner <dlechner@xxxxxxxxxxxx> > --- Reviewed-by: Nuno Sa <nuno.sa@xxxxxxxxxx> > > v2 changes: none > > drivers/spi/spi-axi-spi-engine.c | 40 +++++++++++++++++----------------------- > 1 file changed, 17 insertions(+), 23 deletions(-) > > diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c > index ca66d202f0e2..6177c1a8d56e 100644 > --- a/drivers/spi/spi-axi-spi-engine.c > +++ b/drivers/spi/spi-axi-spi-engine.c > @@ -109,6 +109,7 @@ struct spi_engine { > spinlock_t lock; > > void __iomem *base; > + struct spi_engine_message_state msg_state; > struct completion msg_complete; > unsigned int int_enable; > }; > @@ -499,17 +500,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid) > return IRQ_HANDLED; > } > > -static int spi_engine_prepare_message(struct spi_controller *host, > - struct spi_message *msg) > +static int spi_engine_optimize_message(struct spi_message *msg) > { > struct spi_engine_program p_dry, *p; > - struct spi_engine_message_state *st; > size_t size; > > - st = kzalloc(sizeof(*st), GFP_KERNEL); > - if (!st) > - return -ENOMEM; > - > spi_engine_precompile_message(msg); > > p_dry.length = 0; > @@ -517,31 +512,22 @@ static int spi_engine_prepare_message(struct spi_controller > *host, > > size = sizeof(*p->instructions) * (p_dry.length + 1); > p = kzalloc(sizeof(*p) + size, GFP_KERNEL); > - if (!p) { > - kfree(st); > + if (!p) > return -ENOMEM; > - } > > spi_engine_compile_message(msg, false, p); > > spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC( > AXI_SPI_ENGINE_CUR_MSG_SYNC_ID)); > > - st->p = p; > - st->cmd_buf = p->instructions; > - st->cmd_length = p->length; > - msg->state = st; > + msg->opt_state = p; > > return 0; > } > > -static int spi_engine_unprepare_message(struct spi_controller *host, > - struct spi_message *msg) > +static int spi_engine_unoptimize_message(struct spi_message *msg) > { > - struct spi_engine_message_state *st = msg->state; > - > - kfree(st->p); > - 
kfree(st); > + kfree(msg->opt_state); > > return 0; > } > @@ -550,10 +536,18 @@ static int spi_engine_transfer_one_message(struct > spi_controller *host, > struct spi_message *msg) > { > struct spi_engine *spi_engine = spi_controller_get_devdata(host); > - struct spi_engine_message_state *st = msg->state; > + struct spi_engine_message_state *st = &spi_engine->msg_state; > + struct spi_engine_program *p = msg->opt_state; > unsigned int int_enable = 0; > unsigned long flags; > > + /* reinitialize message state for this transfer */ > + memset(st, 0, sizeof(*st)); > + st->p = p; > + st->cmd_buf = p->instructions; > + st->cmd_length = p->length; > + msg->state = st; > + > reinit_completion(&spi_engine->msg_complete); > > spin_lock_irqsave(&spi_engine->lock, flags); > @@ -658,8 +652,8 @@ static int spi_engine_probe(struct platform_device *pdev) > host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); > host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2; > host->transfer_one_message = spi_engine_transfer_one_message; > - host->prepare_message = spi_engine_prepare_message; > - host->unprepare_message = spi_engine_unprepare_message; > + host->optimize_message = spi_engine_optimize_message; > + host->unoptimize_message = spi_engine_unoptimize_message; > host->num_chipselect = 8; > > if (host->max_speed_hz == 0) >