Re: [PATCH] i2c: aspeed: Improve driver to support multi-master use cases stably

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thanks Brendan for the review. Please see my answers inline.

On 6/27/2018 12:36 AM, Brendan Higgins wrote:
On Tue, Jun 26, 2018 at 9:58 AM Jae Hyun Yoo
<jae.hyun.yoo@xxxxxxxxxxxxxxx> wrote:

BMC firmware should support some multi-master use cases such as multi-node,
IPMB, BMC-ME link and so on but the current ASPEED I2C driver is a bit
unstable for the multi-master use case. So this patch improves ASPEED I2C
driver to support the multi-master use case stably.

Changes:
* Added XFER_MODE status register checking logic into
   aspeed_i2c_master_xfer to improve the current bus busy checking logic.
* Changed the order of enum aspeed_i2c_master_state and
   enum aspeed_i2c_slave_state defines to make their initial values set to
   ASPEED_I2C_MASTER_INACTIVE and ASPEED_I2C_SLAVE_STOP respectively.
   In case of multi-master use with previous code, if a slave data comes
   ahead of the first master xfer, master_state starts from an invalid
   state. This change fixed the issue.
* Adjusted spin_lock scope to make it wrap the whole irq handler using
   a single lock and unlock pair covers both master and slave handlers.
* Added irq_status variable as a member of the struct aspeed_i2c_bus to
   collect handled interrupt bits throughout the master and the slave irq
   handlers.
* Added control logic to put an order on calling the master and the slave
   irq handlers based on their current states.

Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@xxxxxxxxxxxxxxx>
---
  drivers/i2c/busses/i2c-aspeed.c | 200 +++++++++++++++++++-------------
  1 file changed, 118 insertions(+), 82 deletions(-)

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 60e4d0e939a3..ac3e17d9a365 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -4,6 +4,7 @@
   *  Copyright (C) 2012-2017 ASPEED Technology Inc.
   *  Copyright 2017 IBM Corporation
   *  Copyright 2017 Google, Inc.
+ *  Copyright (c) 2018 Intel Corporation
   *
   *  This program is free software; you can redistribute it and/or modify
   *  it under the terms of the GNU General Public License version 2 as
@@ -12,6 +13,7 @@

  #include <linux/clk.h>
  #include <linux/completion.h>
+#include <linux/delay.h>
  #include <linux/err.h>
  #include <linux/errno.h>
  #include <linux/i2c.h>
@@ -82,6 +84,11 @@
  #define ASPEED_I2CD_INTR_RX_DONE                       BIT(2)
  #define ASPEED_I2CD_INTR_TX_NAK                                BIT(1)
  #define ASPEED_I2CD_INTR_TX_ACK                                BIT(0)
+#define ASPEED_I2CD_INTR_ERRORS                                                       \
+               (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                             \
+                ASPEED_I2CD_INTR_SCL_TIMEOUT |                                \
+                ASPEED_I2CD_INTR_ABNORMAL |                                   \
+                ASPEED_I2CD_INTR_ARBIT_LOSS)
  #define ASPEED_I2CD_INTR_ALL                                                  \
                 (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                             \
                  ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                           \
@@ -94,6 +101,7 @@
                  ASPEED_I2CD_INTR_TX_ACK)

  /* 0x14 : I2CD Command/Status Register   */
+#define ASPEED_I2CD_XFER_MODE_STS_MASK                 GENMASK(22, 19)
  #define ASPEED_I2CD_SCL_LINE_STS                       BIT(18)
  #define ASPEED_I2CD_SDA_LINE_STS                       BIT(17)
  #define ASPEED_I2CD_BUS_BUSY_STS                       BIT(16)
@@ -110,23 +118,27 @@
  /* 0x18 : I2CD Slave Device Address Register   */
  #define ASPEED_I2CD_DEV_ADDR_MASK                      GENMASK(6, 0)

+/* Timeout for bus busy checking */
+#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
+#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */

Could you add a comment on where you got these values from?


These are coming from ASPEED SDK code. Actually, they use 100ms for
timeout and 10ms for interval but I increased the timeout value to
250ms so that it covers a various range of bus speed. I think, it
should be computed at run time based on the current bus speed, or
we could add these as device tree settings. How do you think about it?

>
Also, please use the same naming pattern as above.


Sure. Will change it using the same naming pattern as above.

+
  enum aspeed_i2c_master_state {
+       ASPEED_I2C_MASTER_INACTIVE,
         ASPEED_I2C_MASTER_START,
         ASPEED_I2C_MASTER_TX_FIRST,
         ASPEED_I2C_MASTER_TX,
         ASPEED_I2C_MASTER_RX_FIRST,
         ASPEED_I2C_MASTER_RX,
         ASPEED_I2C_MASTER_STOP,
-       ASPEED_I2C_MASTER_INACTIVE,
  };

  enum aspeed_i2c_slave_state {
+       ASPEED_I2C_SLAVE_STOP,
         ASPEED_I2C_SLAVE_START,
         ASPEED_I2C_SLAVE_READ_REQUESTED,
         ASPEED_I2C_SLAVE_READ_PROCESSED,
         ASPEED_I2C_SLAVE_WRITE_REQUESTED,
         ASPEED_I2C_SLAVE_WRITE_RECEIVED,
-       ASPEED_I2C_SLAVE_STOP,
  };

  struct aspeed_i2c_bus {
@@ -150,6 +162,7 @@ struct aspeed_i2c_bus {
         int                             cmd_err;
         /* Protected only by i2c_lock_bus */
         int                             master_xfer_result;
+       u32                             irq_status;
  #if IS_ENABLED(CONFIG_I2C_SLAVE)
         struct i2c_client               *slave;
         enum aspeed_i2c_slave_state     slave_state;
@@ -229,37 +242,30 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
  #if IS_ENABLED(CONFIG_I2C_SLAVE)
  static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
  {
-       u32 command, irq_status, status_ack = 0;
+       u32 command, status_ack = 0;
         struct i2c_client *slave = bus->slave;
-       bool irq_handled = true;
         u8 value;

-       spin_lock(&bus->lock);
-       if (!slave) {
-               irq_handled = false;
-               goto out;
-       }
+       if (!slave)
+               return false;

         command = readl(bus->base + ASPEED_I2C_CMD_REG);
-       irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);

         /* Slave was requested, restart state machine. */
-       if (irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
+       if (bus->irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
                 status_ack |= ASPEED_I2CD_INTR_SLAVE_MATCH;
                 bus->slave_state = ASPEED_I2C_SLAVE_START;
         }

         /* Slave is not currently active, irq was for someone else. */
-       if (bus->slave_state == ASPEED_I2C_SLAVE_STOP) {
-               irq_handled = false;
-               goto out;
-       }
+       if (bus->slave_state == ASPEED_I2C_SLAVE_STOP)
+               return false;

         dev_dbg(bus->dev, "slave irq status 0x%08x, cmd 0x%08x\n",
-               irq_status, command);
+               bus->irq_status, command);

         /* Slave was sent something. */
-       if (irq_status & ASPEED_I2CD_INTR_RX_DONE) {
+       if (bus->irq_status & ASPEED_I2CD_INTR_RX_DONE) {
                 value = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
                 /* Handle address frame. */
                 if (bus->slave_state == ASPEED_I2C_SLAVE_START) {
@@ -274,28 +280,29 @@ static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
         }

         /* Slave was asked to stop. */
-       if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
+       if (bus->irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
                 status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
                 bus->slave_state = ASPEED_I2C_SLAVE_STOP;
         }
-       if (irq_status & ASPEED_I2CD_INTR_TX_NAK) {
+       if (bus->irq_status & ASPEED_I2CD_INTR_TX_NAK) {
                 status_ack |= ASPEED_I2CD_INTR_TX_NAK;
                 bus->slave_state = ASPEED_I2C_SLAVE_STOP;
         }
+       if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK) {
+               status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+       }

         switch (bus->slave_state) {
         case ASPEED_I2C_SLAVE_READ_REQUESTED:
-               if (irq_status & ASPEED_I2CD_INTR_TX_ACK)
+               if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK)
                         dev_err(bus->dev, "Unexpected ACK on read request.\n");
                 bus->slave_state = ASPEED_I2C_SLAVE_READ_PROCESSED;
-
                 i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value);
                 writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
                 writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
                 break;
         case ASPEED_I2C_SLAVE_READ_PROCESSED:
-               status_ack |= ASPEED_I2CD_INTR_TX_ACK;
-               if (!(irq_status & ASPEED_I2CD_INTR_TX_ACK))
+               if (!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))
                         dev_err(bus->dev,
                                 "Expected ACK after processed read.\n");
                 i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value);
@@ -318,15 +325,8 @@ static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
                 break;
         }

-       if (status_ack != irq_status)
-               dev_err(bus->dev,
-                       "irq handled != irq. expected %x, but was %x\n",
-                       irq_status, status_ack);
-       writel(status_ack, bus->base + ASPEED_I2C_INTR_STS_REG);
-
-out:
-       spin_unlock(&bus->lock);
-       return irq_handled;
+       bus->irq_status ^= status_ack;
+       return !bus->irq_status;
  }
  #endif /* CONFIG_I2C_SLAVE */

@@ -384,20 +384,19 @@ static int aspeed_i2c_is_irq_error(u32 irq_status)

  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
  {
-       u32 irq_status, status_ack = 0, command = 0;
+       u32 status_ack = 0, command = 0;
         struct i2c_msg *msg;
         u8 recv_byte;
         int ret;

-       spin_lock(&bus->lock);
-       irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
-       /* Ack all interrupt bits. */
-       writel(irq_status, bus->base + ASPEED_I2C_INTR_STS_REG);
-
-       if (irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
+       if (bus->irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
                 bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
                 status_ack |= ASPEED_I2CD_INTR_BUS_RECOVER_DONE;
                 goto out_complete;
+       } else {
+               /* Master is not currently active, irq was for someone else. */
+               if (bus->master_state == ASPEED_I2C_MASTER_INACTIVE)
+                       goto out_no_complete;
         }

         /*
@@ -405,20 +404,23 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
          * should clear the command queue effectively taking us back to the
          * INACTIVE state.
          */
-       ret = aspeed_i2c_is_irq_error(irq_status);
-       if (ret < 0) {
-               dev_dbg(bus->dev, "received error interrupt: 0x%08x",
-                       irq_status);
+       ret = aspeed_i2c_is_irq_error(bus->irq_status);
+       if (ret) {
+               dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
+                       bus->irq_status);
                 bus->cmd_err = ret;
                 bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+               status_ack |= (bus->irq_status & ASPEED_I2CD_INTR_ERRORS);
                 goto out_complete;
         }

         /* We are in an invalid state; reset bus to a known state. */
         if (!bus->msgs) {
-               dev_err(bus->dev, "bus in unknown state");
+               dev_err(bus->dev, "bus in unknown state irq_status: 0x%x\n",
+                       bus->irq_status);
                 bus->cmd_err = -EIO;
-               if (bus->master_state != ASPEED_I2C_MASTER_STOP)
+               if (bus->master_state != ASPEED_I2C_MASTER_STOP &&
+                   bus->master_state != ASPEED_I2C_MASTER_INACTIVE)
                         aspeed_i2c_do_stop(bus);
                 goto out_no_complete;
         }
@@ -430,7 +432,13 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
          * then update the state and handle the new state below.
          */
         if (bus->master_state == ASPEED_I2C_MASTER_START) {
-               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+               if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+                       if (unlikely(!(bus->irq_status &
+                                    ASPEED_I2CD_INTR_TX_NAK))) {
+                               bus->cmd_err = -ENXIO;
+                               bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+                               goto out_complete;
+                       }
                         pr_devel("no slave present at %02x", msg->addr);
                         status_ack |= ASPEED_I2CD_INTR_TX_NAK;
                         bus->cmd_err = -ENXIO;
@@ -450,12 +458,13 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)

         switch (bus->master_state) {
         case ASPEED_I2C_MASTER_TX:
-               if (unlikely(irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
-                       dev_dbg(bus->dev, "slave NACKed TX");
+               if (unlikely(bus->irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
+                       dev_dbg(bus->dev, "slave NACKed TX\n");
                         status_ack |= ASPEED_I2CD_INTR_TX_NAK;
                         goto error_and_stop;
-               } else if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
-                       dev_err(bus->dev, "slave failed to ACK TX");
+               } else if (unlikely(!(bus->irq_status &
+                                     ASPEED_I2CD_INTR_TX_ACK))) {
+                       dev_err(bus->dev, "slave failed to ACK TX\n");
                         goto error_and_stop;
                 }
                 status_ack |= ASPEED_I2CD_INTR_TX_ACK;
@@ -473,12 +482,12 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
                 goto out_no_complete;
         case ASPEED_I2C_MASTER_RX_FIRST:
                 /* RX may not have completed yet (only address cycle) */
-               if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE))
+               if (!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))
                         goto out_no_complete;
                 /* fallthrough intended */
         case ASPEED_I2C_MASTER_RX:
-               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
-                       dev_err(bus->dev, "master failed to RX");
+               if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
+                       dev_err(bus->dev, "master failed to RX\n");
                         goto error_and_stop;
                 }
                 status_ack |= ASPEED_I2CD_INTR_RX_DONE;
@@ -508,8 +517,11 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
                 }
                 goto out_no_complete;
         case ASPEED_I2C_MASTER_STOP:
-               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_NORMAL_STOP))) {
-                       dev_err(bus->dev, "master failed to STOP");
+               if (unlikely(!(bus->irq_status &
+                              ASPEED_I2CD_INTR_NORMAL_STOP))) {
+                       dev_err(bus->dev,
+                               "master failed to STOP irq_status:0x%x\n",
+                               bus->irq_status);
                         bus->cmd_err = -EIO;
                         /* Do not STOP as we have already tried. */
                 } else {
@@ -520,8 +532,8 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
                 goto out_complete;
         case ASPEED_I2C_MASTER_INACTIVE:
                 dev_err(bus->dev,
-                       "master received interrupt 0x%08x, but is inactive",
-                       irq_status);
+                       "master received interrupt 0x%08x, but is inactive\n",
+                       bus->irq_status);
                 bus->cmd_err = -EIO;
                 /* Do not STOP as we should be inactive. */
                 goto out_complete;
@@ -543,26 +555,61 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
                 bus->master_xfer_result = bus->msgs_index + 1;
         complete(&bus->cmd_complete);
  out_no_complete:
-       if (irq_status != status_ack)
-               dev_err(bus->dev,
-                       "irq handled != irq. expected 0x%08x, but was 0x%08x\n",
-                       irq_status, status_ack);
-       spin_unlock(&bus->lock);
-       return !!irq_status;
+       bus->irq_status ^= status_ack;
+       return !bus->irq_status;
  }

  static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
  {
         struct aspeed_i2c_bus *bus = dev_id;
+       u32 irq_received;
+
+       spin_lock(&bus->lock);
+       irq_received = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
+       bus->irq_status = irq_received;

  #if IS_ENABLED(CONFIG_I2C_SLAVE)
-       if (aspeed_i2c_slave_irq(bus)) {
-               dev_dbg(bus->dev, "irq handled by slave.\n");
-               return IRQ_HANDLED;
+       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
+               if (!aspeed_i2c_master_irq(bus))

Why do you check the slave if master fails (or vice versa)? I
understand that there are some status bits that have not been handled,
but it doesn't seem reasonable to assume that there is state that the
other should do something with; the only way this would happen is if
the state that you think you are in does not match the status bits you
have been given, but if this is the case, you are already hosed; I
don't think trying the other handler is likely to make things better,
unless there is something that I am missing.


In most of cases, interrupt bits are set one by one but there are also a
lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
with combining master and slave events using a single interrupt call. It
happens much in multi-master environment than single-master. For
example, when master is waiting for a NORMAL_STOP interrupt in its
MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
with the NORMAL_STOP in case of an another master immediately sends data
just after acquiring the bus - it happens a lot in BMC-ME connection
practically. In this case, the NORMAL_STOP interrupt should be handled
by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
handled by slave_irq so it's the reason why this code is added.

+                       aspeed_i2c_slave_irq(bus);
+       } else {
+               if (!aspeed_i2c_slave_irq(bus))
+                       aspeed_i2c_master_irq(bus);
         }
+#else
+       aspeed_i2c_master_irq(bus);
  #endif /* CONFIG_I2C_SLAVE */

-       return aspeed_i2c_master_irq(bus) ? IRQ_HANDLED : IRQ_NONE;
+       if (bus->irq_status)
+               dev_err(bus->dev,
+                       "irq handled != irq. expected 0x%08x, but was 0x%08x\n",
+                       irq_received, irq_received ^ bus->irq_status);
+
+       /* Ack all interrupt bits. */
+       writel(irq_received, bus->base + ASPEED_I2C_INTR_STS_REG);
+       spin_unlock(&bus->lock);
+       return bus->irq_status ? IRQ_NONE : IRQ_HANDLED;
+}
+
+static int aspeed_i2c_check_bus_busy_timeout(struct aspeed_i2c_bus *bus)
+{
+       ktime_t timeout = ktime_add_us(ktime_get(), BUS_BUSY_CHECK_TIMEOUT);
+
+       might_sleep();
+
+       for (;;) {
+               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
+                     (ASPEED_I2CD_BUS_BUSY_STS |
+                      ASPEED_I2CD_XFER_MODE_STS_MASK)))

Is using the Transfer Mode State Machine bits necessary? The
documentation marks it as "for debugging purpose only," so relying on
it makes me nervous.


As you said, the documentation marks it as "for debugging purpose only."
but ASPEED also uses this way in their SDK code because it's the best
way for checking bus busy status which can cover both single and
multi-master use cases.

+                       return 0;
+               if (ktime_compare(ktime_get(), timeout) > 0)
+                       break;
+               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,

Where did you get this minimum value?


No source for the minimum value. ASPEED uses mdelay(10) in their SDK
but I changed that code using usleep_range and the range value was set
with considering time stretching of usleep_range.
regmap_read_poll_timeout was a reference for this code.

Thanks,

Jae

+                            BUS_BUSY_CHECK_INTERVAL);
+       }
+
+       dev_err(bus->dev, "timeout waiting for idle. attempting recovery\n");
+       return aspeed_i2c_recover_bus(bus);
  }

  static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
@@ -570,22 +617,11 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
  {
         struct aspeed_i2c_bus *bus = i2c_get_adapdata(adap);
         unsigned long time_left, flags;
-       int ret = 0;
-
-       spin_lock_irqsave(&bus->lock, flags);
-       bus->cmd_err = 0;

-       /* If bus is busy, attempt recovery. We assume a single master
-        * environment.
-        */
-       if (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS) {
-               spin_unlock_irqrestore(&bus->lock, flags);
-               ret = aspeed_i2c_recover_bus(bus);
-               if (ret)
-                       return ret;
-               spin_lock_irqsave(&bus->lock, flags);
-       }
+       if (aspeed_i2c_check_bus_busy_timeout(bus))
+               return -EAGAIN;

+       spin_lock_irqsave(&bus->lock, flags);
         bus->cmd_err = 0;
         bus->msgs = msgs;
         bus->msgs_index = 0;
@@ -851,7 +887,7 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
         bus->rst = devm_reset_control_get_shared(&pdev->dev, NULL);
         if (IS_ERR(bus->rst)) {
                 dev_err(&pdev->dev,
-                       "missing or invalid reset controller device tree entry");
+                       "missing or invalid reset controller device tree entry\n");
                 return PTR_ERR(bus->rst);
         }
         reset_control_deassert(bus->rst);
--
2.17.1




[Index of Archives]     [Linux GPIO]     [Linux SPI]     [Linux Hardward Monitoring]     [LM Sensors]     [Linux USB Devel]     [Linux Media]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux