Re: [PATCH v3] rbd: support timeout in rbd_wait_state_locked

On 03/26/2018 10:53 PM, Ilya Dryomov wrote:
On Mon, Mar 26, 2018 at 4:22 PM, Dongsheng Yang
<dongsheng.yang@xxxxxxxxxxxx> wrote:
Currently, rbd_wait_state_locked() waits forever if we can't get our
state locked. Example:

rbd map --exclusive test1  --> /dev/rbd0
rbd map test1  --> /dev/rbd1
dd if=/dev/zero of=/dev/rbd1 bs=1M count=1 --> IO blocked

To avoid this problem, this patch introduces a timeout in
rbd_wait_state_locked(), so that it returns an error when the timeout
is reached.

This patch also allows the user to set lock_timeout when mapping an
rbd image.
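
For example, one could then map with a 30-second timeout (the 30 here
is only an illustration):

  rbd map test1 -o lock_timeout=30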

Signed-off-by: Dongsheng Yang <dongsheng.yang@xxxxxxxxxxxx>
---
  drivers/block/rbd.c | 35 +++++++++++++++++++++++++++++++----
  1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index f1d9e60..58edead 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -726,6 +726,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
   */
  enum {
         Opt_queue_depth,
+       Opt_lock_timeout,
         Opt_last_int,
         /* int args above */
         Opt_last_string,
@@ -739,6 +740,7 @@ enum {

  static match_table_t rbd_opts_tokens = {
         {Opt_queue_depth, "queue_depth=%d"},
+       {Opt_lock_timeout, "lock_timeout=%d"},
         /* int args above */
         /* string args above */
         {Opt_read_only, "read_only"},
@@ -752,6 +754,7 @@ enum {

  struct rbd_options {
         int     queue_depth;
+       unsigned long   lock_timeout;
         bool    read_only;
         bool    lock_on_read;
         bool    exclusive;
@@ -761,6 +764,7 @@ struct rbd_options {
  #define RBD_READ_ONLY_DEFAULT  false
  #define RBD_LOCK_ON_READ_DEFAULT false
  #define RBD_EXCLUSIVE_DEFAULT  false
+#define RBD_LOCK_TIMEOUT_DEFAULT       0  /* no timeout */

  static int parse_rbd_opts_token(char *c, void *private)
  {
@@ -790,6 +794,14 @@ static int parse_rbd_opts_token(char *c, void *private)
                 }
                 rbd_opts->queue_depth = intval;
                 break;
+       case Opt_lock_timeout:
+               /* 0 is "wait forever" (i.e. infinite timeout) */
+               if (intval < 0 || intval > INT_MAX / 1000) {
+                       pr_err("lock_timeout out of range\n");
+                       return -EINVAL;
+               }
+               rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000);
+               break;
         case Opt_read_only:
                 rbd_opts->read_only = true;
                 break;
@@ -3494,8 +3506,9 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
  /*
   * lock_rwsem must be held for read
   */
-static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
+static int rbd_wait_state_locked(struct rbd_device *rbd_dev)
  {
+       unsigned long timeout = 0;
         DEFINE_WAIT(wait);

         do {
@@ -3508,12 +3521,19 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
                 prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait,
                                           TASK_UNINTERRUPTIBLE);
                 up_read(&rbd_dev->lock_rwsem);
-               schedule();
+               timeout = schedule_timeout(ceph_timeout_jiffies(
+                                               rbd_dev->opts->lock_timeout));
                 down_read(&rbd_dev->lock_rwsem);
+               if (!timeout) {
+                       finish_wait(&rbd_dev->lock_waitq, &wait);
+                       rbd_warn(rbd_dev, "timed out waiting for lock");
+                       return -ETIMEDOUT;
+               }
         } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
                  !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));

         finish_wait(&rbd_dev->lock_waitq, &wait);
+       return 0;
  }

  static void rbd_queue_workfn(struct work_struct *work)
@@ -3606,7 +3626,9 @@ static void rbd_queue_workfn(struct work_struct *work)
                                 result = -EROFS;
                                 goto err_unlock;
                         }
-                       rbd_wait_state_locked(rbd_dev);
+                       result = rbd_wait_state_locked(rbd_dev);
+                       if (result)
+                               goto err_unlock;
                 }
                 if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
                         result = -EBLACKLISTED;
@@ -5139,6 +5161,7 @@ static int rbd_add_parse_args(const char *buf,

         rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
         rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
+       rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
         rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
         rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;

@@ -5209,6 +5232,8 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)

  static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
  {
+       int ret = 0;
+
         if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
                 rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
                 return -EINVAL;
@@ -5216,8 +5241,10 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)

         /* FIXME: "rbd map --exclusive" should be in interruptible */
         down_read(&rbd_dev->lock_rwsem);
-       rbd_wait_state_locked(rbd_dev);
+       ret = rbd_wait_state_locked(rbd_dev);
         up_read(&rbd_dev->lock_rwsem);
+       if (ret)
+               return ret;
         if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
                 rbd_warn(rbd_dev, "failed to acquire exclusive lock");
                 return -EROFS;
Looks good to me.

I don't like that rbd_wait_state_locked() now effectively has two
return values: the error code and the RBD_DEV_FLAG_BLACKLISTED flag,
which still needs to be tested afterwards, but I'll fix that myself.

That's okay, thanks.
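
In case it is useful, here is a rough sketch of one way to fold the
blacklist check into the return value (just an illustration, not
necessarily what you have in mind; the rest of rbd_wait_state_locked()
stays as in the patch above):

	} while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
		 !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));

	finish_wait(&rbd_dev->lock_waitq, &wait);
	/* report blacklisting through the return value as well */
	if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
		return -EBLACKLISTED;
	return 0;

Then the callers could check only the return value instead of testing
the flag separately.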

Yang

Thanks,

                 Ilya


