Duplicate entries with rebalance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



hi,
Duplicate entries are seen in a directory because of bug 916449 in afr (fix is the attached patch) and dht (fix: http://review.gluster.com/4587). Even with the fixes I am still able to observe duplicate entries sporadically. I added some logs in dht_readdirp_cbk. According to the logs, the entry is read from subvolume replicate-0 once and from subvolume replicate-2 the next time. I see that the sticky flag is set in the iatt buf received from the replicate-0 subvol, whereas the sticky flag is not set in the iatt buf received from replicate-2. Please note that the issue is not consistent: when I run the script in a while loop, it hits the bug once in 5-10 runs.

Run it as:
while prove -rfv tests/bugs/test.t; do :; done

Here is the script:

#!/bin/bash

# Pull in the shared test helpers. TEST, EXPECT, EXPECT_WITHIN, cleanup,
# rebalance_status_field and the $CLI/$V0/$H0/$B0/$M0 variables used below are
# not defined in this script and presumably come from these two files.
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc

# Presumably resets state left over from a previous run -- defined elsewhere.
cleanup;

# Start the management daemon and check that a glusterd process exists.
TEST glusterd
TEST pidof glusterd

# Print the entries of directory $1 that a listing returns more than once.
#
# $1 - directory to inspect; its direct children are listed via a shell glob.
# Output: the "<count> <path>" records of `uniq -c` for every path seen more
#         than once, joined onto a single line; empty when every path is
#         listed exactly once.
function get_dup_files {
        local d=$1
        # "$d" is quoted so a directory path containing whitespace is not
        # word-split; the glob stays outside the quotes so it still expands.
        # grep -E replaces the deprecated egrep, and [[:space:]] is the POSIX
        # spelling of the GNU-only \s. The filter drops the count==1 lines of
        # `uniq -c`, leaving only duplicated names.
        # The command substitution is deliberately left unquoted so that echo
        # collapses the result onto one line.
        echo $(ls -rt -d -1 "$d"/* | sort | uniq -c | grep -E -v "^[[:space:]]*1[[:space:]][[:space:]]*")
}

# Create a replica-2 volume out of four bricks on this host and start it.
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
TEST $CLI volume start $V0

# Mount the volume at $M0 using the glusterfs client.
TEST glusterfs -s $H0 --volfile-id $V0 $M0;

# Populate the mount: 10 directories, each holding 100 empty files.
TEST mkdir $M0/dir{1..10};
#TEST cp /bin/* $M0/;
TEST touch $M0/dir{1..10}/files{1..100};

# Add two more bricks so that there is something to rebalance onto.
# NOTE(review): the second brick is written "$H0:/$B0/..." with an extra "/"
# after the colon, unlike every other brick spec in this script -- confirm
# whether that is intentional.
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}4 $H0:/$B0/${V0}5

# Start a forced rebalance, then kill -9 the process whose pid is stored in the
# pid file below while the rebalance is in progress.
# NOTE(review): the pid file name hard-codes "d-backends", so this presumably
# targets the brick ${V0}0 process and assumes $B0 is /d/backends -- confirm.
TEST $CLI volume rebalance $V0 start force
kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}0.pid`;
# "start ... force" presumably brings the killed brick process back up.
TEST $CLI volume start $V0 force;
# Wait (limit 60, presumably seconds) for the rebalance status to read "success:".
EXPECT_WITHIN 60 "success:" rebalance_status_field $V0
# stat every path under the mount; only stdout is discarded.
find $M0 |xargs stat 1>/dev/null;

# Each directory must list every entry exactly once, i.e. get_dup_files must
# print nothing for it.
EXPECT "" get_dup_files $M0/dir1
EXPECT "" get_dup_files $M0/dir2
EXPECT "" get_dup_files $M0/dir3
EXPECT "" get_dup_files $M0/dir4
EXPECT "" get_dup_files $M0/dir5
EXPECT "" get_dup_files $M0/dir6
EXPECT "" get_dup_files $M0/dir7
EXPECT "" get_dup_files $M0/dir8
EXPECT "" get_dup_files $M0/dir9
EXPECT "" get_dup_files $M0/dir10

Pranith.
>From 39b180fa54a2adbdf55d78454514b9a403a20709 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@xxxxxxxxxx>
Date: Thu, 28 Feb 2013 14:50:16 +0530
Subject: [PATCH 1/1] wip

Change-Id: I3b3a7931954da2166a9cba19ff9f76f37739d751
Signed-off-by: Pranith Kumar K <pkarampu@xxxxxxxxxx>
---
 xlators/cluster/afr/src/afr-dir-write.c   |   85 ++++++++++++++++++++++++++++-
 xlators/cluster/afr/src/afr-transaction.c |    2 +-
 xlators/cluster/afr/src/afr-transaction.h |    3 +
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index a988ea2..a32daaf 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -231,7 +231,88 @@ afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
         afr_mark_new_entry_changelog (frame, this);
 
 out:
-        local->transaction.resume (frame, this);
+        return;
+}
+
+static gf_boolean_t
+did_all_winds_fail_with_same_errno (afr_local_t *local, afr_private_t *priv,
+                                    int32_t op_errno)
+{
+        int     i = 0;
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->transaction.pre_op[i] &&
+                    (local->child_errno[i] != op_errno))
+                        return _gf_false;
+        }
+        return _gf_true;
+}
+
+static gf_boolean_t
+are_fops_wound (afr_local_t *local, afr_private_t *priv)
+{
+        int     i = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->transaction.pre_op[i])
+                        return _gf_true;
+        }
+        return _gf_false;
+}
+
+void
+afr_dir_fop_handle_all_fop_failures (call_frame_t *frame)
+{
+        xlator_t        *this = NULL;
+        afr_local_t     *local = NULL;
+        afr_private_t   *priv = NULL;
+        int32_t         skip_errnos[3] = {0, 0, 0};
+        int             i = 0;
+
+        this = frame->this;
+        local = frame->local;
+        priv = this->private;
+
+        if (local->op_ret >= 0)
+                goto out;
+
+        if (!are_fops_wound (local, priv))
+                goto out;
+
+        switch (local->op) {
+        case GF_FOP_CREATE:
+        case GF_FOP_MKNOD:
+        case GF_FOP_LINK:
+        case GF_FOP_SYMLINK:
+        case GF_FOP_MKDIR:
+                skip_errnos[0] = EEXIST;
+                break;
+        case GF_FOP_RMDIR:
+        case GF_FOP_UNLINK:
+                skip_errnos[0] = ENOENT;
+                break;
+        case GF_FOP_RENAME:
+                skip_errnos[0] = ENOENT;
+                skip_errnos[1] = EEXIST;
+                break;
+        default:
+                break;
+        }
+
+        /*
+         * If a new entry creation fop fails with EEXIST or a delete entry fop
+         * fails with ENOENT, on all the subvols the fop is wound, then no
+         * change took place to the directory. So treat these cases as success
+         * from the point of view of changelog.
+         */
+
+        for (i = 0; skip_errnos[i]; i++) {
+                if (did_all_winds_fail_with_same_errno (local, priv,
+                                                        skip_errnos[i]))
+                        __mark_all_success (local->pending, priv->child_count,
+                                            local->transaction.type);
+        }
+out:
+        return;
 }
 
 void
@@ -253,6 +334,8 @@ afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
 done:
         local->transaction.unwind (frame, this);
         afr_dir_fop_mark_entry_pending_changelog (frame, this);
+        afr_dir_fop_handle_all_fop_failures (frame);
+        local->transaction.resume (frame, this);
 }
 
 /* {{{ create */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index d20928d..b38da54 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -192,7 +192,7 @@ __mark_non_participant_children (int32_t *pending[], int child_count,
 }
 
 
-static void
+void
 __mark_all_success (int32_t *pending[], int child_count,
                     afr_transaction_type type)
 {
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index e95bc5b..55e8bbc 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -37,4 +37,7 @@ afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
 void
 afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
 
+void
+__mark_all_success (int32_t *pending[], int child_count,
+                    afr_transaction_type type);
 #endif /* __TRANSACTION_H__ */
-- 
1.7.10.2


[Index of Archives]     [Gluster Users]     [Ceph Users]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux