hi,
Duplicate entries are seen in a directory because of bug 916449 in
afr (the fix is the attached patch) and dht (fix:
http://review.gluster.com/4587). Even with both fixes applied, I am still
able to observe duplicate entries sporadically.
I added some logs in dht_readdirp_cbk. According to the logs, the entry
is read from subvolume replicate-0 once and from subvolume replicate-2 the
next time. I see that the sticky flag is set in the iatt buf received
from the replicate-0 subvol, whereas the sticky flag is not set in the iatt
buf received from replicate-2. Please note that the issue is not
consistent: when I run the script in a while loop, it hits the bug once
in 5-10 runs.
Run it as:
while prove -rfv tests/bugs/test.t; do :; done
Here is the script:
#!/bin/bash
#
# Reproducer for sporadic duplicate directory entries on a
# distributed-replicate volume: create a 2x2 volume, populate it, add a
# third replica pair, start a forced rebalance, kill and restart one brick,
# and then verify that no directory on the mount lists an entry twice.
#
# TEST, EXPECT, EXPECT_WITHIN, cleanup, rebalance_status_field and the
# $CLI/$V0/$H0/$B0/$M0 variables are expected to come from the sourced
# include.rc / volume.rc files.

. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc

cleanup;

TEST glusterd
TEST pidof glusterd

# Print, collapsed onto one line, every path matched by "$1"/* that shows
# up more than once (each prefixed by its count, as produced by `uniq -c`).
# Prints an empty line when every path is listed exactly once.
function get_dup_files {
        local d=$1
        # `echo $(...)` is deliberate: it folds the report into a single
        # whitespace-normalised line so it can be compared against "".
        echo $(ls -rt -d -1 "$d"/* | sort | uniq -c | grep -Ev "^\s*1\s\s*")
}

# The brick list must stay on a single logical command line; all four
# bricks are needed to get a 2x2 distributed-replicate volume.
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
        $H0:$B0/${V0}2 $H0:$B0/${V0}3
TEST $CLI volume start $V0
TEST glusterfs -s $H0 --volfile-id $V0 $M0;

TEST mkdir $M0/dir{1..10};
#TEST cp /bin/* $M0/;
TEST touch $M0/dir{1..10}/files{1..100};

# Add a third replica pair and start migrating data onto it.
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}4 $H0:$B0/${V0}5
TEST $CLI volume rebalance $V0 start force

# Kill the first brick right after the rebalance is started, then bring it
# back with a forced volume start.
# NOTE(review): the pid-file name hard-codes "d-backends", presumably
# mirroring $B0 = /d/backends -- confirm against include.rc.
kill -9 "$(cat "/var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}0.pid")";
TEST $CLI volume start $V0 force;
EXPECT_WITHIN 60 "success:" rebalance_status_field $V0

# Stat every entry through the mount before checking the listings.
find $M0 |xargs stat 1>/dev/null;

EXPECT "" get_dup_files $M0/dir1
EXPECT "" get_dup_files $M0/dir2
EXPECT "" get_dup_files $M0/dir3
EXPECT "" get_dup_files $M0/dir4
EXPECT "" get_dup_files $M0/dir5
EXPECT "" get_dup_files $M0/dir6
EXPECT "" get_dup_files $M0/dir7
EXPECT "" get_dup_files $M0/dir8
EXPECT "" get_dup_files $M0/dir9
EXPECT "" get_dup_files $M0/dir10
Pranith.
>From 39b180fa54a2adbdf55d78454514b9a403a20709 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@xxxxxxxxxx>
Date: Thu, 28 Feb 2013 14:50:16 +0530
Subject: [PATCH 1/1] wip
Change-Id: I3b3a7931954da2166a9cba19ff9f76f37739d751
Signed-off-by: Pranith Kumar K <pkarampu@xxxxxxxxxx>
---
xlators/cluster/afr/src/afr-dir-write.c | 85 ++++++++++++++++++++++++++++-
xlators/cluster/afr/src/afr-transaction.c | 2 +-
xlators/cluster/afr/src/afr-transaction.h | 3 +
3 files changed, 88 insertions(+), 2 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index a988ea2..a32daaf 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -231,7 +231,88 @@ afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
afr_mark_new_entry_changelog (frame, this);
out:
- local->transaction.resume (frame, this);
+ return;
+}
+
+static gf_boolean_t
+did_all_winds_fail_with_same_errno (afr_local_t *local, afr_private_t *priv,
+ int32_t op_errno)
+{
+ int i = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ (local->child_errno[i] != op_errno))
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
+static gf_boolean_t
+are_fops_wound (afr_local_t *local, afr_private_t *priv)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+afr_dir_fop_handle_all_fop_failures (call_frame_t *frame)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t skip_errnos[3] = {0, 0, 0};
+ int i = 0;
+
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+
+ if (local->op_ret >= 0)
+ goto out;
+
+ if (!are_fops_wound (local, priv))
+ goto out;
+
+ switch (local->op) {
+ case GF_FOP_CREATE:
+ case GF_FOP_MKNOD:
+ case GF_FOP_LINK:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_MKDIR:
+ skip_errnos[0] = EEXIST;
+ break;
+ case GF_FOP_RMDIR:
+ case GF_FOP_UNLINK:
+ skip_errnos[0] = ENOENT;
+ break;
+ case GF_FOP_RENAME:
+ skip_errnos[0] = ENOENT;
+ skip_errnos[1] = EEXIST;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * If a new entry creation fop fails with EEXIST or a delete entry fop
+ * fails with ENOENT, on all the subvols the fop is wound, then no
+ * change took place to the directory. So treat these cases as success
+ * from the point of view of changelog.
+ */
+
+ for (i = 0; skip_errnos[i]; i++) {
+ if (did_all_winds_fail_with_same_errno (local, priv,
+ skip_errnos[i]))
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+ }
+out:
+ return;
}
void
@@ -253,6 +334,8 @@ afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
done:
local->transaction.unwind (frame, this);
afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ afr_dir_fop_handle_all_fop_failures (frame);
+ local->transaction.resume (frame, this);
}
/* {{{ create */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index d20928d..b38da54 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -192,7 +192,7 @@ __mark_non_participant_children (int32_t *pending[], int child_count,
}
-static void
+void
__mark_all_success (int32_t *pending[], int child_count,
afr_transaction_type type)
{
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index e95bc5b..55e8bbc 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -37,4 +37,7 @@ afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
void
afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+void
+__mark_all_success (int32_t *pending[], int child_count,
+ afr_transaction_type type);
#endif /* __TRANSACTION_H__ */
--
1.7.10.2