summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2016-05-27 15:47:07 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-05-30 21:27:35 -0700
commit3d75e32d6ada03c979077681ff414d948800f07e (patch)
tree18297f74761127aa7dc3ca65a3dffa5d61eb10ff
parenta45bef14b370fe82d4f3af41a35d2802a359c287 (diff)
cluster/afr: Unwind xdata_rsp even in case of failures
DHT expects GF_PREOP_CHECK_FAILED to be present in xdata_rsp in case of mkdir failures because of stale layout. But AFR was unwinding null xdata_rsp in case of failures. This was leading to mkdir failures just after remove-brick. Unwind the xdata_rsp in case of failures to make sure the response from brick reaches dht. BUG: 1340623 Change-Id: Idd3f7b95730e8ea987b608e892011ff190e181d1 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/14553 NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Ravishankar N <ravishankar@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anuradha Talur <atalur@redhat.com> Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c29
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c3
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c20
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c71
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h4
-rw-r--r--xlators/cluster/afr/src/afr.h8
7 files changed, 120 insertions, 21 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2043e11d1e8..c20c53218f9 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1269,6 +1269,7 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
int
afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables,
int *event_p, afr_transaction_type type,
afr_read_subvol_args_t *args)
{
@@ -1305,6 +1306,9 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
*subvol_p = subvol;
if (event_p)
*event_p = event;
+ if (readables)
+ memcpy (readables, readable,
+ sizeof (*readables) * priv->child_count);
return subvol;
}
@@ -1434,6 +1438,7 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (local->read_attempted);
GF_FREE (local->readable);
+ GF_FREE (local->readable2);
if (local->inode)
inode_unref (local->inode);
@@ -1593,8 +1598,8 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
priv = this->private;
if (parent)
- par_read_subvol = afr_data_subvol_get (parent, this, 0, 0,
- NULL);
+ par_read_subvol = afr_data_subvol_get (parent, this, NULL, NULL,
+ NULL, NULL);
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
@@ -1633,8 +1638,7 @@ afr_read_subvol_decide (inode_t *inode, xlator_t *this,
int data_subvol = -1;
int mdata_subvol = -1;
- data_subvol = afr_data_subvol_get (inode, this,
- 0, 0, args);
+ data_subvol = afr_data_subvol_get (inode, this, NULL, NULL, NULL, args);
mdata_subvol = afr_metadata_subvol_get (inode, this,
0, 0, args);
if (data_subvol == -1 || mdata_subvol == -1)
@@ -1782,7 +1786,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
goto cant_interpret;
} else {
read_subvol = afr_data_subvol_get (local->inode, this,
- 0, 0, &args);
+ NULL, NULL, NULL, &args);
}
} else {
cant_interpret:
@@ -2409,7 +2413,7 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req
return 0;
}
- afr_read_subvol_get (loc->inode, this, NULL, &event,
+ afr_read_subvol_get (loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
if (event != local->event_generation)
@@ -2560,7 +2564,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
}
}
- afr_read_subvol_get (loc->parent, this, NULL, &event,
+ afr_read_subvol_get (loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
if (event != local->event_generation)
@@ -2883,7 +2887,8 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL);
+ read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL,
+ NULL, NULL);
LOCK (&frame->lock);
{
@@ -4283,6 +4288,14 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
goto out;
}
+ local->readable2 = GF_CALLOC (priv->child_count, sizeof (char),
+ gf_afr_mt_char);
+ if (!local->readable2) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
gf_afr_mt_reply_t);
if (!local->replies) {
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 841c64361cf..2260e5dac26 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -153,7 +153,8 @@ afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
* -1 above due to gen being 0, which is why it is OK to pass NULL for
* read_subvol_args here.
*/
- entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0, NULL);
+ entry_read_subvol = afr_data_subvol_get (inode, this, NULL, NULL,
+ NULL, NULL);
if (entry_read_subvol != par_read_subvol)
return -1;
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 55aec7429a7..f3de5352d7e 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -101,18 +101,21 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
if (local->inode) {
afr_replies_interpret (frame, this, local->inode, NULL);
inode_read_subvol = afr_data_subvol_get (local->inode, this,
- NULL, NULL, &args);
+ NULL, NULL, NULL, &args);
}
if (local->parent)
parent_read_subvol = afr_data_subvol_get (local->parent, this,
- NULL, NULL, NULL);
+ NULL, local->readable, NULL, NULL);
+
if (local->parent2)
parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
- NULL, NULL, NULL);
+ NULL, local->readable2, NULL, NULL);
local->op_ret = -1;
local->op_errno = afr_final_errno (local, priv);
+ afr_pick_error_xdata (local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid)
@@ -144,6 +147,11 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
local->replies[i].preparent2;
local->cont.dir_fop.postnewparent =
local->replies[i].postparent2;
+ if (local->xdata_rsp) {
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
if (local->replies[i].xdata)
local->xdata_rsp =
dict_ref (local->replies[i].xdata);
@@ -196,6 +204,9 @@ __afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
local->replies[child_index].valid = 1;
local->replies[child_index].op_ret = op_ret;
local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+
if (op_ret >= 0) {
if (poststat)
@@ -208,9 +219,6 @@ __afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
local->replies[child_index].preparent2 = *preparent2;
if (postparent2)
local->replies[child_index].postparent2 = *postparent2;
- if (xdata)
- local->replies[child_index].xdata = dict_ref (xdata);
-
if (fd_ctx)
fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
} else {
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 36889429657..47320ce9412 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -84,7 +84,7 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
&args);
else
read_subvol = afr_data_subvol_get (local->inode, this,
- NULL, NULL, &args);
+ NULL, NULL, NULL, &args);
}
local->op_ret = -1;
@@ -164,8 +164,8 @@ __afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
local->replies[child_index].poststat = *postbuf;
if (xattr)
local->replies[child_index].xattr = dict_ref (xattr);
- if (xdata)
- local->replies[child_index].xdata = dict_ref (xdata);
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
} else {
afr_transaction_fop_failed (frame, this, child_index);
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index f5ff6a0428e..2760563e0ae 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -57,6 +57,66 @@ afr_zero_fill_stat (afr_local_t *local)
}
}
+/* Pick which child's response xdata AFR unwinds with when the fop failed, and
+ * store a new reference to it in local->xdata_rsp. Any xdata_rsp already held
+ * by local is unref'd and cleared first.
+ *
+ * Selection, first pass: only replies that are valid, failed (op_ret < 0) and
+ * did not fail with ENOTCONN are candidates. The first candidate is taken;
+ * it is replaced by a later candidate only when the current pick is not
+ * marked readable and the later one is. In other words a failure from a
+ * readable ("good") child is preferred, and ENOTCONN failures are never
+ * preferred even if that child is a source.
+ *
+ * Selection, fallback: only when the first pass found no candidate at all
+ * (every failure was ENOTCONN), the first valid failed reply that carries
+ * xdata is used.
+ *
+ * Note: if the first pass selects a reply that has no xdata, the fallback is
+ * NOT run and local->xdata_rsp is left NULL.
+ *
+ * local     - provides replies[] and receives xdata_rsp.
+ * priv      - provides child_count, the length of every per-child array here.
+ * inode1    - not read by this function.
+ * readable1 - per-child readable flags; used unconditionally, so it must be
+ *             non-NULL and hold child_count entries.
+ * inode2, readable2 - when both are non-NULL the two readable arrays are
+ *             combined with AFR_INTERSECT (presumably keeping only children
+ *             set in both - confirm against the macro definition); otherwise
+ *             readable1 alone is used. */
+void
+afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
+ inode_t *inode1, unsigned char *readable1,
+ inode_t *inode2, unsigned char *readable2)
+{
+ int s = -1;/* index of the selected reply; stays -1 while none is chosen */
+ int i = 0;
+ unsigned char *readable = NULL;
+
+ if (local->xdata_rsp) {
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ readable = alloca0 (priv->child_count * sizeof (*readable));
+ if (inode2 && readable2) {/* rename fop: a second inode and its readable
+ * flags were supplied, so combine both sets */
+ AFR_INTERSECT (readable, readable1, readable2,
+ priv->child_count);
+ } else {
+ memcpy (readable, readable1,
+ sizeof (*readable) * priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (local->replies[i].op_errno == ENOTCONN)
+ continue;
+
+ /* Order is important in the following condition: (s < 0) must be
+ * evaluated first so that readable[s] is never indexed with -1.
+ * Once a candidate is held, switch to child i only if the held
+ * one is not readable and child i is. */
+ if ((s < 0) || (!readable[s] && readable[i]))
+ s = i;
+ }
+
+ if (s != -1 && local->replies[s].xdata) {
+ local->xdata_rsp = dict_ref (local->replies[s].xdata);
+ } else if (s == -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (!local->replies[i].xdata)
+ continue;
+ local->xdata_rsp = dict_ref (local->replies[i].xdata);
+ break;
+ }
+ }
+}
+
gf_boolean_t
afr_needs_changelog_update (afr_local_t *local)
{
@@ -747,6 +807,17 @@ afr_handle_quorum (call_frame_t *frame)
local->op_errno = afr_final_errno (local, priv);
if (local->op_errno == 0)
local->op_errno = afr_quorum_errno (priv);
+ switch (local->transaction.type) {
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ afr_pick_error_xdata (local, priv, local->parent,
+ local->readable, local->parent2,
+ local->readable2);
+ break;
+ default:
+ /*TBD*/
+ break;
+ }
}
int
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index c58531eff44..ca8fcfefa89 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -55,4 +55,8 @@ gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this);
gf_boolean_t afr_needs_changelog_update (afr_local_t *local);
void afr_zero_fill_stat (afr_local_t *local);
+void
+afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
+ inode_t *inode1, unsigned char *readable1,
+ inode_t *inode2, unsigned char *readable2);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 5482dab25b2..c6afbbe20e0 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -426,6 +426,7 @@ typedef struct _afr_local {
performed. This is the output of afr_inode_refresh()
*/
unsigned char *readable;
+ unsigned char *readable2; /*For rename transaction*/
afr_inode_refresh_cbk_t refreshfn;
@@ -860,14 +861,15 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
int type);
int
afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables,
int *event_p, afr_transaction_type type,
afr_read_subvol_args_t *args);
-#define afr_data_subvol_get(i, t, s, e, a) \
- afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION, a)
+#define afr_data_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_DATA_TRANSACTION, a)
#define afr_metadata_subvol_get(i, t, s, e, a) \
- afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION, a)
+ afr_read_subvol_get(i, t, s, NULL, e, AFR_METADATA_TRANSACTION, a)
int
afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,