diff options
Diffstat (limited to 'xlators/cluster/stripe/src/stripe.c')
| -rw-r--r-- | xlators/cluster/stripe/src/stripe.c | 3990 |
1 files changed, 2901 insertions, 1089 deletions
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index b2c70cfe5..69b510e23 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -1,25 +1,16 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ /** * xlators/cluster/stripe: - * Stripe translator, stripes the data accross its child nodes, + * Stripe translator, stripes the data across its child nodes, * as per the options given in the volfile. The striping works * fairly simple. It writes files at different offset as per * calculation. So, 'ls -l' output at the real posix level will @@ -32,65 +23,19 @@ * very much necessary, or else, use it in combination with AFR, to have a * backup copy. */ +#include <fnmatch.h> #include "stripe.h" +#include "libxlator.h" +#include "byte-order.h" +#include "statedump.h" -void -stripe_local_wipe (stripe_local_t *local) -{ - if (!local) - goto out; - - if (local->loc.path) - loc_wipe (&local->loc); - if (local->loc2.path) - loc_wipe (&local->loc2); -out: - return; -} - -/** - * stripe_get_matching_bs - Get the matching block size for the given path. - */ -int32_t -stripe_get_matching_bs (const char *path, struct stripe_options *opts, - uint64_t default_bs) -{ - struct stripe_options *trav = NULL; - char *pathname = NULL; - uint64_t block_size = 0; - - block_size = default_bs; - - if (!path || !opts) - goto out; - - /* FIXME: is a strdup really necessary? */ - pathname = gf_strdup (path); - if (!pathname) - goto out; - - trav = opts; - while (trav) { - if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) { - block_size = trav->block_size; - break; - } - trav = trav->next; - } - - GF_FREE (pathname); - -out: - return block_size; -} - - +struct volume_options options[]; int32_t stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { int callcnt = -1; stripe_local_t *local = NULL; @@ -109,8 +54,7 @@ stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this, UNLOCK (&frame->lock); if (!callcnt) { - stripe_local_wipe (local); - STACK_DESTROY (frame->root); + STRIPE_STACK_DESTROY (frame); } out: return 0; @@ -120,7 +64,7 @@ int32_t stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { stripe_local_t *local = NULL; call_frame_t *prev = NULL; @@ -135,7 +79,7 @@ stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND (frame, stripe_sh_chown_cbk, prev->this, prev->this->fops->setattr, &local->loc, - &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID)); + &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL); out: return 0; @@ -149,7 +93,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this, call_frame_t *rframe = NULL; stripe_local_t *rlocal = NULL; stripe_private_t *priv = NULL; - dict_t *dict = NULL; + dict_t *xdata = NULL; int ret = 0; if (!local || !this || !frame) { @@ -167,8 +111,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this, if (!rframe) { goto out; } - rlocal = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + rlocal = mem_get0 (this->local_pool); if (!rlocal) { goto out; } @@ -177,14 +120,14 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this, loc_copy (&rlocal->loc, &local->loc); memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt)); - dict = dict_new (); - if (!dict) + xdata = dict_new (); + if (!xdata) goto out; - ret = dict_set_static_bin (dict, "gfid-req", local->stbuf.ia_gfid, 16); + ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16); if (ret) gf_log (this->name, GF_LOG_WARNING, - "failed to set gfid-req"); + "%s: failed to set gfid-req", local->loc.path); while (trav) { if (IA_ISREG (local->stbuf.ia_type)) { @@ -192,36 +135,43 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this, trav->xlator, trav->xlator->fops->mknod, &local->loc, st_mode_from_ia (local->stbuf.ia_prot, - local->stbuf.ia_type), 0, - dict); + local->stbuf.ia_type), + 0, 0, xdata); } if (IA_ISDIR (local->stbuf.ia_type)) { STACK_WIND (rframe, stripe_sh_make_entry_cbk, trav->xlator, trav->xlator->fops->mkdir, - &local->loc, st_mode_from_ia (local->stbuf.ia_prot, - local->stbuf.ia_type), - dict); + &local->loc, + st_mode_from_ia (local->stbuf.ia_prot, + local->stbuf.ia_type), + 0, xdata); } trav = trav->next; } + if (xdata) + dict_unref (xdata); + return 0; + out: - if (dict) - dict_unref (dict); + if (rframe) + STRIPE_STACK_DESTROY (rframe); + if (xdata) + dict_unref (xdata); return 0; } + int32_t stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int32_t callcnt = 0; - dict_t *tmp_dict = NULL; - inode_t *tmp_inode = NULL; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = 0; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -243,7 +193,7 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, strerror (op_errno)); if (local->op_errno != ESTALE) local->op_errno = op_errno; - if ((op_errno != ENOENT) || + if (((op_errno != ENOENT) && (op_errno != ENOTCONN)) || (prev->this == FIRST_CHILD (this))) local->failed = 1; if (op_errno == ENOENT) @@ -252,51 +202,81 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret >= 0) { local->op_ret = 0; + if (IA_ISREG (buf->ia_type)) { + ret = stripe_ctx_handle (this, prev, local, + xdata); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error getting fctx info from" + " dict"); + } if (FIRST_CHILD(this) == prev->this) { local->stbuf = *buf; local->postparent = *postparent; local->inode = inode_ref (inode); - local->dict = dict_ref (dict); + if (xdata) + local->xdata = dict_ref (xdata); + if (local->xattr) { + stripe_aggregate_xattr (local->xdata, + local->xattr); + dict_unref (local->xattr); + local->xattr = NULL; + } + } + + if (!local->xdata && !local->xattr) { + local->xattr = dict_ref (xdata); + } else if (local->xdata) { + stripe_aggregate_xattr (local->xdata, xdata); + } else if (local->xattr) { + stripe_aggregate_xattr (local->xattr, xdata); } + local->stbuf_blocks += buf->ia_blocks; local->postparent_blocks += postparent->ia_blocks; + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; if (local->postparent_size < postparent->ia_size) local->postparent_size = postparent->ia_size; + + if (uuid_is_null (local->ia_gfid)) + uuid_copy (local->ia_gfid, buf->ia_gfid); + + /* Make sure the gfid on all the nodes are same */ + if (uuid_compare (local->ia_gfid, buf->ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid different on subvolume %s", + local->loc.path, prev->this->name); + } } } UNLOCK (&frame->lock); if (!callcnt) { - if (local->op_ret == 0 && local->entry_self_heal_needed) + if (local->op_ret == 0 && local->entry_self_heal_needed && + !uuid_is_null (local->loc.inode->gfid)) stripe_entry_self_heal (frame, this, local); if (local->failed) local->op_ret = -1; - tmp_dict = local->dict; - tmp_inode = local->inode; - if (local->op_ret != -1) { local->stbuf.ia_blocks = local->stbuf_blocks; local->stbuf.ia_size = local->stbuf_size; local->postparent.ia_blocks = local->postparent_blocks; local->postparent.ia_size = local->postparent_size; + inode_ctx_put (local->inode, this, + (uint64_t) (long)local->fctx); } - stripe_local_wipe (local); - STACK_UNWIND_STRICT (lookup, frame, local->op_ret, + STRIPE_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->inode, - &local->stbuf, local->dict, + &local->stbuf, local->xdata, &local->postparent); - - if (tmp_inode) - inode_unref (tmp_inode); - if (tmp_dict) - dict_unref (tmp_dict); } out: return 0; @@ -304,12 +284,15 @@ out: int32_t stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) + dict_t *xdata) { - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; int32_t op_errno = EINVAL; + int64_t filesize = 0; + int ret = 0; + uint64_t tmpctx = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -321,8 +304,7 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, trav = this->children; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -331,26 +313,60 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, frame->local = local; loc_copy (&local->loc, loc); + inode_ctx_get (local->inode, this, &tmpctx); + if (tmpctx) + local->fctx = (stripe_fd_ctx_t*) (long)tmpctx; + + /* quick-read friendly changes */ + if (xdata && dict_get (xdata, GF_CONTENT_KEY)) { + ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize); + if (!ret && (filesize > priv->block_size)) + dict_del (xdata, GF_CONTENT_KEY); + } + + /* get stripe-size xattr on lookup. This would be required for + * open/read/write/pathinfo calls. Hence we send down the request + * even when type == IA_INVAL */ + + /* + * We aren't guaranteed to have xdata here. We need the format info for + * the file, so allocate xdata if necessary. + */ + if (!xdata) + xdata = dict_new(); + else + xdata = dict_ref(xdata); + + if (xdata && (IA_ISREG (loc->inode->ia_type) || + (loc->inode->ia_type == IA_INVAL))) { + ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0); + if (ret) + gf_log (this->name , GF_LOG_ERROR, "Failed to build" + " xattr request for %s", loc->path); + + } + /* Everytime in stripe lookup, all child nodes should be looked up */ local->call_count = priv->child_count; while (trav) { STACK_WIND (frame, stripe_lookup_cbk, trav->xlator, - trav->xlator->fops->lookup, - loc, xattr_req); + trav->xlator->fops->lookup, loc, xdata); trav = trav->next; } + dict_unref(xdata); + return 0; err: - STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); return 0; } int32_t stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -385,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } local->stbuf_blocks += buf->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; } @@ -400,20 +419,20 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf.ia_blocks = local->stbuf_blocks; } - stripe_local_wipe (local); - STACK_UNWIND_STRICT (stat, frame, local->op_ret, - local->op_errno, &local->stbuf); + STRIPE_STACK_UNWIND (stat, frame, local->op_ret, + local->op_errno, &local->stbuf, NULL); } out: return 0; } int32_t -stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) +stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); @@ -431,8 +450,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -441,23 +459,30 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) frame->local = local; local->call_count = priv->child_count; + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + while (trav) { STACK_WIND (frame, stripe_stat_cbk, trav->xlator, - trav->xlator->fops->stat, loc); + trav->xlator->fops->stat, loc, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL); + STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); return 0; } int32_t stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *stbuf) + int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata) { stripe_local_t *local = NULL; int32_t callcnt = 0; @@ -494,15 +519,15 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, UNLOCK (&frame->lock); if (!callcnt) { - STACK_UNWIND_STRICT (statfs, frame, local->op_ret, - local->op_errno, &local->statvfs_buf); + STRIPE_STACK_UNWIND (statfs, frame, local->op_ret, + local->op_errno, &local->statvfs_buf, NULL); } out: return 0; } int32_t -stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { stripe_local_t *local = NULL; xlator_list_t *trav = NULL; @@ -517,8 +542,7 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) priv = this->private; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -530,13 +554,13 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) local->call_count = priv->child_count; while (trav) { STACK_WIND (frame, stripe_statfs_cbk, trav->xlator, - trav->xlator->fops->statfs, loc); + trav->xlator->fops->statfs, loc, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL); + STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); return 0; } @@ -545,7 +569,7 @@ err: int32_t stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -583,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->prebuf_blocks += prebuf->ia_blocks; local->postbuf_blocks += postbuf->ia_blocks; + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); + if (local->prebuf_size < prebuf->ia_size) local->prebuf_size = prebuf->ia_size; @@ -603,22 +630,23 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->post_buf.ia_size = local->postbuf_size; } - stripe_local_wipe (local); - STACK_UNWIND_STRICT (truncate, frame, local->op_ret, + STRIPE_STACK_UNWIND (truncate, frame, local->op_ret, local->op_errno, &local->pre_buf, - &local->post_buf); + &local->post_buf, NULL); } out: return 0; } int32_t -stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { - xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; + int i, eof_idx; + off_t dest_offset, tmp_offset; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -627,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) VALIDATE_OR_GOTO (loc->inode, err); priv = this->private; - trav = this->children; if (priv->first_child_down) { op_errno = ENOTCONN; @@ -635,8 +662,7 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -645,15 +671,55 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) frame->local = local; local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_truncate_cbk, trav->xlator, - trav->xlator->fops->truncate, loc, offset); - trav = trav->next; - } + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, + "no xlator at index %d", i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + /* + * The node that owns EOF is truncated to the exact + * coalesced offset. Nodes prior to this index should + * be rounded up to the size of the complete stripe, + * while nodes after this index should be rounded down + * to the size of the previous stripe. + */ + if (i < eof_idx) + tmp_offset = roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, + fctx->stripe_size, fctx->stripe_count); + } else { + dest_offset = offset; + } + + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->truncate, loc, dest_offset, + NULL); + } return 0; err: - STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -661,7 +727,7 @@ err: int32_t stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -700,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->prebuf_blocks += preop->ia_blocks; local->postbuf_blocks += postop->ia_blocks; + correct_file_size(preop, local->fctx, prev); + correct_file_size(postop, local->fctx, prev); + if (local->prebuf_size < preop->ia_size) local->prebuf_size = preop->ia_size; if (local->postbuf_size < postop->ia_size) @@ -719,10 +788,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->post_buf.ia_size = local->postbuf_size; } - stripe_local_wipe (local); - STACK_UNWIND_STRICT (setattr, frame, local->op_ret, + STRIPE_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, &local->pre_buf, - &local->post_buf); + &local->post_buf, NULL); } out: return 0; @@ -731,11 +799,12 @@ out: int32_t stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); @@ -753,33 +822,47 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; frame->local = local; - local->call_count = priv->child_count; + if (!IA_ISDIR (loc->inode->ia_type) && + !IA_ISREG (loc->inode->ia_type)) { + local->call_count = 1; + STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setattr, + loc, stbuf, valid, NULL); + return 0; + } + + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + local->call_count = priv->child_count; while (trav) { STACK_WIND (frame, stripe_setattr_cbk, trav->xlator, trav->xlator->fops->setattr, - loc, stbuf, valid); + loc, stbuf, valid, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); return 0; } int32_t stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -795,8 +878,7 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, trav = this->children; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -807,13 +889,13 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, while (trav) { STACK_WIND (frame, stripe_setattr_cbk, trav->xlator, - trav->xlator->fops->fsetattr, fd, stbuf, valid); + trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -821,7 +903,8 @@ int32_t stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent) + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -858,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->pre_buf.ia_blocks += prenewparent->ia_blocks; local->post_buf.ia_blocks += postnewparent->ia_blocks; + correct_file_size(buf, local->fctx, prev); + if (local->stbuf.ia_size < buf->ia_size) local->stbuf.ia_size = buf->ia_size; @@ -880,11 +965,10 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (local->failed) local->op_ret = -1; - stripe_local_wipe (local); - STACK_UNWIND_STRICT (rename, frame, local->op_ret, local->op_errno, + STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, &local->stbuf, &local->preparent, &local->postparent, &local->pre_buf, - &local->post_buf); + &local->post_buf, NULL); } out: return 0; @@ -894,7 +978,8 @@ int32_t stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent) + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { stripe_local_t *local = NULL; xlator_list_t *trav = NULL; @@ -925,25 +1010,25 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, while (trav) { STACK_WIND (frame, stripe_stack_rename_cbk, trav->xlator, trav->xlator->fops->rename, - &local->loc, &local->loc2); + &local->loc, &local->loc2, NULL); trav = trav->next; } return 0; unwind: - stripe_local_wipe (local); - STACK_UNWIND_STRICT (rename, frame, -1, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent); + STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, NULL); return 0; } int32_t stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) + loc_t *newloc, dict_t *xdata) { stripe_private_t *priv = NULL; stripe_local_t *local = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); @@ -963,8 +1048,7 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -975,24 +1059,67 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, local->call_count = priv->child_count; + if (IA_ISREG(oldloc->inode->ia_type)) { + inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + frame->local = local; STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator, - trav->xlator->fops->rename, oldloc, newloc); + trav->xlator->fops->rename, oldloc, newloc, NULL); return 0; err: - STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); + STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); return 0; } +int32_t +stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", + prev->this->name, strerror (op_errno)); + goto out; + } + local->op_ret = 0; + local->preparent = *preparent; + local->postparent = *postparent; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); + return 0; +out: + STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + + return 0; +} + int32_t stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -1014,50 +1141,33 @@ stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, strerror (op_errno)); local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) + if (op_errno != ENOENT) { local->failed = 1; - } - if (op_ret >= 0) { - local->op_ret = op_ret; - if (FIRST_CHILD(this) == prev->this) { - local->preparent = *preparent; - local->postparent = *postparent; + local->op_ret = op_ret; } - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; } } UNLOCK (&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; + if (callcnt == 1) { + if (local->failed) { + op_errno = local->op_errno; + goto out; } - - stripe_local_wipe (local); - STACK_UNWIND_STRICT (unlink, frame, local->op_ret, - local->op_errno, &local->preparent, - &local->postparent); + STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->unlink, &local->loc, + local->xflag, local->xdata); } + return 0; out: + STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int32_t -stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflag, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; @@ -1085,26 +1195,32 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; + loc_copy (&local->loc, loc); + local->xflag = xflag; + + if (xdata) + local->xdata = dict_ref (xdata); + frame->local = local; local->call_count = priv->child_count; + trav = trav->next; /* Skip the first child */ while (trav) { STACK_WIND (frame, stripe_unlink_cbk, trav->xlator, trav->xlator->fops->unlink, - loc); + loc, xflag, xdata); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -1112,10 +1228,8 @@ err: int32_t stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno,struct iatt *preparent, - struct iatt *postparent) - + struct iatt *postparent, dict_t *xdata) { - xlator_list_t *trav = NULL; stripe_local_t *local = NULL; if (!this || !frame || !frame->local) { @@ -1128,11 +1242,10 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } - trav = this->children; local = frame->local; + local->op_ret = 0; local->call_count--; /* First child successful */ - trav = trav->next; /* Skip first child */ local->preparent = *preparent; local->postparent = *postparent; @@ -1141,22 +1254,60 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->preparent_blocks += preparent->ia_blocks; local->postparent_blocks += postparent->ia_blocks; - while (trav) { - STACK_WIND (frame, stripe_unlink_cbk, trav->xlator, - trav->xlator->fops->rmdir, &local->loc, - local->flags); - trav = trav->next; - } - + STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); return 0; err: - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL); return 0; } int32_t -stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) +stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", + prev->this->name, strerror (op_errno)); + if (op_errno != ENOENT) + local->failed = 1; + } + } + UNLOCK (&frame->lock); + + if (callcnt == 1) { + if (local->failed) + goto out; + STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->rmdir, &local->loc, + local->flags, NULL); + } + return 0; +out: + STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; @@ -1179,8 +1330,7 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -1190,13 +1340,17 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) loc_copy (&local->loc, loc); local->flags = flags; local->call_count = priv->child_count; + trav = trav->next; /* skip the first child */ - STACK_WIND (frame, stripe_first_rmdir_cbk, trav->xlator, - trav->xlator->fops->rmdir, loc, flags); + while (trav) { + STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator, + trav->xlator->fops->rmdir, loc, flags, NULL); + trav = trav->next; + } return 0; err: - STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -1205,7 +1359,7 @@ int32_t stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -1224,10 +1378,9 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie, UNLOCK (&frame->lock); if (!callcnt) { - stripe_local_wipe (local); - STACK_UNWIND_STRICT (mknod, frame, local->op_ret, local->op_errno, + STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, - &local->preparent, &local->postparent); + &local->preparent, &local->postparent, NULL); } out: return 0; @@ -1239,7 +1392,7 @@ out: int32_t stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno) + int32_t op_errno, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -1278,16 +1431,15 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie, stripe_mknod_ifreg_fail_unlink_cbk, trav->xlator, trav->xlator->fops->unlink, - &local->loc); + &local->loc, 0, NULL); trav = trav->next; } return 0; } - stripe_local_wipe (local); - STACK_UNWIND_STRICT (mknod, frame, local->op_ret, local->op_errno, + STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, - &local->preparent, &local->postparent); + &local->preparent, &local->postparent, NULL); } out: return 0; @@ -1297,13 +1449,13 @@ int32_t stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - int ret = 0; int32_t callcnt = 0; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1311,7 +1463,7 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } prev = cookie; - priv = this->private; + priv = this->private; local = frame->local; LOCK (&frame->lock); @@ -1327,20 +1479,24 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->failed = 1; local->op_errno = op_errno; } - if (op_ret >= 0) { local->op_ret = op_ret; - if (FIRST_CHILD(this) == prev->this) { - local->stbuf = *buf; - local->preparent = *preparent; - local->postparent = *postparent; - } + /* Can be used as a mechanism to understand if mknod + was successful in at least one place */ + if (uuid_is_null (local->ia_gfid)) + uuid_copy (local->ia_gfid, buf->ia_gfid); + + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict"); local->stbuf_blocks += buf->ia_blocks; local->preparent_blocks += preparent->ia_blocks; local->postparent_blocks += postparent->ia_blocks; + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; if (local->preparent_size < preparent->ia_size) @@ -1355,6 +1511,23 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (local->failed) local->op_ret = -1; + if ((local->op_ret == -1) && !uuid_is_null (local->ia_gfid)) { + /* ia_gfid set means, at least on one node 'mknod' + is successful */ + local->call_count = priv->child_count; + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_mknod_ifreg_fail_unlink_cbk, + trav->xlator, + trav->xlator->fops->unlink, + &local->loc, 0, NULL); + trav = trav->next; + } + return 0; + } + + if (local->op_ret != -1) { local->preparent.ia_blocks = local->preparent_blocks; local->preparent.ia_size = local->preparent_size; @@ -1362,73 +1535,109 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->postparent.ia_size = local->postparent_size; local->stbuf.ia_size = local->stbuf_size; local->stbuf.ia_blocks = local->stbuf_blocks; + inode_ctx_put (local->inode, this, + (uint64_t)(long) local->fctx); + } + STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); + } +out: + return 0; +} - if ((local->op_ret != -1) && priv->xattr_supported) { - /* Send a setxattr request to nodes where the - files are created */ - int32_t i = 0; - char size_key[256] = {0,}; - char index_key[256] = {0,}; - char count_key[256] = {0,}; - dict_t *dict = NULL; - sprintf (size_key, - "trusted.%s.stripe-size", this->name); - sprintf (count_key, - "trusted.%s.stripe-count", this->name); - sprintf (index_key, - "trusted.%s.stripe-index", this->name); +int32_t +stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + int i = 1; + dict_t *dict = NULL; + int ret = 0; + int need_unref = 0; - local->call_count = priv->child_count; - memcpy (local->loc.inode->gfid, local->stbuf.ia_gfid, 16); - for (i = 0; i < priv->child_count; i++) { - dict = get_new_dict (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate dict"); - } + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - dict_ref (dict); - /* TODO: check return value */ - ret = dict_set_int64 (dict, size_key, - local->stripe_size); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "%s: set stripe-size failed", - local->loc.path); - ret = dict_set_int32 (dict, count_key, - priv->child_count); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "%s: set child_count failed", - local->loc.path); - ret = dict_set_int32 (dict, index_key, i); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "%s: set stripe-index failed", - local->loc.path); + prev = cookie; + priv = this->private; + local = frame->local; + trav = this->children; - STACK_WIND (frame, - stripe_mknod_ifreg_setxattr_cbk, - priv->xl_array[i], - priv->xl_array[i]->fops->setxattr, - &local->loc, dict, 0); + local->call_count--; - dict_unref (dict); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->failed = 1; + local->op_errno = op_errno; + goto out; + } + + local->op_ret = op_ret; + + local->stbuf = *buf; + local->preparent = *preparent; + local->postparent = *postparent; + + if (uuid_is_null (local->ia_gfid)) + uuid_copy (local->ia_gfid, buf->ia_gfid); + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + + trav = trav->next; + while (trav) { + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", local->loc.path); } - return 0; + need_unref = 1; + + dict_copy (local->xattr, dict); + + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, i, + priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to build xattr request"); + + } else { + dict = local->xattr; } - /* Create itself has failed.. so return - without setxattring */ - stripe_local_wipe (local); - STACK_UNWIND_STRICT (mknod, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, - &local->preparent, &local->postparent); + STACK_WIND (frame, stripe_mknod_ifreg_cbk, + trav->xlator, trav->xlator->fops->mknod, + &local->loc, local->mode, local->rdev, 0, dict); + trav = trav->next; + i++; + + if (dict && need_unref) + dict_unref (dict); } -out: + return 0; + +out: + + STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; } @@ -1436,22 +1645,25 @@ int32_t stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); return 0; } int stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *params) + dev_t rdev, mode_t umask, dict_t *xdata) { - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = EINVAL; + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + int32_t op_errno = EINVAL; + int32_t i = 0; + dict_t *dict = NULL; + int ret = 0; + int need_unref = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -1460,7 +1672,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, VALIDATE_OR_GOTO (loc->inode, err); priv = this->private; - trav = this->children; if (priv->first_child_down) { op_errno = ENOTCONN; @@ -1479,43 +1690,63 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs (loc->path, - priv->pattern, - priv->block_size); + local->stripe_size = stripe_get_matching_bs (loc->path, priv); frame->local = local; - local->inode = loc->inode; + local->inode = inode_ref (loc->inode); loc_copy (&local->loc, loc); + local->xattr = dict_copy_with_ref (xdata, NULL); + local->mode = mode; + local->umask = umask; + local->rdev = rdev; /* Everytime in stripe lookup, all child nodes should be looked up */ local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_mknod_ifreg_cbk, - trav->xlator, trav->xlator->fops->mknod, - loc, mode, rdev, params); - trav = trav->next; + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", loc->path); + } + need_unref = 1; + + dict_copy (xdata, dict); + + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, + i, priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to build xattr request"); + } else { + dict = xdata; } - /* This case is handled, no need to continue further. */ + STACK_WIND (frame, stripe_mknod_first_ifreg_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod, + loc, mode, rdev, umask, dict); + + if (dict && need_unref) + dict_unref (dict); return 0; } STACK_WIND (frame, stripe_single_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev, params); + loc, mode, rdev, umask, xdata); return 0; err: - STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL); + STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); return 0; } @@ -1524,11 +1755,10 @@ int32_t stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; - inode_t *local_inode = NULL; call_frame_t *prev = NULL; if (!this || !frame || !frame->local || !cookie) { @@ -1556,12 +1786,6 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret >= 0) { local->op_ret = 0; - if (FIRST_CHILD(this) == prev->this) { - local->inode = inode_ref (inode); - local->stbuf = *buf; - local->postparent = *postparent; - local->preparent = *preparent; - } local->stbuf_blocks += buf->ia_blocks; local->preparent_blocks += preparent->ia_blocks; local->postparent_blocks += postparent->ia_blocks; @@ -1577,12 +1801,7 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, UNLOCK (&frame->lock); if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - local_inode = local->inode; - - if (local->op_ret != -1) { + if (local->failed != -1) { local->preparent.ia_blocks = local->preparent_blocks; local->preparent.ia_size = local->preparent_size; local->postparent.ia_blocks = local->postparent_blocks; @@ -1590,22 +1809,79 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf.ia_size = local->stbuf_size; local->stbuf.ia_blocks = local->stbuf_blocks; } - STACK_UNWIND_STRICT (mkdir, frame, local->op_ret, + STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, &local->preparent, - &local->postparent); + &local->postparent, NULL); + } +out: + return 0; +} + - if (local_inode) - inode_unref (local_inode); +int32_t +stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; } + + prev = cookie; + local = frame->local; + trav = this->children; + + local->call_count--; /* first child is successful */ + trav = trav->next; /* skip first child */ + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + goto out; + } + + local->op_ret = 0; + + local->inode = inode_ref (inode); + local->stbuf = *buf; + local->postparent = *postparent; + local->preparent = *preparent; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + local->stbuf_size = buf->ia_size; + local->preparent_size = preparent->ia_size; + local->postparent_size = postparent->ia_size; + + while (trav) { + STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator, + trav->xlator->fops->mkdir, &local->loc, local->mode, + local->umask, local->xdata); + trav = trav->next; + } + return 0; out: + STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL); + return 0; + } int stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dict_t *params) + mode_t umask, dict_t *xdata) { stripe_private_t *priv = NULL; stripe_local_t *local = NULL; @@ -1627,27 +1903,27 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; local->call_count = priv->child_count; + if (xdata) + local->xdata = dict_ref (xdata); + local->mode = mode; + local->umask = umask; + loc_copy (&local->loc, loc); frame->local = local; /* Everytime in stripe lookup, all child nodes should be looked up */ - while (trav) { - STACK_WIND (frame, stripe_mkdir_cbk, - trav->xlator, trav->xlator->fops->mkdir, - loc, mode, params); - trav = trav->next; - } + STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator, + trav->xlator->fops->mkdir, loc, mode, umask, xdata); return 0; err: - STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL); + STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); return 0; } @@ -1656,12 +1932,12 @@ int32_t stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; - inode_t *local_inode = NULL; call_frame_t *prev = NULL; + stripe_fd_ctx_t *fctx = NULL; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1688,6 +1964,16 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret >= 0) { local->op_ret = 0; + if (IA_ISREG(inode->ia_type)) { + inode_ctx_get(inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get stripe context"); + op_ret = -1; + op_errno = EINVAL; + } + } + if (FIRST_CHILD(this) == prev->this) { local->inode = inode_ref (inode); local->stbuf = *buf; @@ -1698,6 +1984,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->preparent_blocks += preparent->ia_blocks; local->postparent_blocks += postparent->ia_blocks; + correct_file_size(buf, fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; if (local->preparent_size < preparent->ia_size) @@ -1712,8 +2000,6 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (local->failed) local->op_ret = -1; - local_inode = local->inode; - if (local->op_ret != -1) { local->preparent.ia_blocks = local->preparent_blocks; local->preparent.ia_size = local->preparent_size; @@ -1722,20 +2008,17 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf.ia_size = local->stbuf_size; local->stbuf.ia_blocks = local->stbuf_blocks; } - STACK_UNWIND_STRICT (link, frame, local->op_ret, + STRIPE_STACK_UNWIND (link, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, &local->preparent, - &local->postparent); - - if (local_inode) - inode_unref (local_inode); + &local->postparent, NULL); } out: return 0; } int32_t -stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) +stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; @@ -1758,8 +2041,7 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -1773,13 +2055,13 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) while (trav) { STACK_WIND (frame, stripe_link_cbk, trav->xlator, trav->xlator->fops->link, - oldloc, newloc); + oldloc, newloc, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL, NULL, NULL); + STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); return 0; } @@ -1787,12 +2069,10 @@ int32_t stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { int32_t callcnt = 0; - fd_t *lfd = NULL; stripe_local_t *local = NULL; - inode_t *local_inode = NULL; if (!this || !frame || !frame->local) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1808,18 +2088,9 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie, UNLOCK (&frame->lock); if (!callcnt) { - local_inode = local->inode; - lfd = local->fd; - - stripe_local_wipe (local); - STACK_UNWIND_STRICT (create, frame, local->op_ret, local->op_errno, + STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno, local->fd, local->inode, &local->stbuf, - &local->preparent, &local->postparent); - - if (local_inode) - inode_unref (local_inode); - if (lfd) - fd_unref (lfd); + &local->preparent, &local->postparent, NULL); } out: return 0; @@ -1827,16 +2098,16 @@ out: int32_t -stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - inode_t *local_inode = NULL; - fd_t *lfd = NULL; + int32_t callcnt = 0; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; xlator_list_t *trav = NULL; - int32_t callcnt = 0; - call_frame_t *prev = NULL; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1844,7 +2115,7 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } prev = cookie; - priv = this->private; + priv = this->private; local = frame->local; LOCK (&frame->lock); @@ -1855,13 +2126,40 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", prev->this->name, strerror (op_errno)); - local->op_ret = -1; + local->failed = 1; local->op_errno = op_errno; } + + if (op_ret >= 0) { + if (IA_ISREG(buf->ia_type)) { + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from " + "dict"); + } + + local->op_ret = op_ret; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + } } UNLOCK (&frame->lock); if (!callcnt) { + if (local->failed) + local->op_ret = -1; + if (local->op_ret == -1) { local->call_count = priv->child_count; trav = this->children; @@ -1870,44 +2168,57 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, stripe_create_fail_unlink_cbk, trav->xlator, trav->xlator->fops->unlink, - &local->loc); + &local->loc, 0, NULL); trav = trav->next; } return 0; } - lfd = local->fd; - local_inode = local->inode; + if (local->op_ret >= 0) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; - stripe_local_wipe (local); - STACK_UNWIND_STRICT (create, frame, local->op_ret, local->op_errno, - local->fd, local->inode, &local->stbuf, - &local->preparent, &local->postparent); + stripe_copy_xl_array(local->fctx->xl_array, + priv->xl_array, + local->fctx->stripe_count); + inode_ctx_put(local->inode, this, + (uint64_t) local->fctx); + } - if (local_inode) - inode_unref (local_inode); - if (lfd) - fd_unref (lfd); + /* Create itself has failed.. so return + without setxattring */ + STRIPE_STACK_UNWIND (create, frame, local->op_ret, + local->op_errno, local->fd, + local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); } + out: return 0; } + + int32_t -stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - int32_t callcnt = 0; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - fd_t *lfd = NULL; - stripe_fd_ctx_t *fctx = NULL; - inode_t *local_inode = NULL; call_frame_t *prev = NULL; - int ret = 0; + xlator_list_t *trav = NULL; + int i = 1; + dict_t *dict = NULL; + loc_t *loc = NULL; + int32_t need_unref = 0; + int32_t ret = -1; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1917,150 +2228,89 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, prev = cookie; priv = this->private; local = frame->local; + trav = this->children; + loc = &local->loc; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) { - local->op_ret = op_ret; - /* Get the mapping in inode private */ - /* Get the stat buf right */ - if (FIRST_CHILD(this) == prev->this) { - local->stbuf = *buf; - local->preparent = *preparent; - local->postparent = *postparent; - } - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; + --local->call_count; - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->failed = 1; + local->op_errno = op_errno; } - UNLOCK (&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - - /* */ - if (local->op_ret >= 0) { - fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), - gf_stripe_mt_stripe_fd_ctx_t); - if (!fctx) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } - - fctx->stripe_size = local->stripe_size; - fctx->stripe_count = priv->child_count; - fctx->static_array = 1; - fctx->xl_array = priv->xl_array; - fd_ctx_set (local->fd, this, - (uint64_t)(long)fctx); - } - - if ((local->op_ret != -1) && - local->stripe_size && priv->xattr_supported) { - /* Send a setxattr request to nodes where - the files are created */ - int32_t i = 0; - char size_key[256] = {0,}; - char index_key[256] = {0,}; - char count_key[256] = {0,}; - dict_t *dict = NULL; - - sprintf (size_key, - "trusted.%s.stripe-size", this->name); - sprintf (count_key, - "trusted.%s.stripe-count", this->name); - sprintf (index_key, - "trusted.%s.stripe-index", this->name); + local->op_ret = 0; + /* Get the mapping in inode private */ + /* Get the stat buf right */ + local->stbuf = *buf; + local->preparent = *preparent; + local->postparent = *postparent; - local->call_count = priv->child_count; - memcpy (local->loc.inode->gfid, local->stbuf.ia_gfid, 16); - for (i = 0; i < priv->child_count; i++) { - dict = get_new_dict (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "error allocating dict"); - } - dict_ref (dict); + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; - /* TODO: check return values */ - ret = dict_set_int64 (dict, size_key, - local->stripe_size); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "%s: set stripe-size failed", - local->loc.path); + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; - ret = dict_set_int32 (dict, count_key, - priv->child_count); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "%s: set stripe-size failed", - local->loc.path); + if (local->failed) + local->op_ret = -1; - ret = dict_set_int32 (dict, index_key, i); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "%s: set stripe-size failed", - local->loc.path); + if (local->op_ret == -1) { + local->call_count = 1; + STACK_WIND (frame, stripe_create_fail_unlink_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink, + &local->loc, 0, NULL); + return 0; + } - STACK_WIND (frame, stripe_create_setxattr_cbk, - priv->xl_array[i], - priv->xl_array[i]->fops->setxattr, - &local->loc, dict, 0); + if (local->op_ret >= 0) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } - dict_unref (dict); + /* Send a setxattr request to nodes where the + files are created */ + trav = trav->next; + while (trav) { + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", loc->path); } - return 0; - } + need_unref = 1; -unwind: - /* Create itself has failed.. so return - without setxattring */ - lfd = local->fd; - local_inode = local->inode; + dict_copy (local->xattr, dict); - stripe_local_wipe (local); - STACK_UNWIND_STRICT (create, frame, local->op_ret, - local->op_errno, local->fd, - local->inode, &local->stbuf, - &local->preparent, &local->postparent); + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, + i, priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to build xattr request"); + } else { + dict = local->xattr; + } - if (local_inode) - inode_unref (local_inode); - if (lfd) - fd_unref (lfd); + STACK_WIND (frame, stripe_create_cbk, trav->xlator, + trav->xlator->fops->create, &local->loc, + local->flags, local->mode, local->umask, local->fd, + dict); + trav = trav->next; + if (need_unref && dict) + dict_unref (dict); + i++; } out: @@ -2068,6 +2318,7 @@ out: } + /** * stripe_create - If a block-size is specified for the 'name', create the * file in all the child nodes. If not, create it in only first child. @@ -2076,12 +2327,15 @@ out: */ int32_t stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, mode_t mode, fd_t *fd, dict_t *params) + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { stripe_private_t *priv = NULL; stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; int32_t op_errno = EINVAL; + int ret = 0; + int need_unref = 0; + int i = 0; + dict_t *dict = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -2102,46 +2356,71 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc, } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs (loc->path, - priv->pattern, - priv->block_size); + local->stripe_size = stripe_get_matching_bs (loc->path, priv); frame->local = local; local->inode = inode_ref (loc->inode); loc_copy (&local->loc, loc); local->fd = fd_ref (fd); + local->flags = flags; + local->mode = mode; + local->umask = umask; + if (xdata) + local->xattr = dict_ref (xdata); local->call_count = priv->child_count; + /* Send a setxattr request to nodes where the + files are created */ - trav = this->children; - while (trav) { - STACK_WIND (frame, stripe_create_cbk, trav->xlator, - trav->xlator->fops->create, loc, flags, - mode, fd, params); - trav = trav->next; + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", loc->path); + } + need_unref = 1; + + dict_copy (xdata, dict); + + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, + i, priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to build xattr request"); + } else { + dict = xdata; } + + STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->create, loc, flags, mode, + umask, fd, dict); + + if (need_unref && dict) + dict_unref (dict); + + return 0; err: - STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); + STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, xdata); return 0; } int32_t stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; - fd_t *lfd = NULL; call_frame_t *prev = NULL; if (!this || !frame || !frame->local || !cookie) { @@ -2176,228 +2455,20 @@ stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (local->failed) local->op_ret = -1; - if (local->op_ret == -1) { - if (local->fctx) { - if (!local->fctx->static_array) - GF_FREE (local->fctx->xl_array); - GF_FREE (local->fctx); - } - } else { - fd_ctx_set (local->fd, this, - (uint64_t)(long)local->fctx); - } - - lfd = local->fd; - - stripe_local_wipe (local); - STACK_UNWIND_STRICT (open, frame, local->op_ret, - local->op_errno, local->fd); - if (lfd) - fd_unref (lfd); - + STRIPE_STACK_UNWIND (open, frame, local->op_ret, + local->op_errno, local->fd, xdata); } out: return 0; } -int32_t -stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - int32_t index = 0; - int32_t callcnt = 0; - char key[256] = {0,}; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - data_t *data = NULL; - call_frame_t *prev = NULL; - fd_t *lfd = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = (call_frame_t *)cookie; - priv = this->private; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_ret = -1; - if (local->op_errno != EIO) - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - goto unlock; - } - - if (!dict) - goto unlock; - - if (!local->fctx) { - local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), - gf_stripe_mt_stripe_fd_ctx_t); - if (!local->fctx) { - local->op_errno = ENOMEM; - local->op_ret = -1; - goto unlock; - } - - local->fctx->static_array = 0; - } - /* Stripe block size */ - sprintf (key, "trusted.%s.stripe-size", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - } else { - if (!local->fctx->stripe_size) { - local->fctx->stripe_size = - data_to_int64 (data); - } - - if (local->fctx->stripe_size != data_to_int64 (data)) { - gf_log (this->name, GF_LOG_WARNING, - "stripe-size mismatch in blocks"); - local->xattr_self_heal_needed = 1; - } - } - /* Stripe count */ - sprintf (key, "trusted.%s.stripe-count", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - goto unlock; - } - if (!local->fctx->xl_array) { - local->fctx->stripe_count = data_to_int32 (data); - if (!local->fctx->stripe_count) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-count xattr"); - local->op_ret = -1; - local->op_errno = EIO; - goto unlock; - } - - local->fctx->xl_array = - GF_CALLOC (local->fctx->stripe_count, - sizeof (xlator_t *), - gf_stripe_mt_xlator_t); - if (!local->fctx->xl_array) { - local->op_errno = ENOMEM; - local->op_ret = -1; - goto unlock; - } - } - if (local->fctx->stripe_count != data_to_int32 (data)) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-count xattr (%d != %d)", - local->fctx->stripe_count, data_to_int32 (data)); - local->op_ret = -1; - local->op_errno = EIO; - goto unlock; - } - - /* index */ - sprintf (key, "trusted.%s.stripe-index", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - goto unlock; - } - index = data_to_int32 (data); - if (index > priv->child_count) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-index xattr (%d)", index); - local->op_ret = -1; - local->op_errno = EIO; - goto unlock; - } - if (local->fctx->xl_array) { - if (local->fctx->xl_array[index]) { - gf_log (this->name, GF_LOG_ERROR, - "duplicate entry @ index (%d)", index); - local->op_ret = -1; - local->op_errno = EIO; - goto unlock; - } - local->fctx->xl_array[index] = prev->this; - } - local->entry_count++; - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - - if (!callcnt) { - /* TODO: if self-heal flag is set, do it */ - if (local->xattr_self_heal_needed) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: stripe info need to be healed", - local->loc.path); - } - - if (local->failed) - local->op_ret = -1; - - if (local->op_ret) - goto err; - - if (local->entry_count != local->fctx->stripe_count) { - gf_log (this->name, GF_LOG_ERROR, - "entry-count (%d) != stripe-count (%d)", - local->entry_count, local->fctx->stripe_count); - local->op_ret = -1; - local->op_errno = EIO; - goto err; - } - if (!local->fctx->stripe_size) { - gf_log (this->name, GF_LOG_ERROR, "stripe size not set"); - local->op_ret = -1; - local->op_errno = EIO; - goto err; - } - - local->call_count = local->fctx->stripe_count; - - trav = this->children; - while (trav) { - STACK_WIND (frame, stripe_open_cbk, trav->xlator, - trav->xlator->fops->open, &local->loc, - local->flags, local->fd, 0); - trav = trav->next; - } - } - - return 0; -err: - lfd = local->fd; - - stripe_local_wipe (local); - STACK_UNWIND_STRICT (open, frame, local->op_ret, local->op_errno, - local->fd); - if (lfd) - fd_unref (lfd); -out: - return 0; -} - /** * stripe_open - */ int32_t stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, fd_t *fd, int32_t wbflags) + int32_t flags, fd_t *fd, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -2419,8 +2490,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc, } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2436,52 +2506,28 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc, /* Striped files */ local->flags = flags; local->call_count = priv->child_count; - local->stripe_size = stripe_get_matching_bs (loc->path, - priv->pattern, - priv->block_size); - - if (priv->xattr_supported) { - while (trav) { - STACK_WIND (frame, stripe_open_getxattr_cbk, - trav->xlator, trav->xlator->fops->getxattr, - loc, NULL); - trav = trav->next; - } - return 0; - } - local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), - gf_stripe_mt_stripe_fd_ctx_t); - if (!local->fctx) { - op_errno = ENOMEM; - goto err; - } - - local->fctx->static_array = 1; - local->fctx->stripe_size = local->stripe_size; - local->fctx->stripe_count = priv->child_count; - local->fctx->xl_array = priv->xl_array; + local->stripe_size = stripe_get_matching_bs (loc->path, priv); while (trav) { STACK_WIND (frame, stripe_open_cbk, trav->xlator, trav->xlator->fops->open, &local->loc, local->flags, local->fd, - wbflags); + xdata); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL); + STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); return 0; } int32_t stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; - fd_t *local_fd = NULL; call_frame_t *prev = NULL; if (!this || !frame || !frame->local || !cookie) { @@ -2510,11 +2556,8 @@ stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, UNLOCK (&frame->lock); if (!callcnt) { - local_fd = local->fd; - STACK_UNWIND_STRICT (opendir, frame, local->op_ret, - local->op_errno, local->fd); - if (local_fd) - fd_unref (local_fd); + STRIPE_STACK_UNWIND (opendir, frame, local->op_ret, + local->op_errno, local->fd, NULL); } out: return 0; @@ -2522,7 +2565,7 @@ out: int32_t -stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; @@ -2544,8 +2587,7 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2556,19 +2598,19 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) while (trav) { STACK_WIND (frame, stripe_opendir_cbk, trav->xlator, - trav->xlator->fops->opendir, loc, fd); + trav->xlator->fops->opendir, loc, fd, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL); + STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL); return 0; } int32_t stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock) + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -2607,8 +2649,8 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!callcnt) { if (local->failed) local->op_ret = -1; - STACK_UNWIND_STRICT (lk, frame, local->op_ret, - local->op_errno, &local->lock); + STRIPE_STACK_UNWIND (lk, frame, local->op_ret, + local->op_errno, &local->lock, NULL); } out: return 0; @@ -2616,7 +2658,7 @@ out: int32_t stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock) + struct gf_flock *lock, dict_t *xdata) { stripe_local_t *local = NULL; xlator_list_t *trav = NULL; @@ -2632,8 +2674,7 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, priv = this->private; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2644,20 +2685,20 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, while (trav) { STACK_WIND (frame, stripe_lk_cbk, trav->xlator, - trav->xlator->fops->lk, fd, cmd, lock); + trav->xlator->fops->lk, fd, cmd, lock, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL); + STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL); return 0; } int32_t stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -2693,16 +2734,15 @@ stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (local->failed) local->op_ret = -1; - stripe_local_wipe (local); - STACK_UNWIND_STRICT (flush, frame, local->op_ret, - local->op_errno); + STRIPE_STACK_UNWIND (flush, frame, local->op_ret, + local->op_errno, NULL); } out: return 0; } int32_t -stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -2722,8 +2762,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) goto err; } /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2734,13 +2773,13 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) while (trav) { STACK_WIND (frame, stripe_flush_cbk, trav->xlator, - trav->xlator->fops->flush, fd); + trav->xlator->fops->flush, fd, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (flush, frame, -1, op_errno); + STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL); return 0; } @@ -2749,7 +2788,7 @@ err: int32_t stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -2785,6 +2824,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->prebuf_blocks += prebuf->ia_blocks; local->postbuf_blocks += postbuf->ia_blocks; + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); + if (local->prebuf_size < prebuf->ia_size) local->prebuf_size = prebuf->ia_size; @@ -2805,21 +2847,21 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->post_buf.ia_size = local->postbuf_size; } - stripe_local_wipe (local); - STACK_UNWIND_STRICT (fsync, frame, local->op_ret, + STRIPE_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, &local->pre_buf, - &local->post_buf); + &local->post_buf, NULL); } out: return 0; } int32_t -stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); @@ -2831,31 +2873,38 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) trav = this->children; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } + + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) { + op_errno = EINVAL; + goto err; + } + local->fctx = fctx; + local->op_ret = -1; frame->local = local; local->call_count = priv->child_count; while (trav) { STACK_WIND (frame, stripe_fsync_cbk, trav->xlator, - trav->xlator->fops->fsync, fd, flags); + trav->xlator->fops->fsync, fd, flags, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); return 0; } int32_t stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -2890,6 +2939,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf = *buf; local->stbuf_blocks += buf->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; } @@ -2905,9 +2957,8 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf.ia_blocks = local->stbuf_blocks; } - stripe_local_wipe (local); - STACK_UNWIND_STRICT (fstat, frame, local->op_ret, - local->op_errno, &local->stbuf); + STRIPE_STACK_UNWIND (fstat, frame, local->op_ret, + local->op_errno, &local->stbuf, NULL); } out: @@ -2917,11 +2968,12 @@ out: int32_t stripe_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); @@ -2933,8 +2985,7 @@ stripe_fstat (call_frame_t *frame, trav = this->children; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2943,26 +2994,35 @@ stripe_fstat (call_frame_t *frame, frame->local = local; local->call_count = priv->child_count; + if (IA_ISREG(fd->inode->ia_type)) { + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + while (trav) { STACK_WIND (frame, stripe_fstat_cbk, trav->xlator, - trav->xlator->fops->fstat, fd); + trav->xlator->fops->fstat, fd, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL); + STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); return 0; } int32_t -stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + stripe_fd_ctx_t *fctx = NULL; + int i, eof_idx; + off_t dest_offset, tmp_offset; + int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -2970,11 +3030,9 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) VALIDATE_OR_GOTO (fd->inode, err); priv = this->private; - trav = this->children; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2983,22 +3041,60 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) frame->local = local; local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_truncate_cbk, trav->xlator, - trav->xlator->fops->ftruncate, fd, offset); - trav = trav->next; - } + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + if (!fctx->stripe_count) { + gf_log(this->name, GF_LOG_ERROR, "no stripe count"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, "no xlator at index " + "%d", i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + if (i < eof_idx) + tmp_offset = roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, + fctx->stripe_size, fctx->stripe_count); + } else { + dest_offset = offset; + } + + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, + NULL); + } return 0; err: - STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL); + STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); return 0; } int32_t stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; @@ -3034,16 +3130,15 @@ stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (local->failed) local->op_ret = -1; - stripe_local_wipe (local); - STACK_UNWIND_STRICT (fsyncdir, frame, local->op_ret, - local->op_errno); + STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret, + local->op_errno, NULL); } out: return 0; } int32_t -stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -3059,8 +3154,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) trav = this->children; /* Initialization */ - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -3071,20 +3165,20 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) while (trav) { STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator, - trav->xlator->fops->fsyncdir, fd, flags); + trav->xlator->fops->fsyncdir, fd, flags, NULL); trav = trav->next; } return 0; err: - STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno); + STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL); return 0; } int32_t stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { int32_t i = 0; int32_t callcnt = 0; @@ -3094,6 +3188,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt tmp_stbuf = {0,}; struct iobref *tmp_iobref = NULL; struct iobuf *iobuf = NULL; + call_frame_t *prev = NULL; if (!this || !frame || !frame->local) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -3101,13 +3196,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } local = frame->local; + prev = cookie; LOCK (&frame->lock); { callcnt = --local->call_count; - if (op_ret != -1) + if (op_ret != -1) { + correct_file_size(buf, local->fctx, prev); if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; + } } UNLOCK (&frame->lock); @@ -3136,7 +3234,8 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, vec[count].iov_len = (local->replies[i].requested_size - local->replies[i].op_ret); - iobuf = iobuf_get (this->ctx->iobuf_pool); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, + vec[count].iov_len); if (!iobuf) { gf_log (this->name, GF_LOG_ERROR, "Out of memory."); @@ -3145,9 +3244,11 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto done; } memset (iobuf->ptr, 0, vec[count].iov_len); - iobref_add (local->iobref, iobuf); vec[count].iov_base = iobuf->ptr; + iobref_add (local->iobref, iobuf); + iobuf_unref(iobuf); + op_ret += vec[count].iov_len; count++; } @@ -3164,13 +3265,11 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, done: GF_FREE (local->replies); tmp_iobref = local->iobref; - fd_unref (local->fd); - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vec, - count, &tmp_stbuf, tmp_iobref); + STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec, + count, &tmp_stbuf, tmp_iobref, NULL); iobref_unref (tmp_iobref); - if (vec) - GF_FREE (vec); + GF_FREE (vec); } out: return 0; @@ -3183,7 +3282,7 @@ out: int32_t stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref) + int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { int32_t index = 0; int32_t callcnt = 0; @@ -3194,8 +3293,10 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, stripe_local_t *local = NULL; struct iovec *final_vec = NULL; struct iatt tmp_stbuf = {0,}; + struct iatt *tmp_stbuf_p = NULL; //need it for a warning struct iobref *tmp_iobref = NULL; stripe_fd_ctx_t *fctx = NULL; + call_frame_t *prev = NULL; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -3204,6 +3305,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; index = local->node_index; + prev = cookie; mframe = local->orig_frame; if (!mframe) goto out; @@ -3224,6 +3326,12 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, mlocal->replies[index].count = count; mlocal->replies[index].vector = iov_dup (vector, count); + correct_file_size(stbuf, fctx, prev); + + if (local->stbuf_size < stbuf->ia_size) + local->stbuf_size = stbuf->ia_size; + local->stbuf_blocks += stbuf->ia_blocks; + if (!mlocal->iobref) mlocal->iobref = iobref_new (); iobref_merge (mlocal->iobref, iobref); @@ -3280,18 +3388,21 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * cause any bugs at higher levels */ memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf, sizeof (struct iatt)); + tmp_stbuf.ia_size = local->stbuf_size; + tmp_stbuf.ia_blocks = local->stbuf_blocks; done: /* */ GF_FREE (mlocal->replies); tmp_iobref = mlocal->iobref; - fd_unref (mlocal->fd); - STACK_UNWIND_STRICT (readv, mframe, op_ret, op_errno, final_vec, - final_count, &tmp_stbuf, tmp_iobref); + /* work around for nfs truncated read. Bug 3774 */ + tmp_stbuf_p = &tmp_stbuf; + WIPE (tmp_stbuf_p); + STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec, + final_count, &tmp_stbuf, tmp_iobref, NULL); iobref_unref (tmp_iobref); - if (final_vec) - GF_FREE (final_vec); + GF_FREE (final_vec); } goto out; @@ -3303,11 +3414,11 @@ check_size: STACK_WIND (mframe, stripe_readv_fstat_cbk, (fctx->xl_array[index]), (fctx->xl_array[index])->fops->fstat, - mlocal->fd); + mlocal->fd, NULL); } out: - STACK_DESTROY (frame->root); + STRIPE_STACK_DESTROY (frame); end: return 0; } @@ -3315,7 +3426,7 @@ end: int32_t stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset) + size_t size, off_t offset, uint32_t flags, dict_t *xdata) { int32_t op_errno = EINVAL; int32_t idx = 0; @@ -3328,6 +3439,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, uint64_t stripe_size = 0; off_t rounded_start = 0; off_t frame_offset = offset; + off_t dest_offset = 0; stripe_local_t *local = NULL; call_frame_t *rframe = NULL; stripe_local_t *rlocal = NULL; @@ -3338,7 +3450,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, VALIDATE_OR_GOTO (fd, err); VALIDATE_OR_GOTO (fd->inode, err); - fd_ctx_get (fd, this, &tmp_fctx); + inode_ctx_get (fd->inode, this, &tmp_fctx); if (!tmp_fctx) { op_errno = EBADFD; goto err; @@ -3346,6 +3458,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; stripe_size = fctx->stripe_size; + STRIPE_VALIDATE_FCTX (fctx, err); + if (!stripe_size) { gf_log (this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); @@ -3360,8 +3474,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, rounded_end = roof (offset+size, stripe_size); num_stripe = (rounded_end- rounded_start)/stripe_size; - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -3369,8 +3482,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, frame->local = local; /* This is where all the vectors should be copied. */ - local->replies = GF_CALLOC (num_stripe, sizeof (struct readv_replies), - gf_stripe_mt_readv_replies); + local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies), + gf_stripe_mt_stripe_replies); if (!local->replies) { op_errno = ENOMEM; goto err; @@ -3385,8 +3498,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, for (index = off_index; index < (num_stripe + off_index); index++) { rframe = copy_frame (frame); - rlocal = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + rlocal = mem_get0 (this->local_pool); if (!rlocal) { op_errno = ENOMEM; goto err; @@ -3400,19 +3512,26 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, rlocal->readv_size = frame_size; rframe->local = rlocal; idx = (index % fctx->stripe_count); + + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(frame_offset, + stripe_size, fctx->stripe_count); + else + dest_offset = frame_offset; + STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx], fctx->xl_array[idx]->fops->readv, - fd, frame_size, frame_offset); + fd, frame_size, dest_offset, flags, xdata); frame_offset += frame_size; } return 0; err: - if (local && local->fd) - fd_unref (local->fd); + if (rframe) + STRIPE_STACK_DESTROY (rframe); - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); return 0; } @@ -3420,11 +3539,15 @@ err: int32_t stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + struct stripe_replies *reply = NULL; + int32_t i = 0; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -3433,39 +3556,82 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, prev = cookie; local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; LOCK(&frame->lock); { - callcnt = ++local->call_count; + callcnt = ++mlocal->call_count; + + mlocal->replies[local->node_index].op_ret = op_ret; + mlocal->replies[local->node_index].op_errno = op_errno; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - local->op_ret = -1; - } if (op_ret >= 0) { - local->op_ret += op_ret; - local->post_buf = *postbuf; - local->pre_buf = *prebuf; + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; } } UNLOCK (&frame->lock); - if ((callcnt == local->wind_count) && local->unwind) { - STACK_UNWIND_STRICT (writev, frame, local->op_ret, - local->op_errno, &local->pre_buf, - &local->post_buf); + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + /* + * Only return the number of consecutively written bytes up until + * the first error. Only return an error if it occurs first. + * + * When a short write occurs, the application should retry at the + * appropriate offset, at which point we'll potentially pass back + * the error. + */ + for (i = 0, reply = mlocal->replies; i < mlocal->wind_count; + i++, reply++) { + if (reply->op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "reply %d " + "returned error %s", i, + strerror(reply->op_errno)); + if (!mlocal->op_ret) { + mlocal->op_ret = -1; + mlocal->op_errno = reply->op_errno; + } + break; + } + + mlocal->op_ret += reply->op_ret; + + if (reply->op_ret < reply->requested_size) + break; + } + + GF_FREE(mlocal->replies); + + STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); } out: + STRIPE_STACK_DESTROY(frame); return 0; } int32_t stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { struct iovec *tmp_vec = NULL; stripe_local_t *local = NULL; @@ -3479,13 +3645,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t fill_size = 0; uint64_t stripe_size = 0; uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + off_t rounded_start = 0; + off_t rounded_end = 0; + int32_t total_chunks = 0; + call_frame_t *wframe = NULL; + stripe_local_t *wlocal = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); VALIDATE_OR_GOTO (fd->inode, err); - fd_ctx_get (fd, this, &tmp_fctx); + inode_ctx_get (fd->inode, this, &tmp_fctx); if (!tmp_fctx) { op_errno = EINVAL; goto err; @@ -3493,22 +3665,51 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; stripe_size = fctx->stripe_size; + STRIPE_VALIDATE_FCTX (fctx, err); + /* File has to be stripped across the child nodes */ for (idx = 0; idx< count; idx ++) { total_size += vector[idx].iov_len; } remaining_size = total_size; - local = GF_CALLOC (1, sizeof (stripe_local_t), - gf_stripe_mt_stripe_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } frame->local = local; local->stripe_size = stripe_size; + local->fctx = fctx; + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + rounded_start = floor(offset, stripe_size); + rounded_end = roof(offset + total_size, stripe_size); + total_chunks = (rounded_end - rounded_start) / stripe_size; + local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies), + gf_stripe_mt_stripe_replies); + if (!local->replies) { + op_errno = ENOMEM; + goto err; + } + + total_chunks = 0; while (1) { + wframe = copy_frame(frame); + wlocal = mem_get0(this->local_pool); + if (!wlocal) { + op_errno = ENOMEM; + goto err; + } + wlocal->orig_frame = frame; + wframe->local = wlocal; + /* Send striped chunk of the vector to child nodes appropriately. */ idx = (((offset + offset_offset) / @@ -3536,47 +3737,589 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, if (remaining_size == 0) local->unwind = 1; - STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx], + /* + * Store off the request index (with respect to the chunk of the + * initial offset) and the size of the request. This is required + * in the callback to calculate an appropriate return value in + * the event of a write failure in one or more requests. + */ + wlocal->node_index = total_chunks; + local->replies[total_chunks].requested_size = fill_size; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, fctx->stripe_count); + + STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx], fctx->xl_array[idx]->fops->writev, fd, tmp_vec, - tmp_count, offset + offset_offset, iobref); + tmp_count, dest_offset, flags, iobref, + xdata); + GF_FREE (tmp_vec); offset_offset += fill_size; + total_chunks++; if (remaining_size == 0) break; } return 0; err: - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL); + if (wframe) + STRIPE_STACK_DESTROY(wframe); + + STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); return 0; } int32_t -stripe_release (xlator_t *this, fd_t *fd) +stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; + + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; + + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } + + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); + } +out: + STRIPE_STACK_DESTROY(frame); + return 0; +} + +int32_t +stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + inode_ctx_get (fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + /* send fallocate request to the associated child node */ + idx = (((offset + offset_offset) / + local->stripe_size) % fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, fctx->stripe_count); + + /* + * TODO: Create a separate handler for coalesce mode that sends a + * single fallocate per-child (since the ranges are linear). + */ + STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->fallocate, fd, mode, + dest_offset, fill_size, xdata); + + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; +err: + if (fframe) + STRIPE_STACK_DESTROY(fframe); + + STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + + +int32_t +stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; + + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; + + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } + + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); + } +out: + STRIPE_STACK_DESTROY(frame); + return 0; +} + +int32_t +stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + stripe_local_t *local = NULL; stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); - fd_ctx_del (fd, this, &tmp_fctx); + inode_ctx_get (fd->inode, this, &tmp_fctx); if (!tmp_fctx) { + op_errno = EINVAL; goto err; } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + /* send discard request to the associated child node */ + idx = (((offset + offset_offset) / + local->stripe_size) % fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, fctx->stripe_count); + + /* + * TODO: Create a separate handler for coalesce mode that sends a + * single discard per-child (since the ranges are linear). + */ + STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->discard, fd, dest_offset, + fill_size, xdata); + + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; +err: + if (fframe) + STRIPE_STACK_DESTROY(fframe); + + STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; + + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; + + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } + + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); + } +out: + STRIPE_STACK_DESTROY(frame); + return 0; +} + +int32_t +stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + inode_ctx_get (fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; - if (!fctx->static_array) - GF_FREE (fctx->xl_array); + STRIPE_VALIDATE_FCTX (fctx, err); - GF_FREE (fctx); + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + idx = (((offset + offset_offset) / + local->stripe_size) % fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, + fctx->stripe_count); + + STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->zerofill, fd, + dest_offset, fill_size, xdata); + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; err: + if (fframe) + STRIPE_STACK_DESTROY(fframe); + + STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +stripe_release (xlator_t *this, fd_t *fd) +{ return 0; } +int +stripe_forget (xlator_t *this, inode_t *inode) +{ + uint64_t tmp_fctx = 0; + stripe_fd_ctx_t *fctx = NULL; + + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (inode, err); + + (void) inode_ctx_del (inode, this, &tmp_fctx); + if (!tmp_fctx) { + goto err; + } + + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + + if (!fctx->static_array) + GF_FREE (fctx->xl_array); + + GF_FREE (fctx); +err: + return 0; +} int32_t notify (xlator_t *this, int32_t event, void *data, ...) @@ -3584,6 +4327,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) stripe_private_t *priv = NULL; int down_client = 0; int i = 0; + gf_boolean_t heard_from_all_children = _gf_false; if (!this) return 0; @@ -3601,23 +4345,28 @@ notify (xlator_t *this, int32_t event, void *data, ...) if (data == priv->xl_array[i]) break; } - priv->state[i] = 1; - for (i = 0; i < priv->child_count; i++) { - if (!priv->state[i]) - down_client++; + + if (priv->child_count == i) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_UP bad subvolume %s", + data? ((xlator_t *)data)->name: NULL); + break; } LOCK (&priv->lock); { - priv->nodes_down = down_client; if (data == FIRST_CHILD (this)) priv->first_child_down = 0; - if (!priv->nodes_down) - default_notify (this, event, data); + priv->last_event[i] = event; } UNLOCK (&priv->lock); } break; + case GF_EVENT_CHILD_CONNECTING: + { + // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing + goto out; + } case GF_EVENT_CHILD_DOWN: { /* get an index number to set */ @@ -3625,20 +4374,19 @@ notify (xlator_t *this, int32_t event, void *data, ...) if (data == priv->xl_array[i]) break; } - priv->state[i] = 0; - for (i = 0; i < priv->child_count; i++) { - if (!priv->state[i]) - down_client++; + + if (priv->child_count == i) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_DOWN bad subvolume %s", + data? ((xlator_t *)data)->name: NULL); + break; } LOCK (&priv->lock); { - priv->nodes_down = down_client; - if (data == FIRST_CHILD (this)) priv->first_child_down = 1; - if (priv->nodes_down) - default_notify (this, event, data); + priv->last_event[i] = event; } UNLOCK (&priv->lock); } @@ -3648,67 +4396,252 @@ notify (xlator_t *this, int32_t event, void *data, ...) { /* */ default_notify (this, event, data); + goto out; } break; } + // Consider child as down if it's last_event is not CHILD_UP + for (i = 0, down_client = 0; i < priv->child_count; i++) + if (priv->last_event[i] != GF_EVENT_CHILD_UP) + down_client++; + + LOCK (&priv->lock); + { + priv->nodes_down = down_client; + } + UNLOCK (&priv->lock); + + heard_from_all_children = _gf_true; + for (i = 0; i < priv->child_count; i++) + if (!priv->last_event[i]) + heard_from_all_children = _gf_false; + + if (heard_from_all_children) + default_notify (this, event, data); +out: return 0; } int -set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data) +stripe_setxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { - int ret = -1; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *dup_str = NULL; - char *stripe_str = NULL; - char *pattern = NULL; - char *num = NULL; - struct stripe_options *temp_stripeopt = NULL; - struct stripe_options *stripe_opt = NULL; - - if (!this || !priv || !data) - goto out; - - /* Get the pattern for striping. - "option block-size *avi:10MB" etc */ - stripe_str = strtok_r (data, ",", &tmp_str); - while (stripe_str) { - dup_str = gf_strdup (stripe_str); - stripe_opt = CALLOC (1, sizeof (struct stripe_options)); - if (!stripe_opt) { - GF_FREE (dup_str); - goto out; - } + int ret = -1; + int call_cnt = 0; + stripe_local_t *local = NULL; - pattern = strtok_r (dup_str, ":", &tmp_str1); - num = strtok_r (NULL, ":", &tmp_str1); - if (!num) { - num = pattern; - pattern = "*"; + if (!frame || !frame->local || !this) { + gf_log ("", GF_LOG_ERROR, "Possible NULL deref"); + return ret; + } + + local = frame->local; + + LOCK (&frame->lock); + { + call_cnt = --local->wind_count; + + /** + * We overwrite ->op_* values here for subsequent faliure + * conditions, hence we propogate the last errno down the + * stack. + */ + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unlock; } - if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", num); - goto out; + } + + unlock: + UNLOCK (&frame->lock); + + if (!call_cnt) { + STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret, + local->op_errno, xdata); + } + + return 0; +} + +#ifdef HAVE_BD_XLATOR +int +stripe_is_bd (dict_t *this, char *key, data_t *value, void *data) +{ + gf_boolean_t *is_bd = data; + + if (data == NULL) + return 0; + + if (XATTR_IS_BD (key)) + *is_bd = _gf_true; + + return 0; +} + +inline gf_boolean_t +stripe_setxattr_is_bd (dict_t *dict) +{ + gf_boolean_t is_bd = _gf_false; + + if (dict == NULL) + goto out; + + dict_foreach (dict, stripe_is_bd, &is_bd); +out: + return is_bd; +} +#else +#define stripe_setxattr_is_bd(dict) _gf_false +#endif + +int +stripe_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + int i = 0; + gf_boolean_t is_bd = _gf_false; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + + GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict, + op_errno, err); + + priv = this->private; + trav = this->children; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + local->wind_count = priv->child_count; + local->op_ret = local->op_errno = 0; + + is_bd = stripe_setxattr_is_bd (dict); + + /** + * Set xattrs for directories on all subvolumes. Additionally + * this power is only given to a special client. Bd xlator + * also needs xattrs for regular files (ie LVs) + */ + if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) && + IA_ISDIR (loc->inode->ia_type)) || is_bd) { + for (i = 0; i < priv->child_count; i++, trav = trav->next) { + STACK_WIND (frame, stripe_setxattr_cbk, + trav->xlator, trav->xlator->fops->setxattr, + loc, dict, flags, xdata); } - memcpy (stripe_opt->path_pattern, pattern, strlen (pattern)); + } else { + local->wind_count = 1; + STACK_WIND (frame, stripe_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + } - gf_log (this->name, GF_LOG_DEBUG, - "block-size : pattern %s : size %"PRId64, - stripe_opt->path_pattern, stripe_opt->block_size); + return 0; +err: + STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + return 0; +} - if (!priv->pattern) { - priv->pattern = stripe_opt; - } else { - temp_stripeopt = priv->pattern; - while (temp_stripeopt->next) - temp_stripeopt = temp_stripeopt->next; - temp_stripeopt->next = stripe_opt; + +int +stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, dict_t *xdata) +{ + STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + + +int +stripe_is_special_key (dict_t *this, + char *key, + data_t *value, + void *data) +{ + gf_boolean_t *is_special = NULL; + + if (data == NULL) { + goto out; + } + + is_special = data; + + if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key)) + *is_special = _gf_true; + +out: + return 0; +} + +int32_t +stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xdata) +{ + int call_count = 0; + stripe_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + call_count = --local->wind_count; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; } - stripe_str = strtok_r (NULL, ",", &tmp_str); - GF_FREE (dup_str); + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret, + local->op_errno, NULL); + } + return 0; +} + +int +stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) +{ + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int ret = -1; + stripe_local_t *local = NULL; + + priv = this->private; + + local = mem_get0 (this->local_pool); + if (local == NULL) { + goto out; + } + + frame->local = local; + + local->wind_count = priv->child_count; + + trav = this->children; + + while (trav) { + STACK_WIND (frame, stripe_fsetxattr_everyone_cbk, + trav->xlator, trav->xlator->fops->fsetxattr, + fd, dict, flags, xdata); + trav = trav->next; } ret = 0; @@ -3716,97 +4649,477 @@ out: return ret; } -int32_t -mem_acct_init (xlator_t *this) +inline gf_boolean_t +stripe_fsetxattr_is_special (dict_t *dict) { - int ret = -1; + gf_boolean_t is_spl = _gf_false; - if (!this) + if (dict == NULL) { goto out; + } - ret = xlator_mem_acct_init (this, gf_stripe_mt_end + 1); + dict_foreach (dict, stripe_is_special_key, &is_spl); + +out: + return is_spl; +} + +int +stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) +{ + int32_t op_ret = -1, ret = -1, op_errno = EINVAL; + gf_boolean_t is_spl = _gf_false; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict, + op_errno, err); + + is_spl = stripe_fsetxattr_is_special (dict); + if (is_spl) { + ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict, + flags, xdata); + if (ret < 0) { + op_errno = ENOMEM; + goto err; + } - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); goto out; } + STACK_WIND (frame, stripe_fsetxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, + fd, dict, flags, xdata); out: - return ret; + return 0; +err: + STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); + return 0; } + int -validate_options (xlator_t *this, dict_t *options, char **op_errstr) +stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { + STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + return 0; +} +int +stripe_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + int32_t op_errno = EINVAL; - data_t *data = NULL; - int ret = 0; + VALIDATE_OR_GOTO (this, err); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", + name, op_errno, err); + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (loc, err); + + STACK_WIND (frame, stripe_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, name, xdata); + return 0; +err: + STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); + return 0; +} + + +int +stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int +stripe_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", + name, op_errno, err); + + STACK_WIND (frame, stripe_fremovexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, + fd, name, xdata); + return 0; + err: + STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *parent) +{ + stripe_local_t *local = NULL; + call_frame_t *main_frame = NULL; + stripe_local_t *main_local = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + int done = 0; + + local = frame->local; + prev = cookie; + + entry = local->dirent; + + main_frame = local->orig_frame; + main_local = main_frame->local; + LOCK (&frame->lock); + { + + local->call_count--; + if (!local->call_count) + done = 1; + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = op_ret; + goto unlock; + } + + if (stripe_ctx_handle(this, prev, local, xattr)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict."); + + correct_file_size(stbuf, local->fctx, prev); + + stripe_iatt_merge (stbuf, &entry->d_stat); + local->stbuf_blocks += stbuf->ia_blocks; + } +unlock: + UNLOCK(&frame->lock); + + if (done) { + inode_ctx_put (entry->inode, this, + (uint64_t) (long)local->fctx); + + done = 0; + LOCK (&main_frame->lock); + { + main_local->wind_count--; + if (!main_local->wind_count) + done = 1; + if (local->op_ret == -1) { + main_local->op_errno = local->op_errno; + main_local->op_ret = local->op_ret; + } + entry->d_stat.ia_blocks = local->stbuf_blocks; + } + UNLOCK (&main_frame->lock); + if (done) { + main_frame->local = NULL; + STRIPE_STACK_UNWIND (readdir, main_frame, + main_local->op_ret, + main_local->op_errno, + &main_local->entries, NULL); + gf_dirent_free (&main_local->entries); + stripe_local_wipe (main_local); + mem_put (main_local); + } + frame->local = NULL; + stripe_local_wipe (local); + mem_put (local); + STRIPE_STACK_DESTROY (frame); + } + + return 0; +} + +int32_t +stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + gf_dirent_t *orig_entries, dict_t *xdata) +{ + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + gf_dirent_t *local_entry = NULL; + gf_dirent_t *tmp_entry = NULL; + xlator_list_t *trav = NULL; + loc_t loc = {0, }; + int32_t count = 0; stripe_private_t *priv = NULL; + int32_t subvols = 0; + dict_t *xattrs = NULL; + call_frame_t *local_frame = NULL; + stripe_local_t *local_ent = NULL; - data = dict_get (options, "block-size"); - if (data) { - gf_log (this->name, GF_LOG_TRACE,"Reconfiguring Stripe" - " Block-size"); - priv = GF_CALLOC (1, sizeof (stripe_private_t), - gf_stripe_mt_stripe_private_t); - if (!priv) { - gf_log ("",GF_LOG_ERROR, "Unable to allocate memory"); - ret = -1; + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; + local = frame->local; + trav = this->children; + priv = this->private; + + subvols = priv->child_count; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + local->op_ret = op_ret; + goto unlock; + } else { + local->op_ret = op_ret; + list_splice_init (&orig_entries->list, + &local->entries.list); + local->wind_count = op_ret; + } + + } +unlock: + UNLOCK (&frame->lock); + + if (op_ret == -1) + goto out; + + xattrs = dict_new (); + if (xattrs) + (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0); + count = op_ret; + list_for_each_entry_safe (local_entry, tmp_entry, + (&local->entries.list), list) { + + if (!local_entry) + break; + if (!IA_ISREG (local_entry->d_stat.ia_type)) { + LOCK (&frame->lock); + { + local->wind_count--; + count = local->wind_count; + } + UNLOCK (&frame->lock); + continue; + } + + local_frame = copy_frame (frame); + + if (!local_frame) { + op_errno = ENOMEM; + op_ret = -1; goto out; } - ret = set_stripe_block_size (this, priv, data->data); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "Reconfigue: Block-Size reconfiguration failed"); - *op_errstr = gf_strdup ("Error, could not parse list"); - ret = -1; + local_ent = mem_get0 (this->local_pool); + if (!local_ent) { + op_errno = ENOMEM; + op_ret = -1; goto out; } - gf_log (this->name, GF_LOG_TRACE, - "Reconfigue: Block-Size reconfigured Successfully"); + + loc.inode = inode_ref (local_entry->inode); + + uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid); + + local_ent->orig_frame = frame; + + local_ent->call_count = subvols; + + local_ent->dirent = local_entry; + + local_frame->local = local_ent; + + trav = this->children; + while (trav) { + STACK_WIND (local_frame, stripe_readdirp_lookup_cbk, + trav->xlator, trav->xlator->fops->lookup, + &loc, xattrs); + trav = trav->next; + } + loc_wipe (&loc); + } +out: + if (!count) { + /* all entries are directories */ + frame->local = NULL; + STRIPE_STACK_UNWIND (readdir, frame, local->op_ret, + local->op_errno, &local->entries, NULL); + gf_dirent_free (&local->entries); + stripe_local_wipe (local); + mem_put (local); + } + if (xattrs) + dict_unref (xattrs); + return 0; + +} +int32_t +stripe_readdirp (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + + local->fd = fd_ref (fd); + + local->wind_count = 0; + + local->count = 0; + local->op_ret = -1; + INIT_LIST_HEAD(&local->entries); + + if (!trav) + goto err; + + STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator, + trav->xlator->fops->readdirp, fd, size, off, xdata); + return 0; +err: + op_errno = (op_errno == -1) ? errno : op_errno; + STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); + + return 0; + +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + goto out; + + ret = xlator_mem_acct_init (this, gf_stripe_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + goto out; } out: - if (priv) - GF_FREE (priv); return ret; +} + +static int +clear_pattern_list (stripe_private_t *priv) +{ + struct stripe_options *prev = NULL; + struct stripe_options *trav = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("stripe", priv, out); + + trav = priv->pattern; + priv->pattern = NULL; + while (trav) { + prev = trav; + trav = trav->next; + GF_FREE (prev); + } + + ret = 0; + out: + return ret; + } + int reconfigure (xlator_t *this, dict_t *options) { - stripe_private_t *priv = NULL; - data_t *data = NULL; - int ret = 0; + stripe_private_t *priv = NULL; + data_t *data = NULL; + int ret = -1; + volume_option_t *opt = NULL; + + GF_ASSERT (this); + GF_ASSERT (this->private); + priv = this->private; - priv = this->private; - data = dict_get (options, "block-size"); - if (data) { - gf_log (this->name, GF_LOG_TRACE,"Reconfiguring Stripe" - " Block-size"); - ret = set_stripe_block_size (this, priv, data->data); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Reconfigue: Block-Size reconfiguration failed"); - ret = -1; - goto out; - } - gf_log (this->name, GF_LOG_TRACE, - "Reconfigue: Block-Size reconfigured Successfully"); - } - else { - priv->block_size = (128 * GF_UNIT_KB); + ret = 0; + LOCK (&priv->lock); + { + ret = clear_pattern_list (priv); + if (ret) + goto unlock; + + data = dict_get (options, "block-size"); + if (data) { + ret = set_stripe_block_size (this, priv, data->data); + if (ret) + goto unlock; + } else { + opt = xlator_volume_option_get (this, "block-size"); + if (!opt) { + gf_log (this->name, GF_LOG_WARNING, + "option 'block-size' not found"); + ret = -1; + goto unlock; + } + + if (gf_string2bytesize (opt->default_value, &priv->block_size)){ + gf_log (this->name, GF_LOG_ERROR, + "Unable to set default block-size "); + ret = -1; + goto unlock; + } + } + + GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, + unlock); } - + unlock: + UNLOCK (&priv->lock); + if (ret) + goto out; -out: - return ret; + ret = 0; + out: + return ret; } @@ -3819,6 +5132,7 @@ int32_t init (xlator_t *this) { stripe_private_t *priv = NULL; + volume_option_t *opt = NULL; xlator_list_t *trav = NULL; data_t *data = NULL; int32_t count = 0; @@ -3862,9 +5176,9 @@ init (xlator_t *this) if (!priv->xl_array) goto out; - priv->state = GF_CALLOC (count, sizeof (int8_t), - gf_stripe_mt_int8_t); - if (!priv->state) + priv->last_event = GF_CALLOC (count, sizeof (int), + gf_stripe_mt_int32_t); + if (!priv->last_event) goto out; priv->child_count = count; @@ -3884,41 +5198,56 @@ init (xlator_t *this) goto out; } - priv->block_size = (128 * GF_UNIT_KB); - /* option stripe-pattern *avi:1GB,*pdf:4096 */ - data = dict_get (this->options, "block-size"); - if (!data) { - gf_log (this->name, GF_LOG_DEBUG, - "No \"option block-size <x>\" given, defaulting " - "to 128KB"); - } else { - ret = set_stripe_block_size (this, priv, data->data); - if (ret) - goto out; - } - - priv->xattr_supported = 1; - data = dict_get (this->options, "use-xattr"); - if (data) { - if (gf_string2boolean (data->data, - &priv->xattr_supported) == -1) { + ret = 0; + LOCK (&priv->lock); + { + opt = xlator_volume_option_get (this, "block-size"); + if (!opt) { + gf_log (this->name, GF_LOG_WARNING, + "option 'block-size' not found"); + ret = -1; + goto unlock; + } + if (gf_string2bytesize (opt->default_value, &priv->block_size)){ gf_log (this->name, GF_LOG_ERROR, - "error setting hard check for extended " - "attribute"); - //return -1; + "Unable to set default block-size "); + ret = -1; + goto unlock; + } + /* option stripe-pattern *avi:1GB,*pdf:16K */ + data = dict_get (this->options, "block-size"); + if (data) { + ret = set_stripe_block_size (this, priv, data->data); + if (ret) + goto unlock; } } + unlock: + UNLOCK (&priv->lock); + if (ret) + goto out; + GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out); /* notify related */ priv->nodes_down = priv->child_count; + + GF_OPTION_INIT("coalesce", priv->coalesce, bool, out); + + this->local_pool = mem_pool_new (stripe_local_t, 128); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + this->private = priv; ret = 0; out: if (ret) { if (priv) { - if (priv->xl_array) - GF_FREE (priv->xl_array); + GF_FREE (priv->xl_array); GF_FREE (priv); } } @@ -3942,15 +5271,15 @@ fini (xlator_t *this) priv = this->private; if (priv) { this->private = NULL; - if (priv->xl_array) - GF_FREE (priv->xl_array); + GF_FREE (priv->xl_array); trav = priv->pattern; while (trav) { prev = trav; trav = trav->next; - FREE (prev); + GF_FREE (prev); } + GF_FREE (priv->last_event); LOCK_DESTROY (&priv->lock); GF_FREE (priv); } @@ -3959,44 +5288,527 @@ out: return; } +int32_t +stripe_getxattr_unwind (call_frame_t *frame, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) + +{ + STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +int +stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) +{ + + char size_key[256] = {0,}; + char index_key[256] = {0,}; + char count_key[256] = {0,}; + char coalesce_key[256] = {0,}; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); + + if (!xattr || (op_ret == -1)) + goto out; + + sprintf (size_key, "trusted.%s.stripe-size", this->name); + sprintf (count_key, "trusted.%s.stripe-count", this->name); + sprintf (index_key, "trusted.%s.stripe-index", this->name); + sprintf (coalesce_key, "trusted.%s.stripe-coalesce", this->name); + + dict_del (xattr, size_key); + dict_del (xattr, count_key); + dict_del (xattr, index_key); + dict_del (xattr, coalesce_key); + +out: + STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata); + + return 0; + +} + +int +stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + int call_cnt = 0; + stripe_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); + + local = frame->local; + + LOCK (&frame->lock); + { + call_cnt = --local->wind_count; + } + UNLOCK (&frame->lock); + + if (!xattr || (op_ret < 0)) + goto out; + + local->op_ret = 0; + + if (!local->xattr) { + local->xattr = dict_ref (xattr); + } else { + stripe_aggregate_xattr (local->xattr, xattr); + } + +out: + if (!call_cnt) { + STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, + local->xattr, xdata); + } + + return 0; +} + +int32_t +stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *dict, dict_t *xdata) +{ + stripe_local_t *local = NULL; + int32_t callcnt = 0; + int32_t ret = -1; + long cky = 0; + void *xattr_val = NULL; + void *xattr_serz = NULL; + stripe_xattr_sort_t *xattr = NULL; + dict_t *stripe_xattr = NULL; + + if (!frame || !frame->local || !this) { + gf_log ("", GF_LOG_ERROR, "Possible NULL deref"); + return ret; + } + + local = frame->local; + cky = (long) cookie; + + if (local->xsel[0] == '\0') { + gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk"); + return ret; + } + + LOCK (&frame->lock); + { + callcnt = --local->wind_count; + + if (!dict || (op_ret < 0)) + goto out; + + if (!local->xattr_list) + local->xattr_list = (stripe_xattr_sort_t *) + GF_CALLOC (local->nallocs, + sizeof (stripe_xattr_sort_t), + gf_stripe_mt_xattr_sort_t); + + if (local->xattr_list) { + xattr = local->xattr_list + (int32_t) cky; + + ret = dict_get_ptr_and_len (dict, local->xsel, + &xattr_val, + &xattr->xattr_len); + if (xattr->xattr_len == 0) + goto out; + + xattr->pos = cky; + xattr->xattr_value = gf_memdup (xattr_val, + xattr->xattr_len); + + if (xattr->xattr_value != NULL) + local->xattr_total_len += xattr->xattr_len + 1; + } + } + out: + UNLOCK (&frame->lock); + + if (!callcnt) { + if (!local->xattr_total_len) + goto unwind; + + stripe_xattr = dict_new (); + if (!stripe_xattr) + goto unwind; + + /* select filler based on ->xsel */ + if (XATTR_IS_PATHINFO (local->xsel)) + ret = stripe_fill_pathinfo_xattr (this, local, + (char **)&xattr_serz); + else if (XATTR_IS_LOCKINFO (local->xsel)) { + ret = stripe_fill_lockinfo_xattr (this, local, + &xattr_serz); + } else { + gf_log (this->name, GF_LOG_WARNING, + "Unknown xattr in xattr request"); + goto unwind; + } + + if (!ret) { + ret = dict_set_dynptr (stripe_xattr, local->xsel, + xattr_serz, + local->xattr_total_len); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Can't set %s key in dict", + local->xsel); + } + + unwind: + STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, + stripe_xattr, NULL); + + ret = stripe_free_xattr_str (local); + + GF_FREE (local->xattr_list); + + if (stripe_xattr) + dict_unref (stripe_xattr); + } + + return ret; +} + +int32_t +stripe_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + int i = 0; + xlator_t **sub_volumes; + int ret = 0; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + loc_copy (&local->loc, loc); + + + if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0) + && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + local->marker.call_count = priv->child_count; + + sub_volumes = alloca ( priv->child_count * + sizeof (xlator_t *)); + for (i = 0, trav = this->children; trav ; + trav = trav->next, i++) { + + *(sub_volumes + i) = trav->xlator; + + } + + if (cluster_getmarkerattr (frame, this, loc, name, + local, stripe_getxattr_unwind, + sub_volumes, priv->child_count, + MARKER_UUID_TYPE, marker_uuid_default_gauge, + priv->vol_uuid)) { + op_errno = EINVAL; + goto err; + } + + return 0; + } + + if (name && strncmp (name, GF_XATTR_QUOTA_SIZE_KEY, + strlen (GF_XATTR_QUOTA_SIZE_KEY)) == 0) { + local->wind_count = priv->child_count; + + for (i = 0, trav=this->children; i < priv->child_count; i++, + trav = trav->next) { + STACK_WIND (frame, stripe_getxattr_cbk, + trav->xlator, trav->xlator->fops->getxattr, + loc, name, xdata); + } + + return 0; + } + + if (name && + ((strncmp (name, GF_XATTR_PATHINFO_KEY, + strlen (GF_XATTR_PATHINFO_KEY)) == 0))) { + if (IA_ISREG (loc->inode->ia_type)) { + ret = inode_ctx_get (loc->inode, this, + (uint64_t *) &local->fctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "stripe size unavailable from fctx" + " relying on pathinfo could lead to" + " wrong results"); + } + + local->nallocs = local->wind_count = priv->child_count; + (void) strncpy (local->xsel, name, strlen (name)); + + /** + * for xattrs that need info from all childs, fill ->xsel + * as above and call the filler function in cbk based on + * it + */ + for (i = 0, trav = this->children; i < priv->child_count; i++, + trav = trav->next) { + STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk, + (void *) (long) i, trav->xlator, + trav->xlator->fops->getxattr, + loc, name, xdata); + } + + return 0; + } + + if (name &&(*priv->vol_uuid)) { + if ((match_uuid_local (name, priv->vol_uuid) == 0) + && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + + if (!IA_FILE_OR_DIR (loc->inode->ia_type)) + local->marker.call_count = 1; + else + local->marker.call_count = priv->child_count; + + sub_volumes = alloca (local->marker.call_count * + sizeof (xlator_t *)); + + for (i = 0, trav = this->children; + i < local->marker.call_count; + i++, trav = trav->next) { + *(sub_volumes + i) = trav->xlator; + + } + + if (cluster_getmarkerattr (frame, this, loc, name, + local, + stripe_getxattr_unwind, + sub_volumes, + local->marker.call_count, + MARKER_XTIME_TYPE, + marker_xtime_default_gauge, + priv->vol_uuid)) { + op_errno = EINVAL; + goto err; + } + + return 0; + } + } + + + STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + + return 0; + +err: + STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); + return 0; +} + +inline gf_boolean_t +stripe_is_special_xattr (const char *name) +{ + gf_boolean_t is_spl = _gf_false; + + if (!name) { + goto out; + } + + if (!strncmp (name, GF_XATTR_LOCKINFO_KEY, + strlen (GF_XATTR_LOCKINFO_KEY)) + || !strncmp (name, GF_XATTR_PATHINFO_KEY, + strlen (GF_XATTR_PATHINFO_KEY))) + is_spl = _gf_true; +out: + return is_spl; +} + +int32_t +stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + int32_t ret = -1, op_errno = 0; + int i = 0; + xlator_list_t *trav = NULL; + + priv = this->private; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->op_ret = -1; + frame->local = local; + + strncpy (local->xsel, name, strlen (name)); + local->nallocs = local->wind_count = priv->child_count; + + for (i = 0, trav = this->children; i < priv->child_count; i++, + trav = trav->next) { + STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk, + (void *) (long) i, trav->xlator, + trav->xlator->fops->fgetxattr, + fd, name, xdata); + } + + return 0; + +err: + STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL); + return ret; +} + +int32_t +stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + if (stripe_is_special_xattr (name)) { + stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata); + goto out; + } + + STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + +out: + return 0; +} + + + +int32_t +stripe_priv_dump (xlator_t *this) +{ + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; + stripe_private_t *priv = NULL; + int ret = -1; + struct stripe_options *options = NULL; + + GF_VALIDATE_OR_GOTO ("stripe", this, out); + + priv = this->private; + if (!priv) + goto out; + + ret = TRY_LOCK (&priv->lock); + if (ret != 0) + goto out; + + gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name); + gf_proc_dump_write("child_count","%d", priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + sprintf (key, "subvolumes[%d]", i); + gf_proc_dump_write (key, "%s.%s", priv->xl_array[i]->type, + priv->xl_array[i]->name); + } + + options = priv->pattern; + while (options != NULL) { + gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern); + gf_proc_dump_write ("options_block_size", "%ul", options->block_size); + + options = options->next; + } + + gf_proc_dump_write ("block_size", "%ul", priv->block_size); + gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down); + gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down); + gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported); + + UNLOCK (&priv->lock); + +out: + return ret; +} struct xlator_fops fops = { - .stat = stripe_stat, - .unlink = stripe_unlink, - .rename = stripe_rename, - .link = stripe_link, - .truncate = stripe_truncate, - .create = stripe_create, - .open = stripe_open, - .readv = stripe_readv, - .writev = stripe_writev, - .statfs = stripe_statfs, - .flush = stripe_flush, - .fsync = stripe_fsync, - .ftruncate = stripe_ftruncate, - .fstat = stripe_fstat, - .mkdir = stripe_mkdir, - .rmdir = stripe_rmdir, - .lk = stripe_lk, - .opendir = stripe_opendir, - .fsyncdir = stripe_fsyncdir, - .setattr = stripe_setattr, - .fsetattr = stripe_fsetattr, - .lookup = stripe_lookup, - .mknod = stripe_mknod, + .stat = stripe_stat, + .unlink = stripe_unlink, + .rename = stripe_rename, + .link = stripe_link, + .truncate = stripe_truncate, + .create = stripe_create, + .open = stripe_open, + .readv = stripe_readv, + .writev = stripe_writev, + .statfs = stripe_statfs, + .flush = stripe_flush, + .fsync = stripe_fsync, + .ftruncate = stripe_ftruncate, + .fstat = stripe_fstat, + .mkdir = stripe_mkdir, + .rmdir = stripe_rmdir, + .lk = stripe_lk, + .opendir = stripe_opendir, + .fsyncdir = stripe_fsyncdir, + .setattr = stripe_setattr, + .fsetattr = stripe_fsetattr, + .lookup = stripe_lookup, + .mknod = stripe_mknod, + .setxattr = stripe_setxattr, + .fsetxattr = stripe_fsetxattr, + .getxattr = stripe_getxattr, + .fgetxattr = stripe_fgetxattr, + .removexattr = stripe_removexattr, + .fremovexattr = stripe_fremovexattr, + .readdirp = stripe_readdirp, + .fallocate = stripe_fallocate, + .discard = stripe_discard, + .zerofill = stripe_zerofill, }; struct xlator_cbks cbks = { .release = stripe_release, + .forget = stripe_forget, }; +struct xlator_dumpops dumpops = { + .priv = stripe_priv_dump, +}; struct volume_options options[] = { { .key = {"block-size"}, - .type = GF_OPTION_TYPE_ANY + .type = GF_OPTION_TYPE_SIZE_LIST, + .default_value = "128KB", + .min = STRIPE_MIN_BLOCK_SIZE, + .description = "Size of the stripe unit that would be read " + "from or written to the striped servers." }, { .key = {"use-xattr"}, - .type = GF_OPTION_TYPE_BOOL + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true" }, + { .key = {"coalesce"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "Enable/Disable coalesce mode to flatten striped " + "files as stored on the server (i.e., eliminate holes " + "caused by the traditional format)." + }, { .key = {NULL} }, }; |
