From 85a389cfced2c299faa37b6f930c2cd0ca8c72b9 Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Fri, 14 Jul 2017 15:18:19 +0530 Subject: cluster/dht: Add migration checks to dht_(f)xattrop The dht_(f)xattrop implementation did not implement migration phase1/phase2 checks which could cause issues with rebalance on sharded volumes. This does not solve the issue where fops may reach the target out of order. Change-Id: I2416fc35115e60659e35b4b717fd51f20746586c BUG: 1471031 Signed-off-by: N Balachandran --- xlators/cluster/dht/src/dht-common.c | 47 ++++++ xlators/cluster/dht/src/dht-common.h | 9 ++ xlators/cluster/dht/src/dht-helper.c | 3 + xlators/cluster/dht/src/dht-inode-read.c | 241 ++++++++++++++++++++++++++++--- xlators/cluster/dht/src/dht-rebalance.c | 73 ++++++---- 5 files changed, 326 insertions(+), 47 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 2bee8bb1e65..4a16714177a 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -45,6 +45,11 @@ int dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this); +int +dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata); + /* Sets the blocks and size values to fixed values. This is to be called * only for dirs. The caller is responsible for checking the type @@ -60,6 +65,48 @@ int32_t dht_set_fixed_dir_stat (struct iatt *stat) } +/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY + * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to + * DHT_IATT_IN_XDATA_KEY + */ +int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req) +{ + int ret = -1; + + ret = dict_set_int8 (xattr_req, DHT_MODE_IN_XDATA_KEY, 1); + ret = dict_set_int8 (xattr_req, DHT_IATT_IN_XDATA_KEY, 1); + + /* At least one call succeeded */ + return ret; +} + + +/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY + * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to + * DHT_IATT_IN_XDATA_KEY + * This will return a dummy iatt with only the mode and type set + */ +int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, + struct iatt *stbuf) +{ + int ret = -1; + int32_t mode = 0; + + ret = dict_get_int32 (xdata, DHT_MODE_IN_XDATA_KEY, &mode); + + if (ret) { + ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, + (void **)&stbuf); + } else { + stbuf->ia_prot = ia_prot_from_st_mode (mode); + stbuf->ia_type = ia_type_from_st_mode (mode); + } + + return ret; +} + + + int dht_rmdir_unlock (call_frame_t *frame, xlator_t *this); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index cc7d41b3bfa..577970835ae 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -149,6 +149,7 @@ struct dht_rebalance_ { dht_defrag_cbk_fn_t target_op_fn; dict_t *xdata; dict_t *xattr; + dict_t *dict; int32_t set; struct gf_flock flock; int lock_cmd; @@ -1466,4 +1467,12 @@ int dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf, int32_t valid, dht_layout_t *layout); +/* Abstract out the DHT-IATT-IN-DICT */ + + +int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req); + +int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, + struct iatt *stbuf); + #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 0c249f0e84c..381856f2455 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -801,6 +801,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local) if (local->rebalance.xattr) dict_unref (local->rebalance.xattr); + if (local->rebalance.dict) + dict_unref (local->rebalance.dict); + GF_FREE (local->rebalance.vector); if (local->rebalance.iobref) diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index a9e47664a81..fa63fefb903 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -24,8 +24,9 @@ int dht_lk2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); int dht_fsync2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); - - +int +dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret); int dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1246,13 +1247,163 @@ err: return 0; } -/* Currently no translators on top of 'distribute' will be using - * below fops, hence not implementing 'migration' related checks - */ + +int +dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + dht_local_t *local = NULL; + call_frame_t *call_frame = NULL; + xlator_t *prev = NULL; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; + struct iatt stbuf = {0,}; + int ret = -1; + inode_t *inode = NULL; + + local = frame->local; + call_frame = cookie; + prev = call_frame->this; + + local->op_errno = op_errno; + + if ((op_ret == -1) && !dht_inode_missing (op_errno)) { + gf_msg_debug (this->name, op_errno, + "subvolume %s returned -1.", + prev->name); + goto out; + } + + if (local->call_cnt != 1) + goto out; + + ret = dht_read_iatt_from_xdata (this, xdata, &stbuf); + + if ((!op_ret) && (ret)) { + /* This is a potential problem and can cause corruption + * with sharding. + * Oh well. We tried. + */ + goto out; + } + + local->op_ret = op_ret; + local->rebalance.target_op_fn = dht_common_xattrop2; + if (xdata) + local->rebalance.xdata = dict_ref (xdata); + + if (dict) + local->rebalance.dict = dict_ref (dict); + + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (&stbuf)) { + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1 (&stbuf)) { + + inode = local->loc.inode ? local->loc.inode : local->fd->inode; + dht_inode_ctx_get_mig_info (this, inode, &src_subvol, + &dst_subvol); + + if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol, + dst_subvol) || + !dht_fd_open_on_dst (this, local->fd, dst_subvol)) { + + ret = dht_rebalance_in_progress_check (this, frame); + if (!ret) + return 0; + } else { + dht_common_xattrop2 (this, dst_subvol, frame, 0); + return 0; + } + } + + +out: + if (local->fop == GF_FOP_XATTROP) { + DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, + dict, xdata); + } else { + DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, + dict, xdata); + } + + return 0; +} + + +int +dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret) +{ + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; + + if ((frame == NULL) || (frame->local == NULL)) + goto out; + + local = frame->local; + op_errno = local->op_errno; + + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + if (local->fop == GF_FOP_XATTROP) { + DHT_STACK_UNWIND (xattrop, frame, local->op_ret, + op_errno, local->rebalance.dict, + local->rebalance.xdata); + } else { + DHT_STACK_UNWIND (fxattrop, frame, local->op_ret, + op_errno, local->rebalance.dict, + local->rebalance.xdata); + } + + return 0; + } + + if (subvol == NULL) + goto out; + + local->call_cnt = 2; /* This is the second attempt */ + + if (local->fop == GF_FOP_XATTROP) { + STACK_WIND (frame, dht_common_xattrop_cbk, subvol, + subvol->fops->xattrop, &local->loc, + local->rebalance.flags, local->rebalance.xattr, + local->xattr_req); + } else { + STACK_WIND (frame, dht_common_xattrop_cbk, subvol, + subvol->fops->fxattrop, local->fd, + local->rebalance.flags, local->rebalance.xattr, + local->xattr_req); + } + + return 0; + +out: + + /* If local is unavailable we could be unwinding the wrong + * function here */ + + if (local && (local->fop == GF_FOP_XATTROP)) { + DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL); + } else { + DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL); + } + return 0; +} + int dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata); return 0; @@ -1263,9 +1414,10 @@ int dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - xlator_t *subvol = NULL; + xlator_t *subvol = NULL; int op_errno = -1; - dht_local_t *local = NULL; + dht_local_t *local = NULL; + int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -1287,11 +1439,33 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, goto err; } - local->call_cnt = 1; + /* Todo : Handle dirs as well. At the moment the only xlator above dht + * that uses xattrop is sharding and that is only for files */ + + if (IA_ISDIR (loc->inode->ia_type)) { + STACK_WIND (frame, dht_xattrop_cbk, subvol, + subvol->fops->xattrop, loc, flags, dict, xdata); + + } else { + local->xattr_req = xdata ? dict_ref(xdata) : dict_new (); + local->call_cnt = 1; - STACK_WIND (frame, dht_xattrop_cbk, - subvol, subvol->fops->xattrop, - loc, flags, dict, xdata); + local->rebalance.xattr = dict_ref (dict); + local->rebalance.flags = flags; + + ret = dht_request_iatt_in_xdata (this, local->xattr_req); + + if (ret) { + gf_msg_debug (this->name, 0, + "Failed to set dictionary key %s file=%s", + DHT_IATT_IN_XDATA_KEY, loc->path); + } + + STACK_WIND (frame, dht_common_xattrop_cbk, subvol, + subvol->fops->xattrop, loc, + local->rebalance.flags, local->rebalance.xattr, + local->xattr_req); + } return 0; @@ -1318,6 +1492,8 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this, { xlator_t *subvol = NULL; int op_errno = -1; + dht_local_t *local = NULL; + int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -1331,10 +1507,39 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this, goto err; } - STACK_WIND (frame, - dht_fxattrop_cbk, - subvol, subvol->fops->fxattrop, - fd, flags, dict, xdata); + local = dht_local_init (frame, NULL, fd, GF_FOP_FXATTROP); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + /* Todo : Handle dirs as well. At the moment the only xlator above dht + * that uses xattrop is sharding and that is only for files */ + + if (IA_ISDIR (fd->inode->ia_type)) { + STACK_WIND (frame, dht_fxattrop_cbk, subvol, + subvol->fops->fxattrop, fd, flags, dict, xdata); + + } else { + local->xattr_req = xdata ? dict_ref(xdata) : dict_new (); + local->call_cnt = 1; + + local->rebalance.xattr = dict_ref (dict); + local->rebalance.flags = flags; + + ret = dht_request_iatt_in_xdata (this, local->xattr_req); + + if (ret) { + gf_msg_debug (this->name, 0, + "Failed to set dictionary key %s fd=%p", + DHT_IATT_IN_XDATA_KEY, fd); + } + + STACK_WIND (frame, dht_common_xattrop_cbk, subvol, + subvol->fops->fxattrop, fd, + local->rebalance.flags, local->rebalance.xattr, + local->xattr_req); + } return 0; @@ -1345,6 +1550,9 @@ err: return 0; } +/* Currently no translators on top of 'distribute' will be using + * below fops, hence not implementing 'migration' related checks + */ int dht_inodelk_cbk (call_frame_t *frame, void *cookie, @@ -1406,7 +1614,6 @@ dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_lk_inode_unref (frame, op_ret); DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata); return 0; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index c1c8a8a6ecf..23383c528bc 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -175,7 +175,7 @@ dht_strip_out_acls (dict_t *dict) { if (dict) { dict_del (dict, "trusted.SGI_ACL_FILE"); - dict_del (dict, "POSIX_ACL_ACCESS_XATTR"); + dict_del (dict, POSIX_ACL_ACCESS_XATTR); } } @@ -672,7 +672,7 @@ out: static int __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, - dict_t *xattr, int *fop_errno) + int *fop_errno) { int ret = -1; fd_t *fd = NULL; @@ -817,16 +817,6 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, goto out; } - ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to set xattr on %s", - loc->path, to->name); - - } - ret = syncop_fsetattr (to, fd, stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL, NULL, NULL, NULL); @@ -1650,24 +1640,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - - /* TODO: move all xattr related operations to fd based operations */ - ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to get xattr from %s", - loc->path, from->name); - } - - /* Copying posix acls to the linkto file messes up the permissions*/ - dht_strip_out_acls (xattr); - /* create the destination, with required modes/xattr */ ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, - &dst_fd, xattr, fop_errno); + &dst_fd, fop_errno); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed" " on - %s for file - %s", to->name, loc->path); @@ -1712,7 +1687,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, * as in case of failure the linkto needs to point to the source * subvol */ ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, - &dst_fd, xattr, fop_errno); + &dst_fd, fop_errno); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Create dst failed" " on - %s for file - %s", to->name, loc->path); @@ -1738,6 +1713,42 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, loc->path, from->name); goto out; } + + /* TODO: move all xattr related operations to fd based operations */ + ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg (this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to get xattr from %s", + loc->path, from->name); + ret = -1; + goto out; + } + + /* Copying posix acls to the linkto file messes up the permissions*/ + dht_strip_out_acls (xattr); + + /* Remove the linkto xattr as we don't want to overwrite the value + * set on the dst. + */ + dict_del (xattr, conf->link_xattr_name); + + /* We need to error out if this fails as having the wrong shard xattrs + * set on the dst could cause data corruption + */ + ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg (this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to set xattr on %s", + loc->path, to->name); + ret = -1; + goto out; + } + if (xattr_rsp) { /* we no more require this key */ dict_del (dict, conf->link_xattr_name); @@ -2029,7 +2040,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, xattr = NULL; } - ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); + /* Set only the Posix ACLs this time */ + ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, + NULL, NULL); if (ret < 0) { gf_msg (this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, -- cgit