From b8c56ebe617327d570c252f8c411c85a84e727e6 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Wed, 4 Nov 2015 15:33:22 -0500 Subject: cluster/tier: readdirp to cold tier only It is possible a file would get migrated in the middle of a readdir operation. If there are four subvolumes A,B,C,D, and if readdir reads them in order and reaches subvol B, then, if a file is moved from D to A, it will not be included in the readdir output. This phenomenon has pre-existed in DHT migration but is more apparent in tiering. When a file is moved off the hashed subvolume a T file is created. For tiering, we will make the cold subvolume the hashed subvolume. This will ensure the creation of a T file. Readdir will not skip T files in the tier translator. Making the cold subvolume the hashed subvolume ensures the T files created on promotions or creates will be less likely to fill the volume. Creates still put the data on the hot subvolume. This is a backport of 12530 > Change-Id: Ifde557d3d0e94a4570ca9f115adee3db2ee75407 > BUG: 1281598 > Signed-off-by: Dan Lambright > Reviewed-on: http://review.gluster.org/12530 > Tested-by: Gluster Build System > Tested-by: NetBSD Build System > Reviewed-by: N Balachandran > Reviewed-by: Raghavendra G Signed-off-by: Dan Lambright Signed-off-by: Dan Lambright Conflicts: xlators/cluster/dht/src/tier.c Change-Id: I5720a4cd04ae5088e5d7d23439b0f90d6bbc6265 BUG: 1283923 Reviewed-on: http://review.gluster.org/12722 Tested-by: NetBSD Build System Reviewed-by: N Balachandran Tested-by: Gluster Build System Reviewed-by: Dan Lambright Tested-by: Dan Lambright --- xlators/cluster/dht/src/dht-common.c | 189 +++++++++++++++++++++++------------ 1 file changed, 124 insertions(+), 65 deletions(-) (limited to 'xlators/cluster/dht/src/dht-common.c') diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 3228f20e53e..e664b576f92 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ 
-187,7 +187,7 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, { dht_local_t *local = NULL; dht_layout_t *layout = NULL; - int ret = -1; + int ret = -1; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -787,13 +787,13 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if ((op_errno != ENOTCONN) && (op_errno != ENOENT) && (op_errno != ESTALE)) { - gf_msg (this->name, GF_LOG_INFO, op_errno, + gf_msg (this->name, GF_LOG_INFO, op_errno, DHT_MSG_REVALIDATE_CBK_INFO, - "Revalidate: subvolume %s for %s " + "Revalidate: subvolume %s for %s " "(gfid = %s) returned -1", - prev->this->name, local->loc.path, + prev->this->name, local->loc.path, gfid); - } + } if (op_errno == ESTALE) { /* propagate the ESTALE to parent. * setting local->return_estale would send @@ -940,7 +940,7 @@ out: } } cont: - if (local->layout_mismatch) { + if (local->layout_mismatch) { /* Found layout mismatch in the directory, need to fix this in the inode context */ dht_layout_unref (this, local->layout); @@ -2336,18 +2336,18 @@ dht_lookup (call_frame_t *frame, xlator_t *this, /* need it for dir self-heal */ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req); - for (i = 0; i < call_cnt; i++) { - subvol = layout->list[i].xlator; + for (i = 0; i < call_cnt; i++) { + subvol = layout->list[i].xlator; gf_msg_debug (this->name, 0, "calling " "revalidate lookup for %s at %s", loc->path, subvol->name); - STACK_WIND (frame, dht_revalidate_cbk, - subvol, subvol->fops->lookup, - &local->loc, local->xattr_req); + STACK_WIND (frame, dht_revalidate_cbk, + subvol, subvol->fops->lookup, + &local->loc, local->xattr_req); - } + } } else { do_fresh_lookup: /* TODO: remove the hard-coding */ @@ -2831,8 +2831,8 @@ dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_msg (this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir"); - local->op_ret = -1; - local->op_errno = op_errno; + 
local->op_ret = -1; + local->op_errno = op_errno; } goto unlock; @@ -3011,8 +3011,8 @@ dht_getxattr_unwind (call_frame_t *frame, int dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - dict_t *xattr, dict_t *xdata) + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) { int this_call_cnt = 0; dht_local_t *local = NULL; @@ -3104,44 +3104,44 @@ unlock: UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { DHT_STACK_UNWIND (getxattr, frame, local->op_ret, local->op_errno, local->xattr, local->xattr_req); - } + } - return 0; + return 0; } int dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *key, dict_t *xdata) + loc_t *loc, const char *key, dict_t *xdata) { - dht_local_t *local = NULL; - int i = 0; - dht_layout_t *layout = NULL; - int cnt = 0; - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + int i = 0; + dht_layout_t *layout = NULL; + int cnt = 0; + xlator_t *subvol = NULL; - local = frame->local; - layout = local->layout; + local = frame->local; + layout = local->layout; - cnt = local->call_cnt = layout->cnt; + cnt = local->call_cnt = layout->cnt; - local->op_ret = -1; - local->op_errno = ENOENT; + local->op_ret = -1; + local->op_errno = ENOENT; - for (i = 0; i < cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND (frame, dht_getxattr_get_real_filename_cbk, - subvol, subvol->fops->getxattr, - loc, key, xdata); - } + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_getxattr_get_real_filename_cbk, + subvol, subvol->fops->getxattr, + loc, key, xdata); + } - return 0; + return 0; } int @@ -3210,13 +3210,13 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } } - if (key && - (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY, - strlen 
(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) - && DHT_IS_DIR(layout)) { - dht_getxattr_get_real_filename (frame, this, loc, key, xdata); - return 0; - } + if (key && + (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) + && DHT_IS_DIR(layout)) { + dht_getxattr_get_real_filename (frame, this, loc, key, xdata); + return 0; + } if (key && DHT_IS_DIR(layout) && (!strcmp (key, GF_REBAL_FIND_LOCAL_SUBVOL))) { @@ -3394,7 +3394,7 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this, } if ((fd->inode->ia_type == IA_IFDIR) - && key + && key && (strncmp (key, GF_XATTR_LOCKINFO_KEY, strlen (GF_XATTR_LOCKINFO_KEY)) != 0)) { cnt = local->call_cnt = layout->cnt; @@ -4563,6 +4563,7 @@ err: return 0; } + int dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) @@ -4753,17 +4754,17 @@ done: goto unwind; } - if (conf->readdir_optimize == _gf_true) { + if (conf->readdir_optimize == _gf_true) { if (next_subvol != local->first_up_subvol) { ret = dict_set_int32 (local->xattr, GF_READDIR_SKIP_DIRS, 1); if (ret) gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value" + "Failed to set dictionary value" ":key = %s", GF_READDIR_SKIP_DIRS ); - } else { + } else { dict_del (local->xattr, GF_READDIR_SKIP_DIRS); } @@ -4940,23 +4941,23 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, " : key = %s", conf->link_xattr_name); - if (conf->readdir_optimize == _gf_true) { + if (conf->readdir_optimize == _gf_true) { if (xvol != local->first_up_subvol) { - ret = dict_set_int32 (local->xattr, - GF_READDIR_SKIP_DIRS, 1); - if (ret) - gf_msg (this->name, + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "Failed to set " "dictionary value: " "key = %s", - GF_READDIR_SKIP_DIRS); + GF_READDIR_SKIP_DIRS); } else { dict_del 
(local->xattr, GF_READDIR_SKIP_DIRS); } - } + } } STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, @@ -6042,6 +6043,58 @@ err: return 0; } +int +dht_create_tier_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, + dict_t *params) +{ + xlator_t *hot_subvol = NULL; + xlator_t *cold_subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + + local = frame->local; + + conf = this->private; + + cold_subvol = subvol; + hot_subvol = conf->subvolumes[1]; + if (conf->subvolumes[0] != cold_subvol) { + hot_subvol = conf->subvolumes[0]; + } + + /* if hot tier full, write to cold */ + if (dht_is_subvol_filled (this, hot_subvol)) { + gf_msg_debug (this->name, 0, + "creating %s on %s", loc->path, + cold_subvol->name); + + STACK_WIND (frame, dht_create_cbk, + cold_subvol, cold_subvol->fops->create, + loc, flags, mode, umask, fd, params); + } else { + local->params = dict_ref (params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + local->cached_subvol = hot_subvol; + local->hashed_subvol = cold_subvol; + + gf_msg_debug (this->name, 0, + "creating %s on %s (link at %s)", loc->path, + hot_subvol->name, cold_subvol->name); + + dht_linkfile_create (frame, dht_create_linkfile_create_cbk, + this, hot_subvol, cold_subvol, loc); + + goto out; + } +out: + return 0; +} + + int dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, xlator_t *subvol, loc_t *loc, int32_t flags, @@ -6053,6 +6106,11 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, local = frame->local; + if (strcmp (this->type, "cluster/tier") == 0) + return dht_create_tier_wind_to_avail_subvol(frame, this, subvol, + loc, flags, mode, + umask, fd, params); + if (!dht_is_subvol_filled (this, subvol)) { gf_msg_debug (this->name, 0, "creating %s on %s", loc->path, @@ -6525,15 +6583,15 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t 
*this, ret = dht_layout_merge (this, layout, prev->this, -1, ENOSPC, NULL); } else { - if (op_ret == -1 && op_errno == EEXIST) { - /* Very likely just a race between mkdir and - self-heal (from lookup of a concurrent mkdir - attempt). - Ignore error for now. layout setting will - anyways fail if this was a different (old) - pre-existing different directory. - */ - op_ret = 0; + if (op_ret == -1 && op_errno == EEXIST) { + /* Very likely just a race between mkdir and + self-heal (from lookup of a concurrent mkdir + attempt). + Ignore error for now. layout setting will + anyways fail if this was a different (old) + pre-existing different directory. + */ + op_ret = 0; dir_exists = _gf_true; } ret = dht_layout_merge (this, layout, prev->this, @@ -8041,3 +8099,4 @@ int32_t dht_set_local_rebalance (xlator_t *this, dht_local_t *local, return 0; } + -- cgit