From d3fd13688f850e58bfa517a4c2d17e8fa4343dcf Mon Sep 17 00:00:00 2001 From: Mohammed Rafi KC Date: Thu, 10 Dec 2015 20:13:22 +0530 Subject: tier:delete the linkfile if data file creation fails If we are creating data file in a hot subvolume then we will create a linkfile in cold subvolume. Linkfile creation happens first. If linkfile creation was successful and data file creation failed, then linkfile in cold subvolume will become stale. This patch will delete the linkfile as well, if data file creation fails. Also this code duplicates dht_create to make tier_create Change-Id: I377a90dad47f288e9576c7323b23cf694a91a7a3 BUG: 1290677 Signed-off-by: Mohammed Rafi KC Reviewed-on: http://review.gluster.org/12948 Reviewed-by: N Balachandran Tested-by: NetBSD Build System Reviewed-by: Raghavendra G Reviewed-by: Dan Lambright Tested-by: Dan Lambright --- libglusterfs/src/glusterfs.h | 1 + xlators/cluster/dht/src/dht-common.c | 81 --------- xlators/cluster/dht/src/tier-common.c | 261 +++++++++++++++++++++++++++++ xlators/cluster/dht/src/tier-common.h | 26 +++ xlators/cluster/dht/src/tier.c | 2 +- xlators/storage/posix/src/posix-messages.h | 2 +- xlators/storage/posix/src/posix.c | 17 ++ 7 files changed, 307 insertions(+), 83 deletions(-) diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index a4b0a3111af..6fda458347a 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -236,6 +236,7 @@ #define DHT_SKIP_NON_LINKTO_UNLINK "unlink-only-if-dht-linkto-file" #define TIER_SKIP_NON_LINKTO_UNLINK "unlink-only-if-tier-linkto-file" +#define TIER_LINKFILE_GFID "tier-linkfile-gfid" #define DHT_SKIP_OPEN_FD_UNLINK "dont-unlink-for-open-fd" #define DHT_IATT_IN_XDATA_KEY "dht-get-iatt-in-xattr" diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 39da4323d8e..eb7ca32632a 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -6038,82 +6038,6 @@ err: return 0; 
} -gf_boolean_t -dht_is_hot_tier_decommissioned (xlator_t *this) -{ - dht_conf_t *conf = NULL; - xlator_t *hot_tier = NULL; - int i = 0; - - conf = this->private; - hot_tier = conf->subvolumes[1]; - - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] && - conf->decommissioned_bricks[i] == hot_tier) - return _gf_true; - } - } - - return _gf_false; -} - -int -dht_create_tier_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, - xlator_t *subvol, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, - dict_t *params) -{ - xlator_t *hot_subvol = NULL; - xlator_t *cold_subvol = NULL; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - - local = frame->local; - - conf = this->private; - - cold_subvol = subvol; - hot_subvol = conf->subvolumes[1]; - if (conf->subvolumes[0] != cold_subvol) { - hot_subvol = conf->subvolumes[0]; - } - /* - * if hot tier full, write to cold. - * Also if hot tier is full, create in cold - */ - if (dht_is_subvol_filled (this, hot_subvol) || - dht_is_hot_tier_decommissioned (this)) { - gf_msg_debug (this->name, 0, - "creating %s on %s", loc->path, - cold_subvol->name); - - STACK_WIND (frame, dht_create_cbk, - cold_subvol, cold_subvol->fops->create, - loc, flags, mode, umask, fd, params); - } else { - local->params = dict_ref (params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - local->cached_subvol = hot_subvol; - local->hashed_subvol = cold_subvol; - - gf_msg_debug (this->name, 0, - "creating %s on %s (link at %s)", loc->path, - hot_subvol->name, cold_subvol->name); - - dht_linkfile_create (frame, dht_create_linkfile_create_cbk, - this, hot_subvol, cold_subvol, loc); - - goto out; - } -out: - return 0; -} - - int dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, xlator_t *subvol, loc_t *loc, int32_t flags, @@ -6125,11 +6049,6 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, local 
= frame->local; - if (strcmp (this->type, "cluster/tier") == 0) - return dht_create_tier_wind_to_avail_subvol(frame, this, subvol, - loc, flags, mode, - umask, fd, params); - if (!dht_is_subvol_filled (this, subvol)) { gf_msg_debug (this->name, 0, "creating %s on %s", loc->path, diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c index 19ed224c58c..40c8c9e965d 100644 --- a/xlators/cluster/dht/src/tier-common.c +++ b/xlators/cluster/dht/src/tier-common.c @@ -16,6 +16,267 @@ #include "tier-common.h" #include "tier.h" +int +tier_create_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + + dht_local_t *local = NULL; + + local = frame->local; + + if (local->params) { + dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY); + } + + DHT_STACK_UNWIND (create, frame, -1, local->op_errno, + NULL, NULL, NULL, NULL, NULL, NULL); + + return 0; +} + +int +tier_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + call_frame_t *prev = NULL; + int ret = -1; + dht_local_t *local = NULL; + xlator_t *hashed_subvol = NULL; + dht_conf_t *conf = NULL; + + local = frame->local; + conf = this->private; + + hashed_subvol = TIER_HASHED_SUBVOL; + + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + if (op_ret == -1) { + if (local->linked == _gf_true) { + local->op_errno = op_errno; + local->op_ret = op_ret; + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file + (local->params); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value to " + "unlink of migrating file"); + goto out; + } + + STACK_WIND (frame, + tier_create_unlink_stale_linkto_cbk, + hashed_subvol, + hashed_subvol->fops->unlink, + 
&local->loc, 0, local->params); + return 0; + } + goto out; + } + + prev = cookie; + + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + preparent, 0); + + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } + + ret = dht_layout_preset (this, prev->this, inode); + if (ret != 0) { + gf_msg_debug (this->name, 0, + "could not set preset layout for subvol %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + local->op_errno = op_errno; + + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal (frame, this); + } +out: + if (local->params) { + dict_del (local->params, TIER_LINKFILE_GFID); + } + + DHT_STRIP_PHASE1_FLAGS (stbuf); + + DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, + stbuf, preparent, postparent, xdata); + + return 0; +} + +int +tier_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + + local = frame->local; + if (!local) { + op_errno = EINVAL; + goto err; + } + + if (op_ret == -1) { + local->op_errno = op_errno; + goto err; + } + + conf = this->private; + if (!conf) { + local->op_errno = EINVAL; + op_errno = EINVAL; + goto err; + } + + cached_subvol = TIER_UNHASHED_SUBVOL; + + if (local->params) { + dict_del (local->params, conf->link_xattr_name); + dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY); + ret = dict_set_static_bin (local->params, TIER_LINKFILE_GFID, + stbuf->ia_gfid, 16); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value" + " : key = %s", TIER_LINKFILE_GFID); + } + + } + + STACK_WIND (frame, tier_create_cbk, + cached_subvol, cached_subvol->fops->create, + &local->loc, 
local->flags, local->mode, + local->umask, local->fd, local->params); + + return 0; +err: + DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); + return 0; +} + +gf_boolean_t +tier_is_hot_tier_decommissioned (xlator_t *this) +{ + dht_conf_t *conf = NULL; + xlator_t *hot_tier = NULL; + int i = 0; + + conf = this->private; + hot_tier = conf->subvolumes[1]; + + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] && + conf->decommissioned_bricks[i] == hot_tier) + return _gf_true; + } + } + + return _gf_false; +} + +int +tier_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + mode_t umask, fd_t *fd, dict_t *params) +{ + int op_errno = -1; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *hot_subvol = NULL; + xlator_t *cold_subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + conf = this->private; + + dht_get_du_info (frame, this, loc); + + local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + + cold_subvol = TIER_HASHED_SUBVOL; + hot_subvol = TIER_UNHASHED_SUBVOL; + + if (conf->subvolumes[0] != cold_subvol) { + hot_subvol = conf->subvolumes[0]; + } + /* + * if hot tier full, write to cold. 
+ * Also if hot tier is decommissioned, create in cold + */ + if (dht_is_subvol_filled (this, hot_subvol) || + tier_is_hot_tier_decommissioned (this)) { + gf_msg_debug (this->name, 0, + "creating %s on %s", loc->path, + cold_subvol->name); + + STACK_WIND (frame, tier_create_cbk, + cold_subvol, cold_subvol->fops->create, + loc, flags, mode, umask, fd, params); + } else { + local->params = dict_ref (params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + local->cached_subvol = hot_subvol; + local->hashed_subvol = cold_subvol; + + gf_msg_debug (this->name, 0, + "creating %s on %s (link at %s)", loc->path, + hot_subvol->name, cold_subvol->name); + + dht_linkfile_create (frame, tier_create_linkfile_create_cbk, + this, hot_subvol, cold_subvol, loc); + + goto out; + } +out: + return 0; + +err: + + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); + + return 0; +} + int tier_unlink_nonhashed_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h index 81a0c96411d..17cddcdd3e4 100644 --- a/xlators/cluster/dht/src/tier-common.h +++ b/xlators/cluster/dht/src/tier-common.h @@ -11,6 +11,32 @@ #ifndef _TIER_COMMON_H_ #define _TIER_COMMON_H_ /* Function definitions */ +int +tier_create_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata); + +int +tier_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata); + +int +tier_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt 
*preparent, + struct iatt *postparent, + dict_t *xdata); + +int +tier_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + mode_t umask, fd_t *fd, dict_t *params); int32_t tier_unlink (call_frame_t *frame, xlator_t *this, diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 2f415c4dbc2..452b491ffbe 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -2020,7 +2020,7 @@ class_methods_t class_methods = { struct xlator_fops fops = { .lookup = dht_lookup, - .create = dht_create, + .create = tier_create, .mknod = dht_mknod, .open = dht_open, diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h index 9f267dfe0d9..961a706cc36 100644 --- a/xlators/storage/posix/src/posix-messages.h +++ b/xlators/storage/posix/src/posix-messages.h @@ -45,7 +45,7 @@ */ #define POSIX_COMP_BASE GLFS_MSGID_COMP_POSIX -#define GLFS_NUM_MESSAGES 104 +#define GLFS_NUM_MESSAGES 105 #define GLFS_MSGID_END (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ #define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages" diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 0b5a5097afb..ffae66db273 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1664,6 +1664,9 @@ posix_unlink (call_frame_t *frame, xlator_t *this, ssize_t xattr_size = -1; int32_t is_dht_linkto_file = 0; dict_t *unwind_dict = NULL; + void *uuid = NULL; + char uuid_str[GF_UUID_BUF_SIZE] = {0}; + char gfid_str[GF_UUID_BUF_SIZE] = {0}; DECLARE_OLD_FS_ID_VAR; @@ -1690,6 +1693,20 @@ posix_unlink (call_frame_t *frame, xlator_t *this, priv = this->private; + op_ret = dict_get_ptr (xdata, TIER_LINKFILE_GFID, &uuid); + + if (!op_ret && gf_uuid_compare (uuid, stbuf.ia_gfid)) { + op_errno = ENOENT; + op_ret = -1; + gf_uuid_unparse (uuid, uuid_str); + gf_uuid_unparse (stbuf.ia_gfid, gfid_str); + 
gf_msg_debug (this->name, op_errno, "Mismatch in gfid for path " + "%s. Aborting the unlink. loc->gfid = %s, " + "stbuf->ia_gfid = %s", real_path, + uuid_str, gfid_str); + goto out; + } + op_ret = dict_get_int32 (xdata, DHT_SKIP_OPEN_FD_UNLINK, &check_open_fd); -- cgit