diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 559 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 20 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 97 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 123 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.h | 10 |
5 files changed, 733 insertions, 76 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5f7996a9ad6..c5105d27b91 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -442,7 +442,8 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, is_dir = check_is_dir (inode, stbuf, xattr); if (!is_dir) { gf_log (this->name, GF_LOG_DEBUG, - "lookup of %s on %s returned non dir 0%o", + "lookup of %s on %s returned non dir 0%o " + "calling lookup_everywhere", local->loc.path, prev->this->name, stbuf->ia_type); local->need_selfheal = 1; @@ -541,6 +542,12 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { + + gf_log (this->name, GF_LOG_DEBUG, + "revalidate lookup of %s " + "returned with op_ret %d and op_errno %d", + local->loc.path, op_ret, op_errno); + if (op_ret == -1) { local->op_errno = op_errno; @@ -564,6 +571,14 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * the file is not migrated */ if (op_errno == ENOENT) { if (IA_ISREG (local->loc.inode->ia_type)) { + + gf_log (this->name, GF_LOG_DEBUG, + "found ENOENT for %s. 
" + "Setting " + "need_lookup_everywhere" + " flag to 1", + local->loc.path); + local->need_lookup_everywhere = 1; } } @@ -760,9 +775,16 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, } unwind: + gf_log (this->name, GF_LOG_DEBUG, + "creation of linkto on hashed subvol:%s, " + "returned with op_ret %d and op_errno %d: %s", + local->hashed_subvol->name, + op_ret, op_errno, uuid_utoa (local->loc.gfid)); + if (local->linked == _gf_true) dht_linkfile_attr_heal (frame, this); + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, local->xattr, @@ -771,6 +793,176 @@ out: return ret; } +int +dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int this_call_cnt = 0; + dht_local_t *local = NULL; + const char *path = NULL; + + local = (dht_local_t*)frame->local; + path = local->loc.path; + + gf_log (this->name, GF_LOG_INFO, "lookup_unlink returned with " + "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno, + ((path == NULL)? "null" : path )); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + dht_lookup_everywhere_done (frame, this); + } + + return 0; +} + +int +dht_lookup_unlink_of_false_linkto_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int this_call_cnt = 0; + dht_local_t *local = NULL; + const char *path = NULL; + + local = (dht_local_t*)frame->local; + path = local->loc.path; + + gf_log (this->name, GF_LOG_INFO, "lookup_unlink returned with " + "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno, + ((path == NULL)? 
"null" : path )); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + + if (op_ret == 0) { + dht_lookup_everywhere_done (frame, this); + } else { + /*When dht_lookup_everywhere is performed, one cached + *and one hashed file was found and hashed file does + *not point to the above mentioned cached node. So it + *was considered as stale and an unlink was performed. + *But unlink fails. So may be rebalance is in progress. + *now ideally we have two data-files. One obtained during + *lookup_everywhere and one where unlink-failed. So + *at this point in time we cannot decide which one to + *choose because there are chances of first cached + *file is truncated after rebalance and if it is choosen + *as cached node, application will fail. So return EIO.*/ + + if (op_errno == EBUSY) { + + gf_log (this->name, GF_LOG_ERROR, + "Could not unlink the linkto file as " + "either fd is open and/or linkto xattr " + "is set for %s", + ((path == NULL)? "null":path)); + + } + DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, + NULL, NULL); + + } + } + + return 0; +} + +int +dht_lookup_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + + dht_local_t *local = NULL; + const char *path = NULL; + + /* NOTE: + * If stale file unlink fails either there is an open-fd or is not an + * dht-linkto-file then posix_unlink returns EBUSY, which is overwritten + * to ENOENT + */ + + local = frame->local; + + if (local && local->loc.path) + path = local->loc.path; + + gf_log (this->name, GF_LOG_INFO, "Returned with op_ret %d and " + "op_errno %d for %s", op_ret, op_errno, + ((path==NULL)?"null":path)); + + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL, + NULL); + + return 0; +} + +int +dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict) { + + int ret = 0; + + ret = dict_set_int32 (dict, 
DHT_SKIP_NON_LINKTO_UNLINK, 1); + + if (ret) + goto err; + + ret = dict_set_int32 (dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + + if (ret) + goto err; + + + return 0; + +err: + return -1; + +} +/* Rebalance is performed from cached_node to hashed_node. Initial cached_node + * contains a non-linkto file. After migration it is converted to linkto and + * then unlinked. And at hashed_subvolume, first a linkto file is present, + * then after migration it is converted to a non-linkto file. + * + * Let's assume a file is present on cached subvolume and a new brick is added + * and new brick is the new_hashed subvolume. So fresh lookup on newly added + * hashed subvolume will fail and dht_lookup_everywhere gets called. If just + * before sending the dht_lookup_everywhere request rebalance is in progress, + * + * from cached subvolume it may see: Nonlinkto or linkto or No file + * from hashed subvolume it may see: No file or linkto file or non-linkto file + * + * So this boils down to 9 cases: + * at cached_subvol at hashed_subvol + * ---------------- ----------------- + * + *a) No file No file + * [request reached after [Request reached before + * migration] Migration] + * + *b) No file Linkto File + * + *c) No file Non-Linkto File + * + *d) Linkto No-File + * + *e) Linkto Linkto + * + *f) Linkto Non-Linkto + * + *g) NonLinkto No-File + * + *h) NonLinkto Linkto + * + *i) NonLinkto NonLinkto + * + * dht_lookup_everywhere_done takes its decision based on which of the above cases applies + */ int dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) @@ -780,6 +972,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) xlator_t *hashed_subvol = NULL; xlator_t *cached_subvol = NULL; dht_layout_t *layout = NULL; + gf_boolean_t found_non_linkto_on_hashed = _gf_false; local = frame->local; hashed_subvol = local->hashed_subvol; @@ -801,19 +994,210 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) return 0; } + gf_log (this->name, GF_LOG_INFO, "STATUS: 
hashed_subvol %s " + "cached_subvol %s", + (hashed_subvol == NULL)?"null":hashed_subvol->name, + (cached_subvol == NULL)?"null":cached_subvol->name); + if (!cached_subvol) { - DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL, - NULL); + + if (local->skip_unlink.handle_valid_link && hashed_subvol) { + + /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK": + * If this lookup is performed by rebalance and this + * rebalance process detected hashed file and by + * the time it sends the lookup request to cached node, + * file got migrated and now at initial hashed_node, + * final migrated file is present. With current logic, + * because this process fails to find the cached_node, + * it will unlink the file at initial hashed_node. + * + * So we avoid this by setting key, and checking at the + * posix_unlink that unlink the file only if file is a + * linkto file and not a migrated_file. + */ + + + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file + (local->xattr_req); + + if (ret) { + /* If for some reason, setting key in the dict + * fails, return with ENOENT, as with respect to + * this process, it detected only a stale link + * file. + * + * Next lookup will delete it. + * + * Performing deletion of stale link file when + * setting key in dict fails, may cause the data + * loss because of the above mentioned race. 
+ */ + + + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, + NULL, NULL, NULL, NULL); + } else { + local->skip_unlink.handle_valid_link = _gf_false; + + gf_log (this->name, GF_LOG_DEBUG, + "No Cached was found and " + "unlink on hashed was skipped" + " so performing now: %s", + local->loc.path); + + STACK_WIND (frame, + dht_lookup_unlink_stale_linkto_cbk, + hashed_subvol, + hashed_subvol->fops->unlink, + &local->loc, 0, local->xattr_req); + } + + } else { + gf_log (this->name, GF_LOG_DEBUG, + "There was no cached file and " + "unlink on hashed is not skipped %s", + local->loc.path); + + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, + NULL, NULL); + } return 0; } - if (local->need_lookup_everywhere) { - if (uuid_compare (local->gfid, local->inode->gfid)) { - /* GFID different, return error */ - DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, - NULL, NULL, NULL); - return 0; + /* At the time of dht_lookup, no file was found on hashed and that is + * why dht_lookup_everywhere is called, but by the time + * dht_lookup_everywhere + * reached to server, file might have already migrated. In that case we + * will find a migrated file at the hashed_node. In this case store the + * layout in context and return successfully. 
+ */ + + if (hashed_subvol || local->need_lookup_everywhere) { + + if (local->need_lookup_everywhere) { + + found_non_linkto_on_hashed = _gf_true; + + } else if ((local->file_count == 1) && + (hashed_subvol == cached_subvol)) { + + gf_log (this->name, GF_LOG_DEBUG, + "found cached file on hashed subvolume " + "so store in context and return for %s", + local->loc.path); + + found_non_linkto_on_hashed = _gf_true; } + + if (found_non_linkto_on_hashed) + goto preset_layout; + + } + + + if (hashed_subvol) { + if (local->skip_unlink.handle_valid_link == _gf_true) { + if (cached_subvol == local->skip_unlink.hash_links_to) { + + if (uuid_compare (local->skip_unlink.cached_gfid, + local->skip_unlink.hashed_gfid)){ + + /*GFID different, return error*/ + DHT_STACK_UNWIND (lookup, frame, -1, + ESTALE, NULL, NULL, NULL, + NULL); + + + } + + ret = dht_layout_preset (this, cached_subvol, + local->loc.inode); + if (ret) { + gf_log (this->name, GF_LOG_INFO, + "Could not set pre-set layout " + "for subvolume %s", + cached_subvol->name); + } + + local->op_ret = (ret == 0) ? ret : -1; + local->op_errno = (ret == 0) ? ret : EINVAL; + + /* Presence of local->cached_subvol validates + * that lookup from cached node is successful + */ + + if (!local->op_ret && local->loc.parent) { + dht_inode_ctx_time_update + (local->loc.parent, this, + &local->postparent, 1); + } + + gf_log (this->name, GF_LOG_DEBUG, + "Skipped unlinking linkto file " + "on the hashed subvolume. " + "Returning success as it is a " + "valid linkto file. Path:%s" + ,local->loc.path); + + goto unwind_hashed_and_cached; + } else { + + local->skip_unlink.handle_valid_link = _gf_false; + + gf_log (this->name, GF_LOG_DEBUG, + "Linkto file found on hashed " + "subvol " + "and data file found on cached " + "subvolume. 
But linkto points to " + "different cached subvolume (%s) " + "path %s", + local->skip_unlink.hash_links_to->name, + local->loc.path); + + if (local->skip_unlink.opend_fd_count == 0) { + + + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file + (local->xattr_req); + + + if (ret) { + DHT_STACK_UNWIND (lookup, frame, -1, + EIO, NULL, NULL, + NULL, NULL); + } else { + local->call_cnt = 1; + STACK_WIND (frame, + dht_lookup_unlink_of_false_linkto_cbk, + hashed_subvol, + hashed_subvol->fops->unlink, + &local->loc, 0, + local->xattr_req); + } + + return 0; + + } + } + + } + } + + +preset_layout: + + if (found_non_linkto_on_hashed) { + + if (local->need_lookup_everywhere) { + if (uuid_compare (local->gfid, local->inode->gfid)) { + /* GFID different, return error */ + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, + NULL, NULL, NULL, NULL); + return 0; + } + } + local->op_ret = 0; local->op_errno = 0; layout = dht_layout_for_subvol (this, cached_subvol); @@ -890,26 +1274,15 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) cached_subvol, hashed_subvol, &local->loc); return ret; -} - - -int -dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int this_call_cnt = 0; - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - dht_lookup_everywhere_done (frame, this); - } +unwind_hashed_and_cached: + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->loc.inode, &local->stbuf, local->xattr, + &local->postparent); return 0; } - int dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @@ -924,8 +1297,9 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, xlator_t *subvol = NULL; loc_t *loc = NULL; xlator_t *link_subvol = NULL; - int ret = -1; - int32_t fd_count = 0; + 
int ret = -1; + int32_t fd_count = 0; + dict_t *dict_req = {0}; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -939,6 +1313,11 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, prev = cookie; subvol = prev->this; + gf_log (this->name, GF_LOG_DEBUG, + "returned with op_ret %d and op_errno %d (%s) " + "from subvol %s", op_ret, op_errno, loc->path, + subvol->name); + LOCK (&frame->lock); { if (op_ret == -1) { @@ -957,6 +1336,13 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } is_linkfile = check_is_linkfile (inode, buf, xattr); + if (is_linkfile) { + gf_log (this->name, GF_LOG_DEBUG, + "Found linktofile on %s for %s", + subvol->name, loc->path); + + } + is_dir = check_is_dir (inode, buf, xattr); if (is_linkfile) { @@ -981,18 +1367,26 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } else { local->file_count++; + gf_log (this->name, GF_LOG_DEBUG, + "found cached file on %s for %s", + subvol->name, loc->path); + if (!local->cached_subvol) { /* found one file */ dht_iatt_merge (this, &local->stbuf, buf, subvol); local->xattr = dict_ref (xattr); local->cached_subvol = subvol; + gf_log (this->name, GF_LOG_DEBUG, - "found on %s file %s", + "datafile found on %s file %s", subvol->name, loc->path); dht_iatt_merge (this, &local->postparent, postparent, subvol); + + uuid_copy (local->skip_unlink.cached_gfid, + buf->ia_gfid); } else { /* This is where we need 'rename' both entries logic */ gf_log (this->name, GF_LOG_WARNING, @@ -1009,15 +1403,68 @@ unlock: if (is_linkfile) { ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count); - /* Delete the linkfile only if there are no open fds on it. 
- if there is a open-fd, it may be in migration */ - if (!ret && (fd_count == 0)) { - gf_log (this->name, GF_LOG_INFO, - "deleting stale linkfile %s on %s", - loc->path, subvol->name); - STACK_WIND (frame, dht_lookup_unlink_cbk, - subvol, subvol->fops->unlink, loc, 0, NULL); - return 0; + + /* Any linkto file found on the non-hashed subvolume should + * be unlinked (performed in the "else if" block below) + * + * But if a linkto file is found on hashed subvolume, it may be + * pointing to a valid cached node. So unlinking of linkto + * file on hashed subvolume is skipped and inside + * dht_lookup_everywhere_done, checks are performed. If this + * linkto file is found as stale linkto file, it is deleted + * otherwise unlink is skipped. + */ + + if (local->hashed_subvol && local->hashed_subvol == subvol) { + + local->skip_unlink.handle_valid_link = _gf_true; + local->skip_unlink.opend_fd_count = fd_count; + local->skip_unlink.hash_links_to = link_subvol; + uuid_copy (local->skip_unlink.hashed_gfid, + buf->ia_gfid); + + gf_log (this->name, GF_LOG_DEBUG, "Found" + " one linkto file on hashed subvol %s " + "for %s: Skipping unlinking till " + "everywhere_done", subvol->name, + loc->path); + + } else if (!ret && (fd_count == 0)) { + + dict_req = dict_new (); + + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file + (dict_req); + + if (ret) { + + /* Skip unlinking for dict_failure + *File is found as a linkto file on non-hashed + *subvolume. In the current implementation, + *finding a linkto-file on non-hashed does not + *always imply that it is stale. So deletion + *of file should be done only when both fd is + *closed and linkto-xattr is set. In case of + *dict_set failure, skip unlinking the file. + *NOTE: dht_frame_return should get called for + * this block. 
+ */ + + dict_unref (dict_req); + + } else { + gf_log (this->name, GF_LOG_INFO, + "attempting deletion of stale linkfile " + "%s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_lookup_unlink_cbk, + subvol, subvol->fops->unlink, loc, + 0, dict_req); + + dict_unref (dict_req); + + return 0; + } } } @@ -1054,6 +1501,9 @@ dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc) if (!local->inode) local->inode = inode_ref (loc->inode); + gf_log (this->name, GF_LOG_DEBUG, + "winding lookup call to %d subvols", call_cnt); + for (i = 0; i < call_cnt; i++) { STACK_WIND (frame, dht_lookup_everywhere_cbk, conf->subvolumes[i], @@ -1252,9 +1702,14 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!op_ret && uuid_is_null (local->gfid)) memcpy (local->gfid, stbuf->ia_gfid, 16); + gf_log (this->name, GF_LOG_DEBUG, + "fresh_lookup returned for %s with op_ret %d and " + "op_errno %d", loc->path, op_ret, op_errno); + if (ENTRY_MISSING (op_ret, op_errno)) { gf_log (this->name, GF_LOG_TRACE, "Entry %s missing on subvol" " %s", loc->path, prev->this->name); + if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) { local->op_errno = ENOENT; dht_lookup_everywhere (frame, this, loc); @@ -1313,13 +1768,17 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "linkfile not having link subvolume. 
path=%s", - loc->path); + gf_log (this->name, GF_LOG_INFO, "linkfile not having link " + "subvol for %s", loc->path); + dht_lookup_everywhere (frame, this, loc); return 0; } + gf_log (this->name, GF_LOG_DEBUG, + "Calling lookup on linkto target %s for path %s", + subvol->name, loc->path); + STACK_WIND (frame, dht_lookup_linkfile_cbk, subvol, subvol->fops->lookup, &local->loc, local->xattr_req); @@ -1465,6 +1924,13 @@ dht_lookup (call_frame_t *frame, xlator_t *this, dht_layout_unref (this, local->layout); local->layout = NULL; local->cached_subvol = NULL; + + gf_log (this->name, GF_LOG_WARNING, + "Called revalidate lookup for %s, " + "but layout->gen (%d) is less than " + "conf->gen (%d), calling fresh_lookup", + loc->path, layout->gen, conf->gen); + goto do_fresh_lookup; } @@ -1521,6 +1987,10 @@ dht_lookup (call_frame_t *frame, xlator_t *this, for (i = 0; i < call_cnt; i++) { subvol = layout->list[i].xlator; + gf_log (this->name, GF_LOG_DEBUG, "calling " + "revalidate lookup for %s at %s", + loc->path, subvol->name); + STACK_WIND (frame, dht_revalidate_cbk, subvol, subvol->fops->lookup, &local->loc, local->xattr_req); @@ -1565,6 +2035,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this, "no subvolume in layout for path=%s, " "checking on all the subvols to see if " "it is a directory", loc->path); + call_cnt = conf->subvolume_cnt; local->call_cnt = call_cnt; @@ -1575,6 +2046,10 @@ dht_lookup (call_frame_t *frame, xlator_t *this, goto err; } + gf_log (this->name, GF_LOG_DEBUG, + "Found null hashed subvol. 
Calling lookup" + " on all nodes."); + for (i = 0; i < call_cnt; i++) { STACK_WIND (frame, dht_lookup_dir_cbk, conf->subvolumes[i], @@ -1584,6 +2059,10 @@ dht_lookup (call_frame_t *frame, xlator_t *this, return 0; } + gf_log (this->name, GF_LOG_DEBUG, + "Calling fresh lookup for %s on" + " %s", loc->path, hashed_subvol->name); + STACK_WIND (frame, dht_lookup_cbk, hashed_subvol, hashed_subvol->fops->lookup, loc, local->xattr_req); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 83725f09712..c7f20a28383 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -96,6 +96,15 @@ struct dht_rebalance_ { dict_t *xdata; }; +struct dht_skip_linkto_unlink { + + gf_boolean_t handle_valid_link; + int opend_fd_count; + xlator_t *hash_links_to; + uuid_t cached_gfid; + uuid_t hashed_gfid; +}; + struct dht_local { int call_cnt; loc_t loc; @@ -184,6 +193,9 @@ struct dht_local { xlator_t *first_up_subvol; gf_boolean_t added_link; + + struct dht_skip_linkto_unlink skip_unlink; + }; typedef struct dht_local dht_local_t; @@ -752,4 +764,12 @@ dht_inodectx_dump (xlator_t *this, inode_t *inode); int dht_subvol_status (dht_conf_t *conf, xlator_t *subvol); +void +dht_log_new_layout_for_dir_selfheal (xlator_t *this, loc_t *loc, + dht_layout_t *layout); +int +dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this); + +int +dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict); #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index d6e34f92036..725e0c8c7b0 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -237,13 +237,15 @@ out: } static inline int -__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, - dict_t *dict, fd_t **dst_fd) +__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, + struct iatt *stbuf, dict_t 
*dict, + fd_t **dst_fd) { - xlator_t *this = NULL; - int ret = -1; - fd_t *fd = NULL; - struct iatt new_stbuf = {0,}; + xlator_t *this = NULL; + int ret = -1; + fd_t *fd = NULL; + struct iatt new_stbuf = {0,}; + struct iatt check_stbuf = {0,}; this = THIS; @@ -300,6 +302,46 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc goto out; } + /*Reason for doing lookup after create again: + *In the create, there is some time-gap between opening fd at the + *server (posix_layer) and binding it in server (incrementing fd count), + *so if in that time-gap, if other process sends unlink considering it + *as a linkto file, because inode->fd count will be 0, so file will be + *unlinked at the backend. And because further operations are performed + *on fd, so though migration will be done but will end with no file + *at the backend. + */ + + + ret = syncop_lookup (to, loc, NULL, &check_stbuf, NULL, NULL); + if (!ret) { + if (uuid_compare (stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "file %s exists in %s with different gfid," + "found in lookup after create", + loc->path, to->name); + ret = -1; + fd_unref (fd); + goto out; + } + + } + + if (-ret == ENOENT) { + gf_log (this->name, GF_LOG_ERROR, + "%s: file does not exists" + "on %s (%s)", loc->path, to->name, strerror (-ret)); + ret = -1; + fd_unref (fd); + goto out; + } + + ret = syncop_fsetxattr (to, fd, dict, 0); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set xattr on %s (%s)", + loc->path, to->name, strerror (-ret)); + ret = syncop_ftruncate (to, fd, stbuf->ia_size); if (ret < 0) gf_log (this->name, GF_LOG_ERROR, @@ -650,17 +692,18 @@ int dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, int flag) { - int ret = -1; - struct iatt new_stbuf = {0,}; - struct iatt stbuf = {0,}; - struct iatt empty_iatt = {0,}; - ia_prot_t src_ia_prot = {0,}; - fd_t *src_fd = NULL; - fd_t *dst_fd = NULL; - dict_t *dict = NULL; - dict_t *xattr = NULL; - dict_t *xattr_rsp = NULL; - int file_has_holes = 0; + int ret = -1; + struct iatt new_stbuf = {0,}; + struct iatt stbuf = {0,}; + struct iatt empty_iatt = {0,}; + ia_prot_t src_ia_prot = {0,}; + fd_t *src_fd = NULL; + fd_t *dst_fd = NULL; + dict_t *dict = 
NULL; - dict_t *xattr = NULL; - dict_t *xattr_rsp = NULL; - int file_has_holes = 0; + int ret = -1; + struct iatt new_stbuf = {0,}; + struct iatt stbuf = {0,}; + struct iatt empty_iatt = {0,}; + ia_prot_t src_ia_prot = {0,}; + fd_t *src_fd = NULL; + fd_t *dst_fd = NULL; + dict_t *dict = NULL; + dict_t *xattr = NULL; + dict_t *xattr_rsp = NULL; + int file_has_holes = 0; + int rcvd_enoent_from_src = 0; gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -827,15 +870,31 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, } /* Do a stat and check the gfid before unlink */ + + /* + * Cached file changes its state from non-linkto to linkto file after + * migrating data. If lookup from any other mount-point is performed, + * converted-linkto-cached file will be treated as a stale and will be + * unlinked. But by this time, file is already migrated. So further + * failure because of ENOENT should not be treated as error + */ + ret = syncop_stat (from, loc, &empty_iatt); if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to do a stat on %s (%s)", loc->path, from->name, strerror (errno)); - goto out; + + if (-ret != ENOENT) { + ret = -1; + goto out; + } + + rcvd_enoent_from_src = 1; } - if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) { + if ((uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0 ) && + (!rcvd_enoent_from_src)) { /* take out the source from namespace */ ret = syncop_unlink (from, loc); if (ret) { diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index dc3a709cd26..bf5c188e5ca 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1019,20 +1019,60 @@ out: return 0; } +int32_t +posix_unlink_gfid_handle_and_entry (xlator_t *this, const char *real_path, + struct iatt *stbuf, int32_t *op_errno) +{ + int32_t ret = 0; + + /* Unlink the gfid_handle_first */ + + if (stbuf && stbuf->ia_nlink == 1) { + ret = 
posix_handle_unset (this, stbuf->ia_gfid, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "unlink of gfid handle failed for path:%s with" + "gfid %s with errno:%s", real_path, + uuid_utoa (stbuf->ia_gfid), strerror (errno)); + } + } + + /* Unlink the actual file */ + ret = sys_unlink (real_path); + if (ret == -1) { + if (op_errno) + *op_errno = errno; + + gf_log (this->name, GF_LOG_ERROR, + "unlink of %s failed: %s", real_path, + strerror (errno)); + goto err; + } + + return 0; + +err: + return -1; +} int32_t posix_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - char *par_path = NULL; - int32_t fd = -1; - struct iatt stbuf = {0,}; - struct posix_private *priv = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; + struct iatt stbuf = {0,}; + struct posix_private *priv = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + int32_t unlink_if_linkto = 0; + int32_t check_open_fd = 0; + int32_t skip_unlink = 0; + ssize_t xattr_size = -1; + int32_t is_dht_linkto_file = 0; DECLARE_OLD_FS_ID_VAR; @@ -1052,10 +1092,62 @@ posix_unlink (call_frame_t *frame, xlator_t *this, goto out; } - if (stbuf.ia_nlink == 1) - posix_handle_unset (this, stbuf.ia_gfid, NULL); - priv = this->private; + + op_ret = dict_get_int32 (xdata, DHT_SKIP_OPEN_FD_UNLINK, + &check_open_fd); + + if (!op_ret && check_open_fd) { + + LOCK (&loc->inode->lock); + + if (loc->inode->fd_count) { + skip_unlink = 1; + } + + UNLOCK (&loc->inode->lock); + + gf_log (this->name, GF_LOG_INFO, "open-fd-key-status: " + "%"PRIu32" for %s", skip_unlink, real_path); + + if (skip_unlink) { + op_ret = -1; + op_errno = EBUSY; + goto out; + } + } + + + op_ret = dict_get_int32 (xdata, DHT_SKIP_NON_LINKTO_UNLINK, + &unlink_if_linkto); + + if (!op_ret && 
unlink_if_linkto) { + + LOCK (&loc->inode->lock); + + xattr_size = sys_lgetxattr (real_path, LINKTO, NULL, 0); + + if (xattr_size <= 0) { + skip_unlink = 1; + } else { + is_dht_linkto_file = IS_DHT_LINKFILE_MODE (&stbuf); + if (!is_dht_linkto_file) + skip_unlink = 1; + } + + UNLOCK (&loc->inode->lock); + + gf_log (this->name, GF_LOG_INFO, "linkto_xattr status: " + "%"PRIu32" for %s", skip_unlink, real_path); + + if (skip_unlink) { + op_ret = -1; + op_errno = EBUSY; + goto out; + } + } + + if (priv->background_unlink) { if (IA_ISREG (loc->inode->ia_type)) { fd = open (real_path, O_RDONLY); @@ -1070,12 +1162,9 @@ posix_unlink (call_frame_t *frame, xlator_t *this, } } - op_ret = sys_unlink (real_path); + op_ret = posix_unlink_gfid_handle_and_entry (this, real_path, &stbuf, + &op_errno); if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "unlink of %s failed: %s", real_path, - strerror (op_errno)); goto out; } diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 58f445c699a..80121c08c8f 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -49,6 +49,16 @@ #include "posix-aio.h" #endif +#define VECTOR_SIZE 64 * 1024 /* vector size 64KB*/ +#define MAX_NO_VECT 1024 + +#define LINKTO "trusted.glusterfs.dht.linkto" + +#define POSIX_GFID_HANDLE_SIZE(base_path_len) (base_path_len + SLEN("/") \ + + SLEN(GF_HIDDEN_PATH) + SLEN("/") \ + + SLEN("00/") \ + + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */; + /** * posix_fd - internal structure common to file and directory fd's */ |