diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 671 |
1 files changed, 443 insertions, 228 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 99cf6f787..8f61339e6 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -22,6 +22,7 @@ #include "dht-common.h" #include "defaults.h" #include "byte-order.h" +#include "glusterfs-acl.h" #include <sys/time.h> #include <libgen.h> @@ -62,6 +63,11 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data) } *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr)); + + } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) { + ret = gf_get_min_stime (THIS, dst, key, value); + if (ret < 0) + return ret; } else { /* compare user xattrs only */ if (!strncmp (key, "user.", strlen ("user."))) { @@ -148,9 +154,11 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) int op_errno = 0; int ret = -1; dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; local = discover_frame->local; layout = local->layout; + conf = this->private; LOCK(&discover_frame->lock); { @@ -193,11 +201,14 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) "(overlaps/holes present: %s, " "ENOENT errors: %d)", local->loc.path, (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0); - op_errno = EINVAL; - goto out; + if ((ret > 0) && (ret == conf->subvolume_cnt)) { + op_errno = ESTALE; + goto out; + } } - dht_layout_set (this, local->inode, layout); + if (local->inode) + dht_layout_set (this, local->inode, layout); } DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno, @@ -226,6 +237,7 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int is_dir = 0; int is_linkfile = 0; int attempt_unwind = 0; + dht_conf_t *conf = 0; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -235,6 +247,7 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; prev = cookie; + conf = this->private; layout = local->layout; @@ -269,7 +282,8 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unlock; } - is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); is_dir = check_is_dir (inode, stbuf, xattr); if (is_dir) { @@ -328,23 +342,20 @@ dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc) int i = 0; call_frame_t *discover_frame = NULL; - conf = this->private; local = frame->local; - ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); + ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4); if (ret) gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set 'trusted.glusterfs.dht' key", - loc->path); + "%s: failed to set '%s' key", + loc->path, conf->xattr_name); - ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht.linkto", 256); + ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256); if (ret) gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set 'trusted.glusterfs.dht.linkto' key", - loc->path); + "%s: failed to set '%s' key", + loc->path, conf->link_xattr_name); call_cnt = conf->subvolume_cnt; local->call_cnt = call_cnt; @@ -430,7 +441,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, op_ret, op_errno, xattr); if (op_ret == -1) { - local->op_errno = ENOENT; + local->op_errno = op_errno; gf_log (this->name, GF_LOG_DEBUG, "lookup of %s on %s returned error (%s)", local->loc.path, prev->this->name, @@ -585,7 +596,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, layout = local->layout; is_dir = check_is_dir (inode, stbuf, xattr); - is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); if (is_linkfile) { gf_log (this->name, GF_LOG_INFO, @@ -597,7 +609,7 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } if (is_dir) { - ret = dht_dir_has_layout (xattr); + ret = dht_dir_has_layout (xattr, conf->xattr_name); if (ret >= 0) { if (is_greater_time(local->stbuf.ia_ctime, local->stbuf.ia_ctime_nsec, @@ -760,6 +772,9 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, } unwind: + if (local->linked == _gf_true) + dht_linkfile_attr_heal (frame, this); + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, local->xattr, @@ -883,7 +898,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) hashed_subvol->name); ret = dht_linkfile_create (frame, - dht_lookup_linkfile_create_cbk, + dht_lookup_linkfile_create_cbk, this, cached_subvol, hashed_subvol, &local->loc); return ret; @@ -921,8 +936,9 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, xlator_t *subvol = NULL; loc_t *loc = NULL; xlator_t *link_subvol = NULL; - int ret = -1; - int32_t fd_count = 0; + int ret = -1; + int32_t fd_count = 0; + dht_conf_t *conf = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -932,6 +948,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; loc = &local->loc; + conf = this->private; prev = cookie; subvol = prev->this; @@ -953,7 +970,8 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc->path, prev->this->name); } - is_linkfile = check_is_linkfile (inode, buf, xattr); + is_linkfile = check_is_linkfile (inode, buf, xattr, + conf->link_xattr_name); is_dir = check_is_dir (inode, buf, xattr); if (is_linkfile) { @@ -1114,7 +1132,7 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, goto err; } - if (check_is_linkfile (inode, stbuf, xattr)) { + if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { gf_log (this->name, GF_LOG_INFO, "lookup of %s on %s (following linkfile) reached link", local->loc.path, subvol->name); @@ -1291,7 +1309,8 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); if (!is_linkfile) { /* non-directory and not a linkfile */ @@ -1396,7 +1415,6 @@ dht_lookup (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); conf = this->private; if (!conf) @@ -1471,7 +1489,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this, * revalidates directly go to the cached-subvolume. */ ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); + conf->xattr_name, 4 * 4); if (IA_ISDIR (local->inode->ia_type)) { local->call_cnt = call_cnt = conf->subvolume_cnt; @@ -1506,10 +1524,10 @@ dht_lookup (call_frame_t *frame, xlator_t *this, do_fresh_lookup: /* TODO: remove the hard-coding */ ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); + conf->xattr_name, 4 * 4); ret = dict_set_uint32 (local->xattr_req, - DHT_LINKFILE_KEY, 256); + conf->link_xattr_name, 256); /* need it for self-healing linkfiles which is 'in-migration' state */ @@ -1617,7 +1635,8 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { - if (op_ret == -1) { + if ((op_ret == -1) && !((op_errno == ENOENT) || + (op_errno == ENOTCONN))) { local->op_errno = op_errno; gf_log (this->name, GF_LOG_DEBUG, "subvolume %s returned -1 (%s)", @@ -1630,7 +1649,7 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unlock: UNLOCK (&frame->lock); - if (op_ret == -1) + if (local->op_ret == -1) goto err; cached_subvol = dht_subvol_get_cached (this, local->loc.inode); @@ -1654,41 +1673,6 @@ err: return 0; } -static int -dht_ufo_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_ret = -1; - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, NULL); - } - - return 0; -} - - int dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) @@ -1761,115 +1745,222 @@ dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local, } int -dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this, + int op_errno) { - dht_local_t *local = NULL; - int ret = 0; - int flag = 0; - int this_call_cnt = 0; - char *value_got = NULL; - char layout_buf[8192] = {0,}; - char *xattr_buf = NULL; - dict_t *dict = NULL; - int32_t alloc_len = 0; - int32_t plen = 0; - call_frame_t *prev = NULL; + int ret = -1; + char *value = NULL; + int32_t plen = 0; - local = frame->local; - prev = cookie; + ret = dict_get_str (xattr, local->xsel, &value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Subvolume %s returned -1 (%s)", this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + goto out; + } - if (op_ret >= 0) { - ret = dict_get_str (xattr, local->xsel, &value_got); - if (!ret) { - alloc_len = strlen (value_got); + local->alloc_len += strlen(value); - /** - * allocate the buffer:- we allocate 10 bytes extra in - * case we need to append ' Link: ' in the buffer for - * another STACK_WIND - */ + if (!local->xattr_val) { + local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10); + local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char), + gf_common_mt_char); + if (!local->xattr_val) { + ret = -1; + goto out; + } + } + + if (local->xattr_val) { + plen = strlen (local->xattr_val); + if (plen) { + /* extra byte(s) for \0 to be safe */ + local->alloc_len += (plen + 2); + local->xattr_val = GF_REALLOC (local->xattr_val, + local->alloc_len); if (!local->xattr_val) { - alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10); - local->xattr_val = - GF_CALLOC (alloc_len, - sizeof (char), - gf_common_mt_char); + ret = -1; + goto out; } + } - if (local->xattr_val) { - plen = strlen (local->xattr_val); - if (plen) { - void *p; - /* extra byte(s) for \0 to be safe */ - alloc_len += (plen + 2); - p = GF_REALLOC (local->xattr_val, - alloc_len); - if (!p) - goto out; - local->xattr_val = p; - } + (void) strcat (local->xattr_val, value); + (void) strcat (local->xattr_val, " "); + local->op_ret = 0; + } - strcat (local->xattr_val, value_got); - } - local->op_ret = 0; - } + ret = 0; + + out: + return ret; +} + +int +dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this, + gf_boolean_t flag) +{ + int ret = -1; + char *xattr_buf = NULL; + char layout_buf[8192] = {0,}; + + if (flag) + fill_layout_info (local->layout, layout_buf); + + *dict = dict_new (); + if (!*dict) + goto out; + + local->xattr_val[strlen (local->xattr_val) - 1] = '\0'; + + /* we would need max this many bytes to create xattr string + * extra 40 bytes is just an estimated amount of additional + * space required as we include translator name and some + * spaces, brackets etc. when forming the pathinfo string. + * + * For node-uuid we just don't have all the pretty formatting, + * but since this is a generic routine for pathinfo & node-uuid + * we dont have conditional space allocation and try to be + * generic + */ + local->alloc_len += (2 * strlen (this->name)) + + strlen (layout_buf) + + 40; + xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char), + gf_common_mt_char); + if (!xattr_buf) + goto out; + + if (XATTR_IS_PATHINFO (local->xsel)) { + (void) dht_fill_pathinfo_xattr (this, local, xattr_buf, + local->alloc_len, flag, + layout_buf); + } else if (XATTR_IS_NODE_UUID (local->xsel)) { + (void) snprintf (xattr_buf, local->alloc_len, "%s", + local->xattr_val); } else { - local->op_ret = -1; - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 " - "(%s)", prev->this->name, strerror (op_errno)); + gf_log (this->name, GF_LOG_WARNING, + "Unknown local->xsel (%s)", local->xsel); + goto out; } + ret = dict_set_dynstr (*dict, local->xsel, xattr_buf); + GF_FREE (local->xattr_val); + out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { + return ret; +} - if (local->op_ret == -1) { - goto unwind; - } +int +dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + int ret = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + dict_t *dict = NULL; - if (local->layout->cnt > 1) { - /* Set it for directory */ - fill_layout_info (local->layout, layout_buf); - flag = 1; - } + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); - dict = dict_new (); - - /* we would need max this many bytes to create xattr string */ - alloc_len += (2 * strlen (this->name)) - + strlen (layout_buf) - + 40; - xattr_buf = GF_CALLOC (alloc_len, - sizeof (char), gf_common_mt_char); - - if (XATTR_IS_PATHINFO (local->xsel)) { - (void) dht_fill_pathinfo_xattr (this, local, xattr_buf, - alloc_len, flag, - layout_buf); - } else if (XATTR_IS_NODE_UUID (local->xsel)) { - (void) snprintf (xattr_buf, alloc_len, "%s", - local->xattr_val); - } else - gf_log (this->name, GF_LOG_WARNING, - "Unknown local->xsel (%s)", local->xsel); + local = frame->local; - ret = dict_set_dynstr (dict, local->xsel, xattr_buf); + LOCK (&frame->lock); + { + this_call_cnt = --local->call_cnt; + if (op_ret < 0) { + if (op_errno != ENOTCONN) { + gf_log (this->name, GF_LOG_ERROR, + "getxattr err (%s) for dir", + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } - GF_FREE (local->xattr_val); + goto unlock; + } - DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, - xdata); + ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, + op_errno); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "alloc or fill failure"); + } + unlock: + UNLOCK (&frame->lock); - if (dict) - dict_unref (dict); + if (!is_last_call (this_call_cnt)) + goto out; - return 0; + /* -- last call: do patch ups -- */ + + if (local->op_ret == -1) { + goto unwind; } -unwind: + ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true); + if (ret) + goto unwind; + + DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); + goto cleanup; + + unwind: DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL); + cleanup: + if (dict) + dict_unref (dict); + out: + return 0; +} + +int +dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + dht_local_t *local = NULL; + int ret = 0; + dict_t *dict = NULL; + call_frame_t *prev = NULL; + gf_boolean_t flag = _gf_true; + + local = frame->local; + prev = cookie; + + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 " + "(%s)", prev->this->name, strerror (op_errno)); + goto unwind; + } + + ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, + op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "alloc or fill failure"); + goto unwind; + } + + flag = (local->layout->cnt > 1) ? _gf_true : _gf_false; + + ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag); + if (ret) + goto unwind; + + DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); + goto cleanup; + + unwind: + DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, + NULL, NULL); + cleanup: + if (dict) + dict_unref (dict); + return 0; } @@ -1902,10 +1993,13 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { int this_call_cnt = 0; dht_local_t *local = NULL; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (frame->local, out); + VALIDATE_OR_GOTO (this->private, out); + conf = this->private; local = frame->local; this_call_cnt = dht_frame_return (frame); @@ -1913,8 +2007,8 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!xattr || (op_ret == -1)) goto out; - if (dict_get (xattr, "trusted.glusterfs.dht")) { - dict_del (xattr, "trusted.glusterfs.dht"); + if (dict_get (xattr, conf->xattr_name)) { + dict_del (xattr, conf->xattr_name); } local->op_ret = 0; @@ -1947,9 +2041,72 @@ dht_getxattr_unwind (call_frame_t *frame, int +dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) +{ + int this_call_cnt = 0; + dht_local_t *local = NULL; + + + local = frame->local; + + if (op_ret != -1) { + if (local->xattr) + dict_unref (local->xattr); + local->xattr = dict_ref (xattr); + + if (local->xattr_req) + dict_unref (local->xattr_req); + local->xattr_req = dict_ref (xdata); + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, + local->xattr, local->xattr_req); + } + + return 0; +} + + +int +dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key, dict_t *xdata) +{ + dht_local_t *local = NULL; + int i = 0; + dht_layout_t *layout = NULL; + int cnt = 0; + xlator_t *subvol = NULL; + + + local = frame->local; + layout = local->layout; + + cnt = local->call_cnt = layout->cnt; + + local->op_ret = -1; + local->op_errno = ENODATA; + + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_getxattr_get_real_filename_cbk, + subvol, subvol->fops->getxattr, + loc, key, xdata); + } + + return 0; +} + + +int dht_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *xdata) +#define DHT_IS_DIR(layout) (layout->cnt > 1) { + xlator_t *subvol = NULL; xlator_t *hashed_subvol = NULL; xlator_t *cached_subvol = NULL; @@ -1965,7 +2122,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); VALIDATE_OR_GOTO (this->private, err); conf = this->private; @@ -1993,8 +2149,40 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } } - if (key && ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0) - || strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)) { + if (key && + (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) + && DHT_IS_DIR(layout)) { + dht_getxattr_get_real_filename (frame, this, loc, key, xdata); + return 0; + } + + /* for file use cached subvolume (obviously!): see if {} + * below + * for directory: + * wind to all subvolumes and exclude subvolumes which + * return ENOTCONN (in callback) + * + * NOTE: Don't trust inode here, as that may not be valid + * (until inode_link() happens) + */ + if (key && DHT_IS_DIR(layout) && + ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0) + || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) { + (void) strncpy (local->xsel, key, 256); + cnt = local->call_cnt = layout->cnt; + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_vgetxattr_dir_cbk, + subvol, subvol->fops->getxattr, + loc, key, NULL); + } + return 0; + } + + /* node-uuid or pathinfo for files */ + if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0) + || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) { cached_subvol = local->cached_subvol; (void) strncpy (local->xsel, key, 256); @@ -2004,6 +2192,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, return 0; } + if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) { hashed_subvol = dht_subvol_get_hashed (this, loc); if (!hashed_subvol) { @@ -2036,13 +2225,13 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } if (key && (!strcmp (GF_XATTR_MARKER_KEY, key)) - && (-1 == frame->root->pid)) { - - if (loc->inode-> ia_type == IA_IFDIR) { + && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + if (DHT_IS_DIR(layout)) { cnt = layout->cnt; } else { cnt = 1; } + sub_volumes = alloca ( cnt * sizeof (xlator_t *)); for (i = 0; i < cnt; i++) *(sub_volumes + i) = layout->list[i].xlator; @@ -2050,7 +2239,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, if (cluster_getmarkerattr (frame, this, loc, key, local, dht_getxattr_unwind, sub_volumes, cnt, - MARKER_UUID_TYPE, conf->vol_uuid)) { + MARKER_UUID_TYPE, marker_uuid_default_gauge, + conf->vol_uuid)) { op_errno = EINVAL; goto err; } @@ -2060,8 +2250,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, if (key && *conf->vol_uuid) { if ((match_uuid_local (key, conf->vol_uuid) == 0) && - (-1 == frame->root->pid)) { - if (loc->inode-> ia_type == IA_IFDIR) { + (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + if (DHT_IS_DIR(layout)) { cnt = layout->cnt; } else { cnt = 1; @@ -2074,6 +2264,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, local, dht_getxattr_unwind, sub_volumes, cnt, MARKER_XTIME_TYPE, + marker_xtime_default_gauge, conf->vol_uuid)) { op_errno = EINVAL; goto err; @@ -2083,7 +2274,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } } - if (loc->inode-> ia_type == IA_IFDIR) { + if (DHT_IS_DIR(layout)) { cnt = local->call_cnt = layout->cnt; } else { cnt = local->call_cnt = 1; @@ -2103,6 +2294,7 @@ err: return 0; } +#undef DHT_IS_DIR int dht_fgetxattr (call_frame_t *frame, xlator_t *this, @@ -2174,13 +2366,17 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; dht_local_t *local = NULL; int op_errno = EINVAL; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); VALIDATE_OR_GOTO (fd->inode, err); + VALIDATE_OR_GOTO (this->private, err); + + conf = this->private; - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr, + GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, op_errno, err); local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR); @@ -2284,12 +2480,12 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr, + conf = this->private; + + GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, op_errno, err); - conf = this->private; local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR); if (!local) { op_errno = ENOMEM; @@ -2314,25 +2510,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, local->call_cnt = call_cnt = layout->cnt; - /* This key is sent by Unified File and Object storage - * to test xattr support in backend. - */ - tmp = dict_get (xattr, "user.ufo-test"); - if (tmp) { - if (IA_ISREG (loc->inode->ia_type)) { - op_errno = ENOTSUP; - goto err; - } - local->op_ret = 0; - for (i = 0; i < call_cnt; i++) { - STACK_WIND (frame, dht_ufo_xattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator->fops->setxattr, - loc, xattr, flags, NULL); - } - return 0; - } - tmp = dict_get (xattr, "distribute.migrate-data"); if (tmp) { if (IA_ISDIR (loc->inode->ia_type)) { @@ -2502,18 +2679,20 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, dht_local_t *local = NULL; dht_layout_t *layout = NULL; int call_cnt = 0; + dht_conf_t *conf = NULL; int i; VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (this->private, err); + + conf = this->private; - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*", - key, op_errno, err); + GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR); if (!local) { @@ -2565,13 +2744,16 @@ dht_fremovexattr (call_frame_t *frame, xlator_t *this, dht_local_t *local = NULL; dht_layout_t *layout = NULL; int call_cnt = 0; + dht_conf_t *conf = 0; int i; VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (this->private, err); - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*", - key, op_errno, err); + conf = this->private; + + GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); VALIDATE_OR_GOTO (frame, err); @@ -2742,7 +2924,6 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); VALIDATE_OR_GOTO (this->private, err); conf = this->private; @@ -2862,10 +3043,13 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, list_for_each_entry (orig_entry, (&orig_entries->list), list) { next_offset = orig_entry->d_off; - if ((check_is_dir (NULL, (&orig_entry->d_stat), NULL) && - (prev->this != dht_first_up_subvol (this))) || - check_is_linkfile (NULL, (&orig_entry->d_stat), - orig_entry->dict)) { + if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) && + (prev->this != local->first_up_subvol)) { + continue; + } + if (check_is_linkfile (NULL, (&orig_entry->d_stat), + orig_entry->dict, + conf->link_xattr_name)) { continue; } @@ -2942,13 +3126,16 @@ done: } if (conf->readdir_optimize == _gf_true) { - if (next_subvol != dht_first_up_subvol (this)) { + if (next_subvol != local->first_up_subvol) { ret = dict_set_int32 (local->xattr, GF_READDIR_SKIP_DIRS, 1); if (ret) gf_log (this->name, GF_LOG_ERROR, "dict set failed"); - } + } else { + dict_del (local->xattr, + GF_READDIR_SKIP_DIRS); + } } STACK_WIND (frame, dht_readdirp_cbk, @@ -3081,6 +3268,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (this->private, err); conf = this->private; @@ -3093,6 +3281,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->fd = fd_ref (fd); local->size = size; local->xattr_req = (dict)? dict_ref (dict) : NULL; + local->first_up_subvol = dht_first_up_subvol (this); dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); @@ -3105,20 +3294,22 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, if (local->xattr) { ret = dict_set_uint32 (local->xattr, - "trusted.glusterfs.dht.linkto", - 256); + conf->link_xattr_name, 256); if (ret) gf_log (this->name, GF_LOG_WARNING, - "failed to set 'glusterfs.dht.linkto'" - " key"); + "failed to set '%s' key", + conf->link_xattr_name); if (conf->readdir_optimize == _gf_true) { - if (xvol != dht_first_up_subvol (this)) { + if (xvol != local->first_up_subvol) { ret = dict_set_int32 (local->xattr, GF_READDIR_SKIP_DIRS, 1); if (ret) gf_log (this->name, GF_LOG_ERROR, "Dict set failed"); + } else { + dict_del (local->xattr, + GF_READDIR_SKIP_DIRS); } } } @@ -3288,6 +3479,8 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; goto out; } + if (local->linked == _gf_true) + dht_linkfile_attr_heal (frame, this); out: /* * FIXME: ia_size and st_blocks of preparent and postparent do not have @@ -3296,7 +3489,6 @@ out: * corresponding values from each of the subvolume. * See dht_iatt_merge for reference. */ - DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, preparent, postparent, xdata); @@ -3375,7 +3567,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this, subvol, subvol->fops->mknod, loc, mode, rdev, umask, params); } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol); + + avail_subvol = dht_free_disk_available_subvol (this, subvol, + local); if (avail_subvol != subvol) { /* Choose the minimum filled volume, and create the files there */ @@ -3387,7 +3581,7 @@ dht_mknod (call_frame_t *frame, xlator_t *this, local->umask = umask; dht_linkfile_create (frame, dht_mknod_linkfile_create_cbk, - avail_subvol, subvol, loc); + this, avail_subvol, subvol, loc); } else { gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); @@ -3556,7 +3750,10 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_inode_ctx_time_update (local->loc.parent, this, postparent, 1); } - + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal (frame, this); + } out: DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, @@ -3643,7 +3840,7 @@ dht_link (call_frame_t *frame, xlator_t *this, if (hashed_subvol != cached_subvol) { uuid_copy (local->gfid, oldloc->inode->gfid); - dht_linkfile_create (frame, dht_link_linkfile_cbk, + dht_linkfile_create (frame, dht_link_linkfile_cbk, this, cached_subvol, hashed_subvol, newloc); } else { STACK_WIND (frame, dht_link_cbk, @@ -3700,7 +3897,10 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; goto out; } - + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal (frame, this); + } out: DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent, @@ -3790,7 +3990,7 @@ dht_create (call_frame_t *frame, xlator_t *this, } /* Choose the minimum filled volume, and create the files there */ - avail_subvol = dht_free_disk_available_subvol (this, subvol); + avail_subvol = dht_free_disk_available_subvol (this, subvol, local); if (avail_subvol != subvol) { local->params = dict_ref (params); local->flags = flags; @@ -3801,9 +4001,8 @@ dht_create (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_TRACE, "creating %s on %s (link at %s)", loc->path, avail_subvol->name, subvol->name); - dht_linkfile_create (frame, - dht_create_linkfile_create_cbk, - avail_subvol, subvol, loc); + dht_linkfile_create (frame, dht_create_linkfile_create_cbk, + this, avail_subvol, subvol, loc); goto done; } gf_log (this->name, GF_LOG_TRACE, @@ -3876,6 +4075,15 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = dht_layout_merge (this, layout, prev->this, -1, ENOSPC, NULL); } else { + if (op_ret == -1 && op_errno == EEXIST) + /* Very likely just a race between mkdir and + self-heal (from lookup of a concurrent mkdir + attempt). + Ignore error for now. layout setting will + anyways fail if this was a different (old) + pre-existing different directory. + */ + op_ret = 0; ret = dht_layout_merge (this, layout, prev->this, op_ret, op_errno, NULL); } @@ -4333,6 +4541,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_frame_t *main_frame = NULL; dht_local_t *main_local = NULL; int this_call_cnt = 0; + dht_conf_t *conf = this->private; local = frame->local; prev = cookie; @@ -4344,7 +4553,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret != 0) goto err; - if (check_is_linkfile (inode, stbuf, xattr) == 0) { + if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { main_local->op_ret = -1; main_local->op_errno = ENOTEMPTY; @@ -4379,6 +4588,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, dht_local_t *lookup_local = NULL; dht_local_t *local = NULL; dict_t *xattrs = NULL; + dht_conf_t *conf = this->private; local = frame->local; @@ -4387,7 +4597,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, continue; if (strcmp (trav->d_name, "..") == 0) continue; - if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict)) { + if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict, + conf->link_xattr_name)) { ret++; continue; } @@ -4405,7 +4616,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, return -1; } - ret = dict_set_uint32 (xattrs, DHT_LINKFILE_KEY, 256); + ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key" " in dict"); @@ -4528,6 +4739,7 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_frame_t *prev = NULL; dict_t *dict = NULL; int ret = 0; + dht_conf_t *conf = this->private; local = frame->local; prev = cookie; @@ -4551,12 +4763,11 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } - ret = dict_set_uint32 (dict, - "trusted.glusterfs.dht.linkto", 256); + ret = dict_set_uint32 (dict, conf->link_xattr_name, 256); if (ret) gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set 'trusted.glusterfs.dht.linkto' key", - local->loc.path); + "%s: failed to set '%s' key", + local->loc.path, conf->link_xattr_name); STACK_WIND (frame, dht_rmdir_readdirp_cbk, prev->this, prev->this->fops->readdirp, @@ -4655,7 +4866,6 @@ dht_entrylk (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK); if (!local) { @@ -4936,15 +5146,7 @@ unlock: or wait for anything else. Just propagate blindly */ if (have_heard_from_all) { propagate = 1; - if (conf->defrag) { - ret = pthread_create (&conf->defrag->th, NULL, - gf_defrag_start, this); - if (ret) { - conf->defrag = NULL; - GF_FREE (conf->defrag); - kill (getpid(), SIGTERM); - } - } + } @@ -4966,6 +5168,19 @@ unlock: /* continue to check other events for CHILD_UP */ } } + + /* rebalance is started with assert_no_child_down. So we do + * not need to handle CHILD_DOWN event here. + */ + if (conf->defrag) { + ret = gf_thread_create (&conf->defrag->th, NULL, + gf_defrag_start, this); + if (ret) { + conf->defrag = NULL; + GF_FREE (conf->defrag); + kill (getpid(), SIGTERM); + } + } } ret = 0; |
