diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 96 |
1 files changed, 83 insertions, 13 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 8dbdb572abd..032ab5c8001 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,6 +45,41 @@ afr_quorum_errno(afr_private_t *priv) return ENOTCONN; } +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid) +{ + if (!__is_root_gfid(pargfid)) { + return _gf_false; + } + + if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { + /*For backward compatibility /.landfill is private*/ + return _gf_true; + } + + if (pid == GF_CLIENT_PID_GSYNCD) { + /*geo-rep needs to create/sync private directory on slave because + * it appears in changelog*/ + return _gf_false; + } + + if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { + if (strcmp(name, priv->anon_inode_name) == 0) { + /* anonymous-inode dir is private*/ + return _gf_true; + } + } else { + if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == + 0) { + /* anonymous-inode dir prefix is private for geo-rep to work*/ + return _gf_true; + } + } + + return _gf_false; +} + void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) @@ -1197,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this, return 0; } -int +static int afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, int *spb_choice) { int ret = -1; - GF_VALIDATE_OR_GOTO(this->name, inode, out); LOCK(&inode->lock); @@ -1214,6 +1248,40 @@ out: return ret; } +/* + * frame is used to get the favourite policy. Since + * afr_inode_split_brain_choice_get was called with afr_open, it is possible to + * have a frame with out local->replies. So in that case, frame is passed as + * null, hence this function will handle the frame NULL case. + */ +int +afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this, + call_frame_t *frame, int *spb_subvol) +{ + int ret = -1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("afr", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out); + + priv = this->private; + + ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol); + if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) { + local = frame->local; + *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode, + NULL); + if (*spb_subvol >= 0) { + ret = 0; + } + } + +out: + return ret; +} int afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int event) @@ -2238,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv, * need is a low probability that multiple clients * won't converge on the same subvolume. */ + gf_uuid_copy(gfid_copy, args->gfid); pid = getpid(); - memcpy(gfid_copy, &pid, sizeof(pid)); + *(pid_t *)gfid_copy ^= pid; } child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % priv->child_count; @@ -2823,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int spb_choice = -1; + int spb_subvol = -1; int child_count = -1; if (*read_subvol != -1) @@ -2833,10 +2902,10 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, local = frame->local; child_count = priv->child_count; - afr_inode_split_brain_choice_get(local->inode, this, &spb_choice); - if ((spb_choice >= 0) && + afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol); + if ((spb_subvol >= 0) && (AFR_COUNT(success_replies, child_count) == child_count)) { - *read_subvol = spb_choice; + *read_subvol = spb_subvol; } else if (!priv->quorum_count || frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) { *read_subvol = afr_first_up_child(frame, this); @@ -3635,7 +3704,7 @@ afr_ta_id_file_check(void *opaque) this = opaque; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate thin-arbiter loc for: %s.", loc.name); @@ -3945,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -5627,6 +5695,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -6620,6 +6689,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); |