summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/afr/src/afr.c')
-rw-r--r--xlators/cluster/afr/src/afr.c1759
1 files changed, 1208 insertions, 551 deletions
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index ea686dfde19..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -15,673 +15,1330 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "afr-common.c"
-
-#define SHD_INODE_LRU_LIMIT 2048
-#define AFR_EH_HEALED_LIMIT 1024
-#define AFR_EH_HEAL_FAIL_LIMIT 1024
-#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#include "afr-messages.h"
struct volume_options options[];
+static char *afr_favorite_child_policies[AFR_FAV_CHILD_POLICY_MAX + 1] = {
+ [AFR_FAV_CHILD_NONE] = "none",
+ [AFR_FAV_CHILD_BY_SIZE] = "size",
+ [AFR_FAV_CHILD_BY_CTIME] = "ctime",
+ [AFR_FAV_CHILD_BY_MTIME] = "mtime",
+ [AFR_FAV_CHILD_BY_MAJORITY] = "majority",
+ [AFR_FAV_CHILD_POLICY_MAX] = NULL,
+};
+
int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
+notify(xlator_t *this, int32_t event, void *data, ...)
{
- int ret = -1;
- va_list ap;
- void *data2 = NULL;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- va_start (ap, data);
- data2 = va_arg (ap, dict_t*);
- va_end (ap);
- ret = afr_notify (this, event, data, data2);
+ va_start(ap, data);
+ data2 = va_arg(ap, dict_t *);
+ va_end(ap);
+ ret = afr_notify(this, event, data, data2);
- return ret;
+ return ret;
}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_afr_mt_end + 1);
- if (!this)
- return ret;
+ if (ret != 0) {
+ return ret;
+ }
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+ return ret;
+}
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
+int
+xlator_subvolume_index(xlator_t *this, xlator_t *subvol)
+{
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ list = this->children;
+
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp(subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
}
+ list = list->next;
+ i++;
+ }
- return ret;
+ return index;
}
+static void
+fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype,
+ dict_t *options)
+{
+ if (dict_get_sizen(options, "quorum-type") == NULL) {
+ /* If user doesn't configure anything enable auto-quorum if the
+ * replica has more than two subvolumes */
+ if (priv->child_count > 2)
+ qtype = "auto";
+ }
+
+ if (priv->quorum_count && strcmp(qtype, "fixed")) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE,
+ "quorum-type %s overriding quorum-count %u", qtype,
+ priv->quorum_count);
+ }
+
+ if (!strcmp(qtype, "none")) {
+ priv->quorum_count = 0;
+ } else if (!strcmp(qtype, "auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
+ }
+}
int
-xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
+afr_set_favorite_child_policy(afr_private_t *priv, char *policy)
{
- int index = -1;
- int i = 0;
- xlator_list_t *list = NULL;
-
- list = this->children;
-
- while (list) {
- if (subvol == list->xlator ||
- strcmp (subvol->name, list->xlator->name) == 0) {
- index = i;
- break;
- }
- list = list->next;
- i++;
- }
+ int index = -1;
+
+ index = gf_get_index_by_elem(afr_favorite_child_policies, policy);
+ if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
+ return -1;
+
+ priv->fav_child_policy = index;
+
+ return 0;
+}
- return index;
+static void
+set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
+{
+ if (!algo) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ } else if (strcmp(algo, "full") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp(algo, "diff") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ }
}
void
-fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
{
- if (priv->quorum_count && strcmp(qtype,"fixed")) {
- gf_log(this->name,GF_LOG_WARNING,
- "quorum-type %s overriding quorum-count %u",
- qtype, priv->quorum_count);
- }
- if (!strcmp(qtype,"none")) {
- priv->quorum_count = 0;
- }
- else if (!strcmp(qtype,"auto")) {
- priv->quorum_count = AFR_QUORUM_AUTO;
- }
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
}
int
-reconfigure (xlator_t *this, dict_t *options)
+reconfigure(xlator_t *this, dict_t *options)
{
- afr_private_t *priv = NULL;
- xlator_t *read_subvol = NULL;
- int read_subvol_index = -1;
- int ret = -1;
- int index = -1;
- char *qtype = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int timeout_old = 0;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *data_self_heal = NULL;
+ char *data_self_heal_algorithm = NULL;
+ char *locking_scheme = NULL;
+ gf_boolean_t consistent_io = _gf_false;
+ gf_boolean_t choose_local_old = _gf_false;
+ gf_boolean_t enabled_old = _gf_false;
- priv = this->private;
+ priv = this->private;
- GF_OPTION_RECONF ("background-self-heal-count",
- priv->background_self_heal_count, options, uint32,
- out);
+ GF_OPTION_RECONF("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, options, bool, out);
- GF_OPTION_RECONF ("metadata-self-heal",
- priv->metadata_self_heal, options, bool, out);
+ GF_OPTION_RECONF("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32, out);
- GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
- out);
+ GF_OPTION_RECONF("heal-wait-queue-length", priv->heal_wait_qlen, options,
+ uint32, out);
- GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
- bool, out);
+ GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options,
+ bool, out);
- GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
- out);
+ GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_RECONF ("data-self-heal-window-size",
- priv->data_self_heal_window_size, options,
- uint32, out);
+ GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
+ out);
- GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
- bool, out);
+ GF_OPTION_RECONF("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options, uint32, out);
- GF_OPTION_RECONF ("metadata-change-log",
- priv->metadata_change_log, options, bool, out);
+ GF_OPTION_RECONF("data-self-heal-algorithm", data_self_heal_algorithm,
+ options, str, out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
- bool, out);
+ GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out);
- GF_OPTION_RECONF ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, options, str, out);
+ GF_OPTION_RECONF("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ options, uint32, out);
- GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
+ GF_OPTION_RECONF("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ options, uint32, out);
- GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF("halo-max-latency", priv->halo_max_latency_msec, options,
+ uint32, out);
- GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
- options, uint32, out);
+ GF_OPTION_RECONF("halo-max-replicas", priv->halo_max_replicas, options,
+ uint32, out);
- if (read_subvol) {
- index = xlator_subvolume_index (this, read_subvol);
- if (index == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
- priv->read_child = index;
- }
+ GF_OPTION_RECONF("halo-min-replicas", priv->halo_min_replicas, options,
+ uint32, out);
+
+ GF_OPTION_RECONF("read-subvolume", read_subvol, options, xlator, out);
- GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
+ choose_local_old = priv->choose_local;
+ GF_OPTION_RECONF("choose-local", priv->choose_local, options, bool, out);
- if (read_subvol_index >-1) {
- index=read_subvol_index;
- if (index >= priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
- index);
- goto out;
- }
- priv->read_child = index;
+ if (choose_local_old != priv->choose_local) {
+ priv->read_child = -1;
+ if (choose_local_old == _gf_false)
+ priv->did_discovery = _gf_false;
+ }
+
+ GF_OPTION_RECONF("read-hash-mode", priv->hash_mode, options, uint32, out);
+
+ if (read_subvol) {
+ index = xlator_subvolume_index(this, read_subvol);
+ if (index == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
}
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("read-subvolume-index", read_subvol_index, options, int32,
+ out);
+
+ if (read_subvol_index > -1) {
+ index = read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out);
+ GF_OPTION_RECONF("locking-scheme", locking_scheme, options, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out);
+ GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool,
+ out);
+
+ GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF("optimistic-change-log", priv->optimistic_change_log,
+ options, bool, out);
+ GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
+ fix_quorum_options(this, priv, qtype, options);
+ if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+
+ GF_OPTION_RECONF("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, options,
+ size_uint64, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+
+ enabled_old = priv->shd.enabled;
+ GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
+
+ GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
+ out);
+
+ timeout_old = priv->shd.timeout;
+ GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
+
+ GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
+ bool, out);
+
+ GF_OPTION_RECONF("shd-max-threads", priv->shd.max_threads, options, uint32,
+ out);
+
+ GF_OPTION_RECONF("shd-wait-qlength", priv->shd.wait_qlength, options,
+ uint32, out);
+
+ GF_OPTION_RECONF("favorite-child-policy", fav_child_policy, options, str,
+ out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
+
+ priv->did_discovery = _gf_false;
+
+ GF_OPTION_RECONF("consistent-io", consistent_io, options, bool, out);
+ if (priv->quorum_count != 0)
+ consistent_io = _gf_false;
+ priv->consistent_io = consistent_io;
+
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+ afr_selfheal_childup(this, priv);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
- GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
- GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
- uint32, out);
- fix_quorum_options(this,priv,qtype);
- GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
- int32, out);
+static int
+afr_pending_xattrs_init(afr_private_t *priv, xlator_t *this)
+{
+ int ret = -1;
+ int i = 0;
+ char *ptr = NULL;
+ char *ptr1 = NULL;
+ char *xattrs_list = NULL;
+ xlator_list_t *trav = NULL;
+ int child_count = -1;
+
+ trav = this->children;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the
+ * name of the thin arbiter file for persistence across add/
+ * removal of DHT subvols.*/
+ child_count++;
+ }
+
+ GF_OPTION_INIT("afr-pending-xattr", xattrs_list, str, out);
+ priv->pending_key = GF_CALLOC(sizeof(*priv->pending_key), child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!xattrs_list) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG,
+ "Unable to fetch afr-pending-xattr option from volfile."
+ " Falling back to using client translator names. ");
- GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
- uint32, out);
+ while (i < child_count) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ trav = trav->next;
+ i++;
+ }
+ ret = 0;
+ goto out;
+ }
- GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
- options, size, out);
- /* Reset this so we re-discover in case the topology changed. */
- priv->did_discovery = _gf_false;
+ ptr = ptr1 = gf_strdup(xattrs_list);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0, ptr = strtok(ptr, ","); ptr; ptr = strtok(NULL, ",")) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ ptr);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ i++;
+ }
+ ret = 0;
- ret = 0;
out:
- return ret;
-
+ GF_FREE(ptr1);
+ return ret;
}
-
-static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
- "as the 'favorite child'. This means that if a discrepancy in the content "
- "or attributes (ownership, permission, etc.) of a file is detected among "
- "the subvolumes, the file on '%s' will be considered the definitive "
- "version and its contents will OVERWRITE the contents of the file on other "
- "subvolumes. All versions of the file except that on '%s' "
- "WILL BE LOST.";
-
+void
+afr_ta_init(afr_private_t *priv)
+{
+ priv->thin_arbiter_count = 1;
+ priv->child_count--;
+ priv->ta_child_up = 0;
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->ta_notify_dom_lock_offset = 0;
+ priv->ta_in_mem_txn_count = 0;
+ priv->ta_on_wire_txn_count = 0;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ INIT_LIST_HEAD(&priv->ta_waitq);
+ INIT_LIST_HEAD(&priv->ta_onwireq);
+ gf_uuid_clear(priv->ta_gfid);
+}
int32_t
-init (xlator_t *this)
+init(xlator_t *this)
{
- afr_private_t *priv = NULL;
- int child_count = 0;
- xlator_list_t *trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
- xlator_t *read_subvol = NULL;
- int read_subvol_index = -1;
- xlator_t *fav_child = NULL;
- char *qtype = NULL;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "replicate translator needs more than one "
- "subvolume defined.");
- return -1;
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *thin_arbiter = NULL;
+ char *data_self_heal = NULL;
+ char *locking_scheme = NULL;
+ char *data_self_heal_algorithm = NULL;
+
+ if (!this->children) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED,
+ "replicate translator needs more than one "
+ "subvolume defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_VOL_MISCONFIGURED,
+ "Volume is dangling.");
+ }
+
+ this->private = GF_CALLOC(1, sizeof(afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&priv->saved_locks);
+ INIT_LIST_HEAD(&priv->lk_healq);
+ LOCK_INIT(&priv->lock);
+
+ child_count = xlator_subvolume_count(this);
+
+ priv->child_count = child_count;
+
+ priv->read_child = -1;
+
+ GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out);
+ GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out);
+ if (thin_arbiter && strlen(thin_arbiter) > 0) {
+ afr_ta_init(priv);
+ }
+ INIT_LIST_HEAD(&priv->healing);
+ INIT_LIST_HEAD(&priv->heal_waiting);
+
+ priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT;
+
+ GF_OPTION_INIT("afr-dirty-xattr", priv->afr_dirty, str, out);
+
+ GF_OPTION_INIT("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, bool, out);
+
+ GF_OPTION_INIT("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index(this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
}
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
+ }
+ GF_OPTION_INIT("read-subvolume-index", read_subvol_index, int32, out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", read_subvol_index);
+ goto out;
}
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT("choose-local", priv->choose_local, bool, out);
- this->private = GF_CALLOC (1, sizeof (afr_private_t),
- gf_afr_mt_afr_private_t);
- if (!this->private)
- goto out;
-
- priv = this->private;
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
- //lock recovery is not done in afr
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
+ priv->pending_reads = GF_CALLOC(sizeof(*priv->pending_reads),
+ priv->child_count, gf_afr_mt_atomic_t);
- child_count = xlator_subvolume_count (this);
+ GF_OPTION_INIT("read-hash-mode", priv->hash_mode, uint32, out);
- priv->child_count = child_count;
-
- priv->read_child = -1;
+ priv->favorite_child = -1;
- GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
- if (read_subvol) {
- priv->read_child = xlator_subvolume_index (this, read_subvol);
- if (priv->read_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
- }
- GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
- if (read_subvol_index > -1) {
- if (read_subvol_index >= priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
- read_subvol_index);
- goto out;
- }
- priv->read_child = read_subvol_index;
- }
- GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
-
- GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
-
- priv->favorite_child = -1;
- GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
- if (fav_child) {
- priv->favorite_child = xlator_subvolume_index (this, fav_child);
- if (priv->favorite_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- fav_child->name);
- goto out;
- }
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, fav_child->name,
- fav_child->name, fav_child->name);
- }
+ GF_OPTION_INIT("favorite-child-policy", fav_child_policy, str, out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
+ GF_OPTION_INIT("shd-max-threads", priv->shd.max_threads, uint32, out);
- GF_OPTION_INIT ("background-self-heal-count",
- priv->background_self_heal_count, uint32, out);
+ GF_OPTION_INIT("shd-wait-qlength", priv->shd.wait_qlength, uint32, out);
- GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
+ GF_OPTION_INIT("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
- GF_OPTION_INIT ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, str, out);
+ GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
- GF_OPTION_INIT ("data-self-heal-window-size",
- priv->data_self_heal_window_size, uint32, out);
+ GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
- out);
+ GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
+ out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
+ GF_OPTION_INIT("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
- GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
+ GF_OPTION_INIT("metadata-self-heal", priv->metadata_self_heal, bool, out);
- GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT("entry-self-heal", priv->entry_self_heal, bool, out);
- GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
+ GF_OPTION_INIT("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ uint32, out);
- GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
- out);
+ GF_OPTION_INIT("halo-max-latency", priv->halo_max_latency_msec, uint32,
+ out);
+ GF_OPTION_INIT("halo-max-replicas", priv->halo_max_replicas, uint32, out);
+ GF_OPTION_INIT("halo-min-replicas", priv->halo_min_replicas, uint32, out);
- GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
+ GF_OPTION_INIT("halo-enabled", priv->halo_enabled, bool, out);
- GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
- bool, out);
+ GF_OPTION_INIT("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ uint32, out);
- GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
+ GF_OPTION_INIT("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out);
- GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
+ GF_OPTION_INIT("optimistic-change-log", priv->optimistic_change_log, bool,
+ out);
- GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
+ GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out);
+ GF_OPTION_INIT("locking-scheme", locking_scheme, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_INIT("full-lock", priv->full_lock, bool, out);
+ GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out);
- GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
- GF_OPTION_INIT ("quorum-type", qtype, str, out);
- GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
- GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size,
- out);
- fix_quorum_options(this,priv,qtype);
+ GF_OPTION_INIT("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT("quorum-type", qtype, str, out);
+ GF_OPTION_INIT("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64,
+ out);
+ fix_quorum_options(this, priv, qtype, this->options);
- GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT("ensure-durability", priv->ensure_durability, bool, out);
- priv->wait_count = 1;
+ GF_OPTION_INIT("self-heal-daemon", priv->shd.enabled, bool, out);
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT("heal-timeout", priv->shd.timeout, int32, out);
- for (i = 0; i < child_count; i++)
- priv->child_up[i] = -1; /* start with unknown state.
- this initialization needed
- for afr_notify() to work
- reliably
- */
+ GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
+ GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
+ if (priv->quorum_count != 0)
+ priv->consistent_io = _gf_false;
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- ret = -ENOMEM;
- goto out;
- }
+ priv->wait_count = 1;
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
- AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- ret = -ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
+ priv->local = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char);
+ if (!priv->local) {
+ ret = -ENOMEM;
+ goto out;
+ }
- priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
- gf_afr_mt_int32_t);
- if (!priv->last_event) {
- ret = -ENOMEM;
- goto out;
- }
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- /* keep more local here as we may need them for self-heal etc */
- this->local_pool = mem_pool_new (afr_local_t, 512);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
+ priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->first_lookup = 1;
- priv->root_inode = NULL;
+ priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
+ gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- if (!priv->shd.iamshd) {
- ret = 0;
- goto out;
- }
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /*Initialize to -ve ping timeout so that they are not considered
+ * in child-up events until ping-event comes*/
+ for (i = 0; i < child_count; i++)
+ priv->child_latency[i] = -1;
+
+ priv->children = GF_CALLOC(sizeof(xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = afr_pending_xattrs_init(priv, this);
+ if (ret)
+ goto out;
+
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+ trav = trav->next;
+ i++;
+ }
+
+ ret = gf_asprintf(&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT, this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ priv->last_event = GF_CALLOC(child_count, sizeof(*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
ret = -ENOMEM;
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_brick_pos_t);
- if (!priv->shd.pos)
- goto out;
+ goto out;
+ }
- priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
- gf_afr_mt_int32_t);
- if (!priv->shd.pending)
- goto out;
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (priv->shd.iamshd) {
+ ret = afr_selfheal_daemon_init(this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
- priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
- child_count, gf_afr_mt_shd_bool_t);
- if (!priv->shd.inprogress)
- goto out;
- priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
- gf_afr_mt_shd_timer_t);
- if (!priv->shd.timer)
- goto out;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new(afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ goto out;
+ }
- priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false);
- if (!priv->shd.healed)
- goto out;
+ priv->root_inode = NULL;
- priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false);
- if (!priv->shd.heal_failed)
- goto out;
+ ret = 0;
+out:
+ return ret;
+}
+void
+afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = -1;
+
+ if (!healer)
+ return;
+
+ if (healer->running) {
+ /*
+ * If there are any resources to cleanup, We need
+ * to do that gracefully using pthread_cleanup_push
+ */
+ ret = gf_thread_cleanup_xint(healer->thread);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Failed to clean up healer threads.");
+ healer->thread = 0;
+ }
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
- priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false);
- if (!priv->shd.split_brain)
- goto out;
+void
+afr_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ afr_self_heald_t *shd = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ if (shd->statistics[i])
+ eh_destroy(shd->statistics[i]);
+ }
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+ GF_FREE(shd->statistics);
+ if (shd->split_brain)
+ eh_destroy(shd->split_brain);
+}
+void
+fini(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
- this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
- if (!this->itable)
- goto out;
- priv->root_inode = inode_ref (this->itable->root);
- GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out);
- GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
+ priv = this->private;
- ret = 0;
-out:
- return ret;
-}
+ afr_selfheal_daemon_fini(this);
+ GF_ASSERT(list_empty(&priv->saved_locks));
+ LOCK(&priv->lock);
+ if (priv->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ UNLOCK(&priv->lock);
-int
-fini (xlator_t *this)
-{
- afr_private_t *priv = NULL;
+ if (this->local_pool != NULL) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
- priv = this->private;
- this->private = NULL;
- afr_priv_destroy (priv);
- if (this->itable);//I dont see any destroy func
+ this->private = NULL;
+ afr_priv_destroy(priv);
+ if (this->itable) {
+ inode_table_destroy(this->itable);
+ this->itable = NULL;
+ }
- return 0;
+ return;
}
-
struct xlator_fops fops = {
- .lookup = afr_lookup,
- .open = afr_open,
- .lk = afr_lk,
- .flush = afr_flush,
- .statfs = afr_statfs,
- .fsync = afr_fsync,
- .fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
- .inodelk = afr_inodelk,
- .finodelk = afr_finodelk,
- .entrylk = afr_entrylk,
- .fentrylk = afr_fentrylk,
-
- /* inode read */
- .access = afr_access,
- .stat = afr_stat,
- .fstat = afr_fstat,
- .readlink = afr_readlink,
- .getxattr = afr_getxattr,
- .fgetxattr = afr_fgetxattr,
- .readv = afr_readv,
-
- /* inode write */
- .writev = afr_writev,
- .truncate = afr_truncate,
- .ftruncate = afr_ftruncate,
- .setxattr = afr_setxattr,
- .fsetxattr = afr_fsetxattr,
- .setattr = afr_setattr,
- .fsetattr = afr_fsetattr,
- .removexattr = afr_removexattr,
- .fremovexattr = afr_fremovexattr,
-
- /* dir read */
- .opendir = afr_opendir,
- .readdir = afr_readdir,
- .readdirp = afr_readdirp,
-
- /* dir write */
- .create = afr_create,
- .mknod = afr_mknod,
- .mkdir = afr_mkdir,
- .unlink = afr_unlink,
- .rmdir = afr_rmdir,
- .link = afr_link,
- .symlink = afr_symlink,
- .rename = afr_rename,
+ .lookup = afr_lookup,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsyncdir = afr_fsyncdir,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+ .ipc = afr_ipc,
+ .lease = afr_lease,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
+ .readv = afr_readv,
+ .seek = afr_seek,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
+ .setattr = afr_setattr,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .fsync = afr_fsync,
+
+ /*inode open*/
+ .opendir = afr_opendir,
+ .open = afr_open,
+
+ /* dir read */
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
};
-
struct xlator_dumpops dumpops = {
- .priv = afr_priv_dump,
+ .priv = afr_priv_dump,
};
-
struct xlator_cbks cbks = {
- .release = afr_release,
- .releasedir = afr_releasedir,
- .forget = afr_forget,
+ .release = afr_release,
+ .releasedir = afr_releasedir,
+ .forget = afr_forget,
};
-
struct volume_options options[] = {
- { .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"read-subvolume-index" },
- .type = GF_OPTION_TYPE_INT,
- .default_value = "-1",
- },
- { .key = {"read-hash-mode" },
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 2,
- .default_value = "0",
- .description = "0 = first responder, "
- "1 = hash by GFID (all clients use same subvolume), "
- "2 = hash by GFID and client PID",
- },
- { .key = {"choose-local" },
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "Choose a local subvolume to read from if "
- "read-subvolume is not explicitly set.",
- },
- { .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"background-self-heal-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .default_value = "16",
- .validate = GF_OPT_VALIDATE_MIN,
- },
- { .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_STR,
- .value = {"1", "on", "yes", "true", "enable",
- "0", "off", "no", "false", "disable",
- "open"},
- .default_value = "on",
- .description = "\"open\" means data self-heal action will"
- "only be triggered by file open operations."
- },
- { .key = {"data-self-heal-algorithm"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "",
- .description = "Select between \"full\", \"diff\". The "
- "\"full\" algorithm copies the entire file from "
- "source to sink. The \"diff\" algorithm copies to "
- "sink only those blocks whose checksums don't match "
- "with those of source.",
- .value = { "diff", "full", "" }
- },
- { .key = {"data-self-heal-window-size"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 1024,
- .default_value = "1",
- .description = "Maximum number blocks per file for which self-heal "
- "process would be applied simultaneously."
- },
- { .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"optimistic-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"strict-readdir"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"inodelk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"entrylk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"eager-lock"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"iam-self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"quorum-type"},
- .type = GF_OPTION_TYPE_STR,
- .value = { "none", "auto", "fixed", "" },
- .default_value = "none",
- .description = "If value is \"fixed\" only allow writes if "
- "quorum-count bricks are present. If value is "
- "\"auto\" only allow writes if more than half of "
- "bricks, or exactly half including the first, are "
- "present.",
- },
- { .key = {"quorum-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = INT_MAX,
- .default_value = 0,
- .description = "If quorum-type is \"fixed\" only allow writes if "
- "this many bricks or present. Other quorum types "
- "will OVERWRITE this value.",
- },
- { .key = {"node-uuid"},
- .type = GF_OPTION_TYPE_STR,
- .description = "Local glusterd uuid string",
- },
- { .key = {"heal-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 60,
- .max = INT_MAX,
- .default_value = "600",
- .description = "Poll timeout for checking the need to self-heal"
- },
- { .key = {"post-op-delay-secs"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = INT_MAX,
- .default_value = "1",
- .description = "Time interval induced artificially before "
- "post-operation phase of the transaction to "
- "enhance overlap of adjacent write operations.",
- },
- { .key = {AFR_SH_READDIR_SIZE_KEY},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "readdirp size for performing entry self-heal",
- .min = 1024,
- .max = 131072,
- .default_value = "1KB",
- },
- { .key = {NULL} },
+ {.key = {"read-subvolume"},
+ .type = GF_OPTION_TYPE_XLATOR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"},
+ {.key = {"read-subvolume-index"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"},
+ {.key = {"read-hash-mode"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 5,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option.\n"
+ "0 = first readable child of AFR, starting from 1st child.\n"
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume).\n"
+ "2 = hash by GFID of file and client PID.\n"
+ "3 = brick having the least outstanding read requests.\n"
+ "4 = brick having the least network ping latency.\n"
+ "5 = Hybrid mode between 3 and 4, ie least value among "
+ "network-latency multiplied by outstanding-read-requests."},
+ {
+ .key = {"choose-local"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
+ },
+ {.key = {"background-self-heal-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 256,
+ .default_value = "8",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of per client self-heal "
+ "jobs that can perform parallel heals in the "
+ "background."},
+ {.key = {"halo-shd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for shd halo replication in msec."},
+ {.key = {"halo-enabled"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Enable Halo (geo) replication mode."},
+ {.key = {"halo-nfsd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for nfsd halo replication in msec."},
+ {.key = {"halo-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = AFR_HALO_MAX_LATENCY,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for halo replication in msec."},
+ {.key = {"halo-max-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The maximum number of halo replicas; replicas"
+ " beyond this value will be written asynchronously"
+ "via the SHD."},
+ {.key = {"halo-min-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "2",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The minimmum number of halo replicas, before adding "
+ "out of region replicas."},
+ {.key = {"heal-wait-queue-length"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
+ .default_value = "128",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of heals that can be queued"
+ " for the parallel background self heal jobs."},
+ {.key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false",
+ "disable", "open"},
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."},
+ {.key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = {"diff", "full"}},
+ {.key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number blocks per file for which self-heal "
+ "process would be applied simultaneously."},
+ {.key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."},
+ {.key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."},
+ {.key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."},
+ {.key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"},
+ {.key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"},
+ {.key = {"pre-op-compat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Use separate pre-op xattrop() FOP rather than "
+ "overloading xdata of the OP"},
+ {.key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "Enable/Disable eager lock for replica volume. "
+ "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and we revert to a blocking locks mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempts to acquire a full lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking locks as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
+ },
+ {.key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica_heal_enable_disable*/
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."},
+ {.key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."},
+ {.key = {"iam-nfs-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of an NFS daemon "
+ "or not."},
+ {
+ .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "auto", "fixed"},
+ .default_value = "none",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-type*/
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-count*/
+ /*.validate_fn = validate_quorum_count*/
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks are present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ {
+ .key = {"quorum-reads"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option has been removed. Reads are not allowed "
+ "if quorum is not met.",
+ },
+ {
+ .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ {
+ .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ {
+ .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .default_value = "1KB",
+ },
+ {
+ .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {3},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
+ {
+ .key = {"afr-dirty-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = AFR_DIRTY_DEFAULT,
+ },
+ {.key = {"afr-pending-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are used to "
+ "capture information on pending heals."},
+ {
+ .key = {"metadata-splitbrain-forced-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 5,
+ .max = INT_MAX,
+ .default_value = "600",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"},
+ {
+ .key = {"consistent-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, readdirp will force "
+ "lookups on those entries read whose read child is "
+ "not the same as that of the parent. This will "
+ "guarantee that all read operations on a file serve "
+ "attributes from the same subvol as long as it holds "
+ " a good copy of the file/dir.",
+ },
+ {.key = {"arbiter-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "subset of child_count. Has to be 0 or 1."},
+ {
+ .key = {"thin-arbiter"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "contains host:path of thin abriter brick",
+ },
+ {.key = {"shd-max-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 64,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number of parallel heals SHD can do per "
+ "local brick. This can substantially lower heal times"
+ ", but can also crush your bricks if you don't have "
+ "the storage hardware to support this."},
+ {
+ .key = {"shd-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 655536,
+ .default_value = "1024",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to control number of heals"
+ " that can wait in SHD per subvolume",
+ },
+ {
+ .key = {"locking-scheme"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"full", "granular"},
+ .default_value = "full",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is set to granular, self-heal will "
+ "stop being compatible with afr-v1, which helps afr "
+ "be more granular while self-healing",
+ },
+ {.key = {"full-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .op_version = {GD_OP_VERSION_3_13_2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "If this option is disabled, then the IOs will take "
+ "range locks same as versions till 3.13.1."},
+ {
+ .key = {"granular-entry-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, self-heal will resort to "
+ "granular way of recording changelogs and doing entry "
+ "self-heal.",
+ },
+ {
+ .key = {"favorite-child-policy"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "size", "ctime", "mtime", "majority"},
+ .default_value = "none",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to automatically resolve "
+ "split-brains using various policies without user "
+ "intervention. \"size\" picks the file with the "
+ "biggest size as the source. \"ctime\" and \"mtime\" "
+ "pick the file with the latest ctime and mtime "
+ "respectively as the source. \"majority\" picks a file"
+ " with identical mtime and size in more than half the "
+ "number of bricks in the replica.",
+ },
+ {
+ .key = {"consistent-io"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If this option is enabled, i/o will fail even if "
+ "one of the bricks is down in the replicas",
+ },
+ {.key = {"use-compound-fops"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_4},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "replicate",
+ .category = GF_MAINTAINED,
};