summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/afr/src/afr-common.c364
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h3
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h1
-rw-r--r--xlators/cluster/afr/src/afr.c98
-rw-r--r--xlators/cluster/afr/src/afr.h14
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.c1
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c22
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c33
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in24
-rw-r--r--xlators/nfs/server/src/exports.h2
-rw-r--r--xlators/nfs/server/src/mount3.c11
-rw-r--r--xlators/nfs/server/src/nfs.c24
-rw-r--r--xlators/protocol/client/src/client-handshake.c2
-rw-r--r--xlators/protocol/client/src/client.c98
16 files changed, 601 insertions, 108 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index ac834e90f4b..17943d7baae 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2531,7 +2531,6 @@ unwind:
return 0;
}
-
int
afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
{
@@ -3227,7 +3226,7 @@ afr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_UNWIND (flush, frame, local->op_ret,
@@ -4655,20 +4654,292 @@ __get_heard_from_all_status (xlator_t *this)
return heard_from_all;
}
+/*
+ * Walk all children and return the index of the one with the smallest
+ * non-negative recorded latency among those accepted by the filter below,
+ * or -1 if no child qualifies.
+ *
+ * NOTE(review): the filter accepts children whose child_up[] entry is
+ * non-zero, i.e. children currently marked UP, although the function name
+ * and the debug message both say "down". Confirm whether the test should
+ * be inverted. A bare inversion is presumably not enough on its own:
+ * child_latency[] is allocated with GF_CALLOC, so a child that never
+ * reported a latency would look like a 0 ms candidate -- verify how
+ * never-connected children are meant to be excluded.
+ */
+static int
+find_best_down_child (xlator_t *this)
+{
+        afr_private_t *conf      = this->private;
+        int64_t        lowest    = INT64_MAX;
+        int32_t        candidate = -1;
+        int            child     = 0;
+
+        for (child = 0; child < conf->child_count; child++) {
+                if (!conf->child_up[child])
+                        continue;
+                if (conf->child_latency[child] < 0)
+                        continue;
+                if (conf->child_latency[child] >= lowest)
+                        continue;
+
+                /* Strictly better than anything seen so far. */
+                candidate = child;
+                lowest = conf->child_latency[child];
+        }
+
+        if (candidate >= 0) {
+                gf_msg_debug (this->name, 0, "Found best down child (%d) "
+                              "@ %ld ms latency", candidate, lowest);
+        }
+
+        return candidate;
+}
+
+/*
+ * Return the index of the child that is marked up (non-zero child_up[])
+ * and has the largest non-negative recorded latency, or -1 when no child
+ * qualifies. On equal latencies the lowest index wins because the
+ * comparison is strict.
+ */
+int
+find_worst_up_child (xlator_t *this)
+{
+        afr_private_t *conf      = this->private;
+        int64_t        highest   = INT64_MIN;
+        int32_t        candidate = -1;
+        int            child     = 0;
+
+        for (child = 0; child < conf->child_count; child++) {
+                if (!conf->child_up[child])
+                        continue;
+                if (conf->child_latency[child] < 0)
+                        continue;
+                if (conf->child_latency[child] <= highest)
+                        continue;
+
+                /* Strictly slower than anything seen so far. */
+                candidate = child;
+                highest = conf->child_latency[child];
+        }
+
+        if (candidate >= 0) {
+                gf_msg_debug (this->name, 0, "Found worst up child (%d)"
+                              " @ %ld ms latency", candidate, highest);
+        }
+
+        return candidate;
+}
+
+/*
+ * Record the latency reported by a child's ping and, when the halo
+ * thresholds call for it, rewrite *event into a CHILD_DOWN or CHILD_UP
+ * for that child.
+ *
+ * this                  - the replicate xlator
+ * child_xlator          - the pinged child (used for log names only)
+ * idx                   - index of that child in priv->child_up[] and
+ *                         priv->child_latency[]
+ * halo_max_latency_msec - latency threshold in effect for this process
+ * event                 - in/out; left untouched unless a state change
+ *                         is warranted
+ * child_latency_msec    - latency measured for this ping
+ *
+ * The call site in afr_notify invokes this with priv->lock held.
+ */
+void
+__afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator,
+        const int idx, int64_t halo_max_latency_msec, int32_t *event,
+        int64_t child_latency_msec)
+{
+        afr_private_t *priv        = NULL;
+        int            up_children = 0;
+
+        priv = this->private;
+
+        priv->child_latency[idx] = child_latency_msec;
+        gf_msg_debug (child_xlator->name, 0, "Client ping @ %ld ms",
+                      child_latency_msec);
+
+        up_children = __afr_get_up_children_count (priv);
+
+        if (child_latency_msec > halo_max_latency_msec &&
+            priv->child_up[idx] == 1 &&
+            up_children > priv->halo_min_replicas) {
+                /*
+                 * More than halo_min_replicas children are up, so taking
+                 * this one down still leaves at least halo_min_replicas.
+                 * The former "Overriding halo threshold" branch tested
+                 * (up_children - 1) < halo_min_replicas, which can never
+                 * hold under the condition above, so it was dead code and
+                 * has been dropped.
+                 */
+                gf_log (child_xlator->name, GF_LOG_INFO,
+                        "Child latency (%ld ms) "
+                        "exceeds halo threshold (%ld), "
+                        "marking child down.",
+                        child_latency_msec,
+                        halo_max_latency_msec);
+                *event = GF_EVENT_CHILD_DOWN;
+        } else if (child_latency_msec < halo_max_latency_msec &&
+                   priv->child_up[idx] == 0) {
+                /* A down child is fast enough again; admit it only while
+                 * there is room below halo_max_replicas. */
+                if (up_children < priv->halo_max_replicas) {
+                        gf_log (child_xlator->name, GF_LOG_INFO,
+                                "Child latency (%ld ms) "
+                                "below halo threshold (%ld), "
+                                "marking child up.",
+                                child_latency_msec,
+                                halo_max_latency_msec);
+                        *event = GF_EVENT_CHILD_UP;
+                } else {
+                        gf_log (child_xlator->name, GF_LOG_INFO,
+                                "Not marking child %d up, "
+                                "max replicas (%d) reached.", idx,
+                                priv->halo_max_replicas);
+                }
+        }
+}
+
+/*
+ * Handle a CHILD_UP for child 'idx': mark it up, request a parallel
+ * self-heal, apply the halo replica limits, and decide which event the
+ * caller should go on to process.
+ *
+ * this                  - the replicate xlator
+ * child_xlator          - the child that came up (used for log names)
+ * idx                   - index of that child in the priv arrays
+ * halo_max_latency_msec - halo latency threshold; afr_notify leaves this
+ *                         at 0 when halo is disabled
+ * event                 - in/out; becomes GF_EVENT_SOME_DESCENDENT_UP
+ *                         unless this is the only child up
+ * call_psh              - out; set to 1
+ * up_child              - out; set to idx
+ *
+ * Presumably called with priv->lock held (the "__" prefix) -- confirm
+ * against the caller.
+ */
+void
+__afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator,
+        const int idx, int64_t halo_max_latency_msec,
+        int32_t *event, int32_t *call_psh, int32_t *up_child)
+{
+        afr_private_t *priv           = NULL;
+        int            up_children    = 0;
+        int            worst_up_child = -1;
+
+        priv = this->private;
+
+        /*
+         * This only really counts if the child was never up
+         * (value = -1) or had been down (value = 0).  See
+         * comment at GF_EVENT_CHILD_DOWN for a more detailed
+         * explanation.
+         */
+        if (priv->child_up[idx] != 1) {
+                priv->event_generation++;
+        }
+        priv->child_up[idx] = 1;
+
+        *call_psh = 1;
+        *up_child = idx;
+        up_children = __afr_get_up_children_count (priv);
+
+        /*
+         * The halo demotions below must only run when halo replication is
+         * enabled.  Without this guard a stale child_latency[] value or a
+         * configured halo-max-replicas could mark healthy children down
+         * on a volume that does not use halo at all.
+         */
+        if (priv->halo_enabled) {
+                /*
+                 * Handle the edge case where we exceed
+                 * halo_min_replicas and we've got a child which is
+                 * marked up as it was helping to satisfy the
+                 * halo_min_replicas even though its latency exceeds
+                 * halo_max_latency_msec.
+                 */
+                if (up_children > priv->halo_min_replicas) {
+                        worst_up_child = find_worst_up_child (this);
+                        if (worst_up_child >= 0 &&
+                            priv->child_latency[worst_up_child] >
+                            halo_max_latency_msec) {
+                                gf_msg_debug (this->name, 0,
+                                        "Marking child %d down, "
+                                        "doesn't meet halo threshold (%ld), and > "
+                                        "halo_min_replicas (%d)",
+                                        worst_up_child, halo_max_latency_msec,
+                                        priv->halo_min_replicas);
+                                priv->child_up[worst_up_child] = 0;
+                                up_children--;
+                        }
+                }
+                /* Enforce the upper replica bound; the self-heal daemon
+                 * is exempt from it. */
+                if (up_children > priv->halo_max_replicas &&
+                    !priv->shd.iamshd) {
+                        worst_up_child = find_worst_up_child (this);
+                        if (worst_up_child < 0) {
+                                worst_up_child = idx;
+                        }
+                        priv->child_up[worst_up_child] = 0;
+                        up_children--;
+                        gf_msg_debug (this->name, 0, "Marking child %d down, "
+                                "up_children (%d) > halo_max_replicas (%d)",
+                                worst_up_child, up_children,
+                                priv->halo_max_replicas);
+                }
+        }
+
+        if (up_children == 1) {
+                gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
+                        "Subvolume '%s' came back up; "
+                        "going online.",
+                        child_xlator->name);
+                /* Restored: the inline code this helper replaced also
+                 * emitted this event when the first child came up. */
+                gf_event (EVENT_AFR_SUBVOL_UP,
+                          "subvol=%s", this->name);
+        } else {
+                *event = GF_EVENT_SOME_DESCENDENT_UP;
+        }
+
+        priv->last_event[idx] = *event;
+}
+
+/*
+ * Handle a CHILD_DOWN for child 'idx': mark it down, optionally promote a
+ * replacement child to keep halo_min_replicas satisfied, and decide which
+ * event the caller should go on to process.
+ *
+ * this               - the replicate xlator
+ * child_xlator       - the child that went down (unused in this body)
+ * idx                - index of that child in the priv arrays
+ * child_latency_msec - negative for a real disconnect; afr_notify passes
+ *                      the measured latency when the down event was
+ *                      synthesised from a ping
+ * event              - in/out; becomes GF_EVENT_SOME_DESCENDENT_DOWN
+ *                      unless every child is now down
+ * call_psh, up_child - out; written only when a replacement child is
+ *                      promoted
+ *
+ * Presumably called with priv->lock held (the "__" prefix) -- confirm
+ * against the caller.
+ */
+void
+__afr_handle_child_down_event (xlator_t *this, xlator_t *child_xlator,
+        int idx, int64_t child_latency_msec, int32_t *event,
+        int32_t *call_psh, int32_t *up_child)
+{
+        afr_private_t *priv            = NULL;
+        int            i               = 0;
+        int            up_children     = 0;
+        int            down_children   = 0;
+        int            best_down_child = -1;
+
+        priv = this->private;
+
+        /*
+         * If a brick is down when we start, we'll get a
+         * CHILD_DOWN to indicate its initial state.  There
+         * was never a CHILD_UP in this case, so if we
+         * increment "down_count" the difference between that
+         * and "up_count" will no longer be the number of
+         * children that are currently up.  This has serious
+         * implications e.g. for quorum enforcement, so we
+         * don't increment these values unless the event
+         * represents an actual state transition between "up"
+         * (value = 1) and anything else.
+         */
+        if (priv->child_up[idx] == 1) {
+                priv->event_generation++;
+        }
+
+        /*
+         * If this is an _actual_ CHILD_DOWN event, we
+         * want to set the child_latency to < 0 to indicate
+         * the child is really disconnected.
+         */
+        if (child_latency_msec < 0) {
+                priv->child_latency[idx] = child_latency_msec;
+        }
+        priv->child_up[idx] = 0;
+
+        up_children = __afr_get_up_children_count (priv);
+        /*
+         * Handle the edge case where we need to find the
+         * next best child (to mark up) as marking this child
+         * down would cause us to fall below halo_min_replicas.
+         * We will also force the SHD to heal this child _now_
+         * as we want it to be up to date if we are going to
+         * begin using it synchronously.
+         *
+         * Only do this when halo replication is enabled; otherwise a
+         * plain CHILD_DOWN would re-mark some child up and request a
+         * self-heal for no reason.
+         */
+        if (priv->halo_enabled &&
+            up_children < priv->halo_min_replicas) {
+                best_down_child = find_best_down_child (this);
+                if (best_down_child >= 0) {
+                        gf_msg_debug (this->name, 0,
+                                "Swapping out child %d for "
+                                "child %d to satisfy halo_min_replicas (%d).",
+                                idx, best_down_child, priv->halo_min_replicas);
+                        priv->child_up[best_down_child] = 1;
+                        *call_psh = 1;
+                        *up_child = best_down_child;
+                }
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (priv->child_up[i] == 0)
+                        down_children++;
+        }
+        if (down_children == priv->child_count) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SUBVOLS_DOWN,
+                        "All subvolumes are down. Going "
+                        "offline until atleast one of them "
+                        "comes back up.");
+                /* Restored: the inline code this helper replaced also
+                 * emitted this event when the last child went down. */
+                gf_event (EVENT_AFR_SUBVOLS_DOWN,
+                          "subvol=%s", this->name);
+        } else {
+                *event = GF_EVENT_SOME_DESCENDENT_DOWN;
+        }
+        priv->last_event[idx] = *event;
+}
+
+/*
+ * Pick the halo latency threshold that applies to this process: the
+ * self-heal daemon's value when shd.iamshd is set, else the NFS daemon's
+ * value when nfsd.iamnfsd is set, else the regular client value.
+ * Returns the chosen threshold in milliseconds.
+ */
+static int64_t
+afr_get_halo_latency (xlator_t *this)
+{
+        afr_private_t *conf      = this->private;
+        int64_t        threshold = 0;
+
+        if (conf->shd.iamshd)
+                threshold = conf->shd.halo_max_latency_msec;
+        else if (conf->nfsd.iamnfsd)
+                threshold = conf->nfsd.halo_max_latency_msec;
+        else
+                threshold = conf->halo_max_latency_msec;
+
+        gf_msg_debug (this->name, 0, "Using halo latency %ld",
+                      threshold);
+
+        return threshold;
+}
+
+
int32_t
afr_notify (xlator_t *this, int32_t event,
void *data, void *data2)
{
afr_private_t *priv = NULL;
+ xlator_t *child_xlator = NULL;
int i = -1;
- int up_children = 0;
- int down_children = 0;
int propagate = 0;
int had_heard_from_all = 0;
int have_heard_from_all = 0;
int idx = -1;
int ret = -1;
int call_psh = 0;
+ int up_child = -1;
dict_t *input = NULL;
dict_t *output = NULL;
gf_boolean_t had_quorum = _gf_false;
@@ -4677,6 +4948,10 @@ afr_notify (xlator_t *this, int32_t event,
struct gf_upcall_cache_invalidation *up_ci = NULL;
inode_table_t *itable = NULL;
inode_t *inode = NULL;
+ int64_t halo_max_latency_msec = 0;
+ int64_t child_latency_msec = -1;
+
+ child_xlator = (xlator_t *)data;
priv = this->private;
@@ -4701,7 +4976,7 @@ afr_notify (xlator_t *this, int32_t event,
* subsequent revalidate lookup happens on all the dht's subvolumes
* which triggers afr self-heals if any.
*/
- idx = find_child_index (this, data);
+ idx = find_child_index (this, child_xlator);
if (idx < 0) {
gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
"Received child_up from invalid subvolume");
@@ -4710,6 +4985,30 @@ afr_notify (xlator_t *this, int32_t event,
had_quorum = priv->quorum_count && afr_has_quorum (priv->child_up,
this);
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency (this);
+
+ if (event == GF_EVENT_CHILD_PING) {
+ /* Calculates the child latency and sets event
+ */
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ LOCK (&priv->lock);
+ {
+ __afr_handle_ping_event (this, child_xlator,
+ idx, halo_max_latency_msec, &event,
+ child_latency_msec);
+ }
+ UNLOCK (&priv->lock);
+ }
+ }
+
+ if (event == GF_EVENT_CHILD_PING) {
+ /* This is the only xlator that handles PING, no reason to
+ * propagate.
+ */
+ goto out;
+ }
+
if (event == GF_EVENT_TRANSLATOR_OP) {
LOCK (&priv->lock);
{
@@ -4736,57 +5035,15 @@ afr_notify (xlator_t *this, int32_t event,
propagate = 1;
break;
case GF_EVENT_CHILD_UP:
- /*
- * This only really counts if the child was never up
- * (value = -1) or had been down (value = 0). See
- * comment at GF_EVENT_CHILD_DOWN for a more detailed
- * explanation.
- */
- if (priv->child_up[idx] != 1) {
- priv->event_generation++;
- }
- priv->child_up[idx] = 1;
-
- call_psh = 1;
- up_children = __afr_get_up_children_count (priv);
- if (up_children == 1) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- AFR_MSG_SUBVOL_UP,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
- gf_event (EVENT_AFR_SUBVOL_UP,
- "subvol=%s", this->name);
-
- } else {
- event = GF_EVENT_SOME_DESCENDENT_UP;
- }
-
- priv->last_event[idx] = event;
-
+ __afr_handle_child_up_event (this, child_xlator,
+ idx, halo_max_latency_msec, &event, &call_psh,
+ &up_child);
break;
case GF_EVENT_CHILD_DOWN:
- if (priv->child_up[idx] == 1) {
- priv->event_generation++;
- }
- priv->child_up[idx] = 0;
-
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 0)
- down_children++;
- if (down_children == priv->child_count) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SUBVOLS_DOWN,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
- gf_event (EVENT_AFR_SUBVOLS_DOWN,
- "subvol=%s", this->name);
- } else {
- event = GF_EVENT_SOME_DESCENDENT_DOWN;
- }
-
- priv->last_event[idx] = event;
-
+ __afr_handle_child_down_event (this, child_xlator, idx,
+ child_latency_msec, &event, &call_psh,
+ &up_child);
break;
case GF_EVENT_CHILD_CONNECTING:
@@ -4839,7 +5096,6 @@ afr_notify (xlator_t *this, int32_t event,
had come up, propagate CHILD_UP, but only this time
*/
event = GF_EVENT_CHILD_DOWN;
- up_children = __afr_get_up_children_count (priv);
for (i = 0; i < priv->child_count; i++) {
if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
event = GF_EVENT_CHILD_UP;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 7f7962013d7..c7d6261b110 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -46,7 +46,8 @@ enum gf_afr_mem_types_ {
gf_afr_mt_spbc_timeout_t,
gf_afr_mt_spb_status_t,
gf_afr_mt_empty_brick_t,
- gf_afr_mt_end
+ gf_afr_mt_child_latency_t,
+ gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index c6ac5ebfd1b..4ac1d32f58a 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -58,6 +58,7 @@ typedef struct {
eh_t **statistics;
uint32_t max_threads;
uint32_t wait_qlength;
+ uint32_t halo_max_latency_msec;
} afr_self_heald_t;
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index ceaa034dbbb..17b34822c17 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -184,6 +184,27 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("data-self-heal-algorithm",
priv->data_self_heal_algorithm, options, str, out);
+ GF_OPTION_RECONF ("halo-enabled",
+ priv->halo_enabled, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("halo-shd-max-latency",
+ priv->shd.halo_max_latency_msec, options, uint32,
+ out);
+
+ GF_OPTION_RECONF ("halo-nfsd-max-latency",
+ priv->nfsd.halo_max_latency_msec, options, uint32,
+ out);
+
+ GF_OPTION_RECONF ("halo-max-latency", priv->halo_max_latency_msec,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("halo-max-replicas", priv->halo_max_replicas, options,
+ uint32, out);
+
+ GF_OPTION_RECONF ("halo-min-replicas", priv->halo_min_replicas, options,
+ uint32, out);
+
GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
@@ -473,6 +494,24 @@ init (xlator_t *this)
GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
+ GF_OPTION_INIT ("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ uint32, out);
+
+ GF_OPTION_INIT ("halo-max-latency", priv->halo_max_latency_msec,
+ uint32, out);
+ GF_OPTION_INIT ("halo-max-replicas", priv->halo_max_replicas, uint32,
+ out);
+ GF_OPTION_INIT ("halo-min-replicas", priv->halo_min_replicas, uint32,
+ out);
+
+ GF_OPTION_INIT ("halo-enabled",
+ priv->halo_enabled, bool, out);
+
+ GF_OPTION_INIT ("halo-nfsd-max-latency",
+ priv->nfsd.halo_max_latency_msec, uint32, out);
+
+ GF_OPTION_INIT ("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out);
+
GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
@@ -528,7 +567,12 @@ init (xlator_t *this)
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
gf_afr_mt_char);
- if (!priv->child_up) {
+
+ priv->child_latency = GF_CALLOC (sizeof (*priv->child_latency),
+ child_count,
+ gf_afr_mt_child_latency_t);
+
+ if (!priv->child_up || !priv->child_latency) {
ret = -ENOMEM;
goto out;
}
@@ -736,7 +780,50 @@ struct volume_options options[] = {
"jobs that can perform parallel heals in the "
"background."
},
- { .key = {"heal-wait-queue-length"},
+ { .key = {"halo-shd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .description = "Maximum latency for shd halo replication in msec."
+ },
+ { .key = {"halo-enabled"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+ .description = "Enable Halo (geo) replication mode."
+ },
+ { .key = {"halo-nfsd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .description = "Maximum latency for nfsd halo replication in msec."
+ },
+ { .key = {"halo-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .description = "Maximum latency for halo replication in msec."
+ },
+        /* Option table entry for "halo-max-replicas". The description
+         * literals are concatenated, so each continuation needs its own
+         * leading space. */
+        { .key = {"halo-max-replicas"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 1,
+          .max = 99999,
+          .default_value = "99999",
+          .description = "The maximum number of halo replicas; replicas"
+                         " beyond this value will be written asynchronously"
+                         " via the SHD."
+        },
+        /* Option table entry for "halo-min-replicas". */
+        { .key = {"halo-min-replicas"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 1,
+          .max = 99999,
+          .default_value = "2",
+          .description = "The minimum number of halo replicas, before adding "
+                         "out of region replicas."
+        },
+ { .key = {"heal-wait-queue-length"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
.max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
@@ -876,6 +963,13 @@ struct volume_options options[] = {
"translator is running as part of self-heal-daemon "
"or not."
},
+ { .key = {"iam-nfs-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of an NFS daemon "
+ "or not."
+ },
{ .key = {"quorum-type"},
.type = GF_OPTION_TYPE_STR,
.value = { "none", "auto", "fixed"},
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 0535e7c7271..3be15175dc7 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -74,6 +74,11 @@ typedef enum {
AFR_FAV_CHILD_POLICY_MAX,
} afr_favorite_child_policy;
+/* NFS-daemon specific settings kept in afr_private_t (member "nfsd"). */
+struct afr_nfsd {
+ gf_boolean_t iamnfsd; /* filled from the "iam-nfs-daemon" option in init */
+ uint32_t halo_max_latency_msec; /* filled from "halo-nfsd-max-latency" */
+};
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -85,6 +90,7 @@ typedef struct _afr_private {
inode_t *root_inode;
unsigned char *child_up;
+ int64_t *child_latency;
unsigned char *local;
char **pending_key;
@@ -155,8 +161,14 @@ typedef struct _afr_private {
gf_boolean_t ensure_durability;
char *sh_domain;
char *afr_dirty;
+ gf_boolean_t halo_enabled;
+
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
- afr_self_heald_t shd;
+ afr_self_heald_t shd;
+ struct afr_nfsd nfsd;
gf_boolean_t consistent_metadata;
uint64_t spb_choice_timeout;
diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.c b/xlators/features/changelog/lib/src/gf-changelog-rpc.c
index b9339a770d1..c1139423d6d 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-rpc.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.c
@@ -26,6 +26,7 @@ gf_changelog_rpc_notify (struct rpc_clnt *rpc,
case RPC_CLNT_DISCONNECT:
case RPC_CLNT_MSG:
case RPC_CLNT_DESTROY:
+ case RPC_CLNT_PING:
break;
}
diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c
index cada369ba0f..caa5bbbadcd 100644
--- a/xlators/features/changelog/src/changelog-ev-handle.c
+++ b/xlators/features/changelog/src/changelog-ev-handle.c
@@ -179,6 +179,8 @@ changelog_rpc_notify (struct rpc_clnt *rpc,
/* Free up mydata */
changelog_rpc_clnt_unref (crpc);
break;
+ case RPC_CLNT_PING:
+ break;
}
return 0;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 77b7d55fa29..f0713e6e64a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -5038,6 +5038,24 @@ out:
return ret;
}
+/*
+ * Set "iam-nfs-daemon" to "yes" on every cluster/replicate xlator in the
+ * graph.  Stops at the first xlator_set_option failure and returns its
+ * result; returns 0 when every matching xlator was updated (or none
+ * matched).
+ */
+static int
+volgen_graph_set_iam_nfsd (const volgen_graph_t *graph)
+{
+        xlator_t *xl = first_of ((volgen_graph_t *)graph);
+        int       rc = 0;
+
+        while (xl) {
+                if (strcmp (xl->type, "cluster/replicate") == 0) {
+                        rc = xlator_set_option (xl, "iam-nfs-daemon", "yes");
+                        if (rc)
+                                return rc;
+                }
+                xl = xl->next;
+        }
+
+        return rc;
+}
+
/* builds a graph for nfs server role, with option overrides in mod_dict */
int
build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)
@@ -5176,6 +5194,10 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)
if (ret)
goto out;
+ ret = volgen_graph_set_iam_nfsd (&cgraph);
+ if (ret)
+ goto out;
+
ret = volgen_graph_merge_sub (graph, &cgraph, 1);
if (ret)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 5d164fd8df3..2dd5a92f310 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -287,6 +287,11 @@ __glusterd_handle_create_volume (rpcsvc_request_t *req)
int32_t type = 0;
char *username = NULL;
char *password = NULL;
+#ifdef IPV6_DEFAULT
+ char *addr_family = "inet6";
+#else
+ char *addr_family = "inet";
+#endif
GF_ASSERT (req);
@@ -395,11 +400,12 @@ __glusterd_handle_create_volume (rpcsvc_request_t *req)
if (conf->op_version >= GD_OP_VERSION_3_8_0) {
ret = dict_set_dynstr_with_alloc (dict,
"transport.address-family",
- "inet");
+ addr_family);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to set "
- "transport.address-family");
+ "transport.address-family "
+ "to %s", addr_family);
goto out;
}
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 728da74b7a6..8b2ac810e09 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3323,6 +3323,39 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_10_1,
.flags = OPT_FLAG_CLIENT_OPT
},
+
+ /* Halo replication options */
+ { .key = "cluster.halo-enabled",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_11_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.halo-shd-max-latency",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_11_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.halo-nfsd-max-latency",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_11_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.halo-max-latency",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_11_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.halo-max-replicas",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_11_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.halo-min-replicas",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_11_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
{ .key = NULL
}
};
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 6c4cdfed062..b5c90ba1dff 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -186,6 +186,21 @@ start_glusterfs ()
fi
#options with values start here
+ if [ -n "$halo_max_latency" ]; then
+ cmd_line=$(echo "$cmd_line --xlator-option \
+ *replicate*.halo-max-latency=$halo_max_latency");
+ fi
+
+ if [ -n "$halo_max_replicas" ]; then
+ cmd_line=$(echo "$cmd_line --xlator-option \
+ *replicate*.halo-max-replicas=$halo_max_replicas");
+ fi
+
+ if [ -n "$halo_min_replicas" ]; then
+ cmd_line=$(echo "$cmd_line --xlator-option \
+ *replicate*.halo-min-replicas=$halo_min_replicas");
+ fi
+
if [ -n "$log_level" ]; then
cmd_line=$(echo "$cmd_line --log-level=$log_level");
fi
@@ -479,6 +494,15 @@ with_options()
[ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"
fuse_mountopts="${fuse_mountopts}$key=\"$value\""
;;
+ "halo-max-latency")
+ halo_max_latency=$value
+ ;;
+ "halo-max-replicas")
+ halo_max_replicas=$value
+ ;;
+ "halo-min-replicas")
+ halo_min_replicas=$value
+ ;;
x-*)
# comments or userspace application-specific options, drop them
;;
diff --git a/xlators/nfs/server/src/exports.h b/xlators/nfs/server/src/exports.h
index bc9af2f0b8b..0079b9a3deb 100644
--- a/xlators/nfs/server/src/exports.h
+++ b/xlators/nfs/server/src/exports.h
@@ -22,7 +22,7 @@
#define GF_EXP GF_NFS"-exports"
#define NETGROUP_REGEX_PATTERN "(@([a-zA-Z0-9\\(=, .])+)())"
-#define HOSTNAME_REGEX_PATTERN "[[:space:]]([a-zA-Z0-9.\\(=,*/)-]+)"
+#define HOSTNAME_REGEX_PATTERN "[[:space:]]([a-zA-Z0-9.\\(=,*/:)-]+)"
#define OPTIONS_REGEX_PATTERN "([a-zA-Z0-9=\\.]+)"
#define NETGROUP_MAX_LEN 128
diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
index 3f6415dba85..2d022263aca 100644
--- a/xlators/nfs/server/src/mount3.c
+++ b/xlators/nfs/server/src/mount3.c
@@ -1907,7 +1907,7 @@ _mnt3_get_host_from_peer (const char *peer_addr)
size_t host_len = 0;
char *colon = NULL;
- colon = strchr (peer_addr, ':');
+ colon = strrchr (peer_addr, ':');
if (!colon) {
gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_BAD_PEER,
"Bad peer %s", peer_addr);
@@ -4152,6 +4152,15 @@ mnt1svc_init (xlator_t *nfsx)
}
}
+#ifdef IPV6_DEFAULT
+ ret = dict_set_str (options, "transport.address-family", "inet6");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "dict_set_str error when trying to enable ipv6");
+ goto err;
+ }
+#endif
+
ret = rpcsvc_create_listeners (nfs->rpcsvc, options, nfsx->name);
if (ret == -1) {
gf_msg (GF_NFS, GF_LOG_ERROR, errno,
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index 3e7199c036d..1973b95c5a5 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -204,6 +204,9 @@ nfs_program_register_portmap_all (struct nfs_state *nfs)
if (nfs->override_portnum)
prog->progport = nfs->override_portnum;
(void) rpcsvc_program_register_portmap (prog, prog->progport);
+#ifdef IPV6_DEFAULT
+ (void) rpcsvc_program_register_rpcbind6 (prog, prog->progport);
+#endif
}
return (0);
@@ -339,6 +342,17 @@ nfs_init_versions (struct nfs_state *nfs, xlator_t *this)
if (version->required)
goto err;
}
+#ifdef IPV6_DEFAULT
+ ret = rpcsvc_program_register_rpcbind6 (prog,
+ prog->progport);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_PGM_REG_FAIL,
+ "Program (ipv6) %s registration failed",
+ prog->progname);
+ goto err;
+ }
+#endif
}
}
@@ -901,6 +915,16 @@ nfs_init_state (xlator_t *this)
}
}
+#ifdef IPV6_DEFAULT
+ ret = dict_set_str (this->options, "transport.address-family",
+ "inet6");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto free_foppool;
+ }
+#endif
+
+
/* Right only socket support exists between nfs client and
* gluster nfs, so we can set default value as socket
*/
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 447366c0deb..a36f6e435a7 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -1553,7 +1553,7 @@ client_query_portmap_cbk (struct rpc_req *req, struct iovec *iov, int count, voi
rpc_clnt_reconfig (conf->rpc, &config);
conf->skip_notify = 1;
- conf->quick_reconnect = 1;
+ conf->quick_reconnect = 1;
out:
if (frame)
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index e8db8eed166..6cb5b6b38ca 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -464,7 +464,7 @@ int32_t
client_forget (xlator_t *this, inode_t *inode)
{
/* Nothing here */
- return 0;
+ return 0;
}
int32_t
@@ -542,7 +542,7 @@ out:
STACK_UNWIND_STRICT (lookup, frame, -1, ENOTCONN,
NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -568,7 +568,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (stat, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -597,7 +597,7 @@ out:
STACK_UNWIND_STRICT (truncate, frame, -1, ENOTCONN, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -625,7 +625,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOTCONN, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -654,7 +654,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (access, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -684,7 +684,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (readlink, frame, -1, ENOTCONN, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -715,7 +715,7 @@ out:
STACK_UNWIND_STRICT (mknod, frame, -1, ENOTCONN,
NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -745,7 +745,7 @@ out:
STACK_UNWIND_STRICT (mkdir, frame, -1, ENOTCONN,
NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -775,7 +775,7 @@ out:
STACK_UNWIND_STRICT (unlink, frame, -1, ENOTCONN,
NULL, NULL, NULL);
- return 0;
+ return 0;
}
int32_t
@@ -804,7 +804,7 @@ out:
STACK_UNWIND_STRICT (rmdir, frame, -1, ENOTCONN,
NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -834,7 +834,7 @@ out:
STACK_UNWIND_STRICT (symlink, frame, -1, ENOTCONN,
NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -864,7 +864,7 @@ out:
STACK_UNWIND_STRICT (rename, frame, -1, ENOTCONN,
NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -894,7 +894,7 @@ out:
STACK_UNWIND_STRICT (link, frame, -1, ENOTCONN,
NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -929,7 +929,7 @@ out:
STACK_UNWIND_STRICT (create, frame, -1, ENOTCONN,
NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -962,7 +962,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (open, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -997,7 +997,7 @@ out:
STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN,
NULL, 0, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1035,7 +1035,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (writev, frame, -1, ENOTCONN, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1061,7 +1061,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (flush, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -1090,7 +1090,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fsync, frame, -1, ENOTCONN, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1117,7 +1117,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fstat, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1146,7 +1146,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (opendir, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1174,7 +1174,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -1201,7 +1201,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (statfs, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
static gf_boolean_t
@@ -1390,7 +1390,7 @@ out:
if (need_unwind)
STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
- return 0;
+ return 0;
}
@@ -1420,7 +1420,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -1450,7 +1450,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1479,7 +1479,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (getxattr, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1509,7 +1509,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (xattrop, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1539,7 +1539,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1568,7 +1568,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (removexattr, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
int32_t
@@ -1595,7 +1595,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
int32_t
@@ -1651,7 +1651,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (lk, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1681,7 +1681,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (inodelk, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -1712,7 +1712,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (finodelk, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -1744,7 +1744,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (entrylk, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -1777,7 +1777,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOTCONN, NULL);
- return 0;
+ return 0;
}
@@ -1806,7 +1806,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOTCONN, 0, NULL, NULL);
- return 0;
+ return 0;
}
int32_t
@@ -1837,7 +1837,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (readdir, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1869,7 +1869,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (readdirp, frame, -1, ENOTCONN, NULL, NULL);
- return 0;
+ return 0;
}
@@ -1898,7 +1898,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (setattr, frame, -1, ENOTCONN, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int32_t
@@ -1926,7 +1926,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOTCONN, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int32_t
@@ -2152,7 +2152,7 @@ out:
if (ret)
STACK_UNWIND_STRICT (getspec, frame, -1, EINVAL, NULL);
- return 0;
+ return 0;
}
@@ -2223,6 +2223,15 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
conf = this->private;
switch (event) {
+ case RPC_CLNT_PING:
+ {
+ ret = default_notify (this, GF_EVENT_CHILD_PING, data);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "CHILD_PING notify failed");
+ conf->last_sent_event = GF_EVENT_CHILD_PING;
+ break;
+ }
case RPC_CLNT_CONNECT:
{
conf->connected = 1;
@@ -2314,7 +2323,6 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
} else {
rpc->conn.config.remote_port = 0;
-
}
break;
@@ -2666,7 +2674,7 @@ reconfigure (xlator_t *this, dict_t *options)
ret = 0;
out:
- return ret;
+ return ret;
}