Diffstat (limited to 'xlators')
-rw-r--r--  xlators/cluster/afr/src/afr-common.c | 218
-rw-r--r--  xlators/cluster/afr/src/afr-dir-read.c | 15
-rw-r--r--  xlators/cluster/afr/src/afr-dir-write.c | 14
-rw-r--r--  xlators/cluster/afr/src/afr-inode-write.c | 11
-rw-r--r--  xlators/cluster/afr/src/afr-open.c | 8
-rw-r--r--  xlators/cluster/afr/src/afr-read-txn.c | 10
-rw-r--r--  xlators/cluster/afr/src/afr-self-heal-common.c | 187
-rw-r--r--  xlators/cluster/afr/src/afr-self-heal-entry.c | 205
-rw-r--r--  xlators/cluster/afr/src/afr-self-heal-name.c | 35
-rw-r--r--  xlators/cluster/afr/src/afr-self-heal.h | 5
-rw-r--r--  xlators/cluster/afr/src/afr-self-heald.c | 184
-rw-r--r--  xlators/cluster/afr/src/afr-self-heald.h | 2
-rw-r--r--  xlators/cluster/afr/src/afr-transaction.c | 152
-rw-r--r--  xlators/cluster/afr/src/afr.c | 47
-rw-r--r--  xlators/cluster/afr/src/afr.h | 28
-rw-r--r--  xlators/cluster/dht/src/dht-common.c | 181
-rw-r--r--  xlators/cluster/dht/src/dht-common.h | 138
-rw-r--r--  xlators/cluster/dht/src/dht-diskusage.c | 12
-rw-r--r--  xlators/cluster/dht/src/dht-helper.c | 12
-rw-r--r--  xlators/cluster/dht/src/dht-inode-write.c | 32
-rw-r--r--  xlators/cluster/dht/src/dht-mem-types.h | 3
-rw-r--r--  xlators/cluster/dht/src/dht-messages.h | 36
-rw-r--r--  xlators/cluster/dht/src/dht-rebalance.c | 1146
-rw-r--r--  xlators/cluster/dht/src/dht-selfheal.c | 68
-rw-r--r--  xlators/cluster/dht/src/dht-shared.c | 85
-rw-r--r--  xlators/cluster/dht/src/nufa.c | 1
-rw-r--r--  xlators/cluster/ec/src/ec-combine.c | 19
-rw-r--r--  xlators/cluster/ec/src/ec-common.c | 78
-rw-r--r--  xlators/cluster/ec/src/ec-dir-read.c | 11
-rw-r--r--  xlators/cluster/ec/src/ec-heal.c | 124
-rw-r--r--  xlators/cluster/ec/src/ec-heald.c | 73
-rw-r--r--  xlators/cluster/ec/src/ec-inode-read.c | 27
-rw-r--r--  xlators/cluster/ec/src/ec-locks.c | 69
-rw-r--r--  xlators/cluster/ec/src/ec-types.h | 16
-rw-r--r--  xlators/cluster/ec/src/ec.c | 20
-rw-r--r--  xlators/cluster/ec/src/ec.h | 1
-rw-r--r--  xlators/debug/error-gen/src/error-gen.c | 46
-rw-r--r--  xlators/debug/io-stats/src/io-stats.c | 145
-rw-r--r--  xlators/debug/trace/src/trace.c | 20
-rw-r--r--  xlators/features/Makefile.am | 6
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c | 12
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h | 20
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 68
-rw-r--r--  xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h | 5
-rw-r--r--  xlators/features/bit-rot/src/stub/bit-rot-stub.c | 68
-rw-r--r--  xlators/features/changelog/src/changelog-helpers.c | 52
-rw-r--r--  xlators/features/changelog/src/changelog-helpers.h | 21
-rw-r--r--  xlators/features/changelog/src/changelog-messages.h | 8
-rw-r--r--  xlators/features/changelog/src/changelog.c | 51
-rw-r--r--  xlators/features/cloudsync/src/Makefile.am | 4
-rw-r--r--  xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c | 2
-rw-r--r--  xlators/features/index/src/index.c | 4
-rw-r--r--  xlators/features/leases/src/leases-internal.c | 4
-rw-r--r--  xlators/features/locks/src/clear.c | 4
-rw-r--r--  xlators/features/locks/src/common.c | 307
-rw-r--r--  xlators/features/locks/src/common.h | 41
-rw-r--r--  xlators/features/locks/src/entrylk.c | 31
-rw-r--r--  xlators/features/locks/src/inodelk.c | 162
-rw-r--r--  xlators/features/locks/src/locks.h | 41
-rw-r--r--  xlators/features/locks/src/posix.c | 216
-rw-r--r--  xlators/features/metadisp/Makefile.am | 3
-rw-r--r--  xlators/features/metadisp/src/Makefile.am | 38
-rw-r--r--  xlators/features/metadisp/src/backend.c | 45
-rw-r--r--  xlators/features/metadisp/src/fops-tmpl.c | 10
-rw-r--r--  xlators/features/metadisp/src/gen-fops.py | 160
-rw-r--r--  xlators/features/metadisp/src/metadisp-create.c | 101
-rw-r--r--  xlators/features/metadisp/src/metadisp-fops.h | 51
-rw-r--r--  xlators/features/metadisp/src/metadisp-fsync.c | 54
-rw-r--r--  xlators/features/metadisp/src/metadisp-lookup.c | 90
-rw-r--r--  xlators/features/metadisp/src/metadisp-open.c | 70
-rw-r--r--  xlators/features/metadisp/src/metadisp-readdir.c | 65
-rw-r--r--  xlators/features/metadisp/src/metadisp-setattr.c | 90
-rw-r--r--  xlators/features/metadisp/src/metadisp-stat.c | 124
-rw-r--r--  xlators/features/metadisp/src/metadisp-unlink.c | 160
-rw-r--r--  xlators/features/metadisp/src/metadisp.c | 46
-rw-r--r--  xlators/features/metadisp/src/metadisp.h | 45
-rw-r--r--  xlators/features/quota/src/quota-enforcer-client.c | 6
-rw-r--r--  xlators/features/quota/src/quota.c | 59
-rw-r--r--  xlators/features/quota/src/quota.h | 5
-rw-r--r--  xlators/features/read-only/src/worm-helper.c | 15
-rw-r--r--  xlators/features/shard/src/shard.c | 525
-rw-r--r--  xlators/features/shard/src/shard.h | 7
-rw-r--r--  xlators/features/trash/src/trash.c | 4
-rw-r--r--  xlators/features/upcall/src/upcall-internal.c | 10
-rw-r--r--  xlators/meta/src/meta-helpers.c | 9
-rw-r--r--  xlators/mgmt/glusterd/src/Makefile.am | 8
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 336
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 19
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-ganesha.c | 64
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 55
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c | 11
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-handler.c | 259
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-handshake.c | 143
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-hooks.c | 100
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-log-ops.c | 10
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-mem-types.h | 1
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-messages.h | 152
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c | 211
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-mgmt.c | 431
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-mgmt.h | 10
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 10
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.c | 283
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 47
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-pmap.c | 12
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-quota.c | 14
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-quotad-svc.c | 4
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-rebalance.c | 23
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 2
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 74
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-scrub-svc.c | 4
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 13
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 99
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-sm.c | 78
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 17
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c | 76
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-snapshot.c | 158
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-store.c | 289
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-store.h | 15
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 31
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 19
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-syncop.c | 77
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-syncop.h | 2
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c | 1033
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.h | 5
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volgen.c | 354
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 424
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volume-set.c | 54
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd.c | 69
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd.h | 18
-rw-r--r--  xlators/mount/fuse/src/fuse-bridge.c | 171
-rw-r--r--  xlators/mount/fuse/src/fuse-bridge.h | 43
-rw-r--r--  xlators/mount/fuse/src/fuse-helpers.c | 71
-rwxr-xr-x  xlators/mount/fuse/utils/mount_glusterfs.in | 9
-rw-r--r--  xlators/nfs/server/src/acl3.c | 17
-rw-r--r--  xlators/nfs/server/src/acl3.h | 2
-rw-r--r--  xlators/nfs/server/src/auth-cache.c | 4
-rw-r--r--  xlators/nfs/server/src/mount3.c | 11
-rw-r--r--  xlators/nfs/server/src/mount3udp_svc.c | 2
-rw-r--r--  xlators/nfs/server/src/nfs.c | 14
-rw-r--r--  xlators/nfs/server/src/nfs3-helpers.c | 4
-rw-r--r--  xlators/nfs/server/src/nfs3.c | 2
-rw-r--r--  xlators/nfs/server/src/nlm4.c | 17
-rw-r--r--  xlators/nfs/server/src/nlmcbk_svc.c | 5
-rw-r--r--  xlators/performance/io-cache/src/io-cache.c | 36
-rw-r--r--  xlators/performance/io-cache/src/io-cache.h | 27
-rw-r--r--  xlators/performance/io-cache/src/page.c | 6
-rw-r--r--  xlators/performance/io-threads/src/io-threads.c | 11
-rw-r--r--  xlators/performance/md-cache/src/md-cache.c | 428
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache-helper.c | 8
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache.c | 6
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache.h | 2
-rw-r--r--  xlators/performance/open-behind/src/open-behind-messages.h | 6
-rw-r--r--  xlators/performance/open-behind/src/open-behind.c | 1340
-rw-r--r--  xlators/performance/quick-read/src/quick-read.c | 41
-rw-r--r--  xlators/performance/quick-read/src/quick-read.h | 2
-rw-r--r--  xlators/performance/write-behind/src/write-behind.c | 4
-rw-r--r--  xlators/protocol/server/src/Makefile.am | 4
-rw-r--r--  xlators/protocol/server/src/server-common.c | 2
-rw-r--r--  xlators/protocol/server/src/server.c | 3
-rw-r--r--  xlators/storage/posix/src/posix-common.c | 50
-rw-r--r--  xlators/storage/posix/src/posix-entry-ops.c | 39
-rw-r--r--  xlators/storage/posix/src/posix-helpers.c | 172
-rw-r--r--  xlators/storage/posix/src/posix-inode-fd-ops.c | 105
-rw-r--r--  xlators/storage/posix/src/posix.h | 16
-rw-r--r--  xlators/system/posix-acl/src/posix-acl.c | 10
166 files changed, 9470 insertions(+), 4739 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 4ee83659c6e..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,7 +45,42 @@ afr_quorum_errno(afr_private_t *priv)
return ENOTCONN;
}
-static void
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
+{
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+void
afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
unsigned char *replies)
{
@@ -885,7 +920,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
metadatamap |= (1 << index);
}
if (metadatamap_old != metadatamap) {
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
}
break;
@@ -898,7 +933,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
datamap |= (1 << index);
}
if (datamap_old != datamap)
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
break;
default:
@@ -1062,34 +1097,6 @@ out:
}
int
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
-{
- int ret = -1;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint32_t event = 0;
- uint64_t val = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- return ret;
-
- val = ctx->read_subvol;
-
- metadatamap = (val & 0x000000000000ffff) >> 0;
- datamap = (val & 0x00000000ffff0000) >> 16;
- event = 0;
-
- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
- (((uint64_t)event) << 32);
-
- ctx->read_subvol = val;
-
- return ret;
-}
-
-int
__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -1160,22 +1167,6 @@ out:
}
int
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- if (priv->child_count <= 16)
- ret = __afr_inode_event_gen_reset_small(inode, this);
- else
- ret = -1;
-
- return ret;
-}
-
-int
afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -1241,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
return 0;
}
-int
+static int
afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
int *spb_choice)
{
int ret = -1;
-
GF_VALIDATE_OR_GOTO(this->name, inode, out);
LOCK(&inode->lock);
@@ -1258,6 +1248,40 @@ out:
return ret;
}
+/*
+ * frame is used to get the favourite-child policy. Since
+ * afr_inode_split_brain_choice_get() can be called from afr_open(), the
+ * frame may not have local->replies populated. In that case the caller
+ * passes frame as NULL, and this function handles the NULL frame.
+ */
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
+{
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
+
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
int
afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int event)
@@ -1324,30 +1348,22 @@ out:
return need_refresh;
}
-static int
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
afr_inode_ctx_t *ctx = NULL;
- GF_VALIDATE_OR_GOTO(this->name, inode, out);
-
- LOCK(&inode->lock);
- {
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- goto unlock;
-
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
ctx->need_refresh = _gf_true;
}
-unlock:
- UNLOCK(&inode->lock);
-out:
+
return ret;
}
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
@@ -1355,7 +1371,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
LOCK(&inode->lock);
{
- ret = __afr_inode_event_gen_reset(inode, this);
+ ret = __afr_inode_need_refresh_set(inode, this);
}
UNLOCK(&inode->lock);
out:
@@ -1790,7 +1806,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
ret = afr_inode_get_readable(frame, inode, this, local->readable,
&event_generation, local->transaction.type);
- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
+ if (ret == -EIO) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
err = EIO;
@@ -2290,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
* need is a low probability that multiple clients
* won't converge on the same subvolume.
*/
+ gf_uuid_copy(gfid_copy, args->gfid);
pid = getpid();
- memcpy(gfid_copy, &pid, sizeof(pid));
+ *(pid_t *)gfid_copy ^= pid;
}
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
@@ -2875,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = -1;
+ int spb_subvol = -1;
int child_count = -1;
if (*read_subvol != -1)
@@ -2885,10 +2902,10 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local = frame->local;
child_count = priv->child_count;
- afr_inode_split_brain_choice_get(local->inode, this, &spb_choice);
- if ((spb_choice >= 0) &&
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
(AFR_COUNT(success_replies, child_count) == child_count)) {
- *read_subvol = spb_choice;
+ *read_subvol = spb_subvol;
} else if (!priv->quorum_count ||
frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
*read_subvol = afr_first_up_child(frame, this);
@@ -2929,6 +2946,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
0,
};
gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
gf_boolean_t can_interpret = _gf_true;
inode_t *parent = NULL;
ia_type_t ia_type = IA_INVAL;
@@ -2972,17 +2990,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (!replies[i].valid)
continue;
- if (locked_entry && replies[i].op_ret == -1 &&
- replies[i].op_errno == ENOENT) {
- /* Second, check entry is still
- "underway" in creation */
- local->op_ret = -1;
- local->op_errno = ENOENT;
- goto error;
- }
-
- if (replies[i].op_ret == -1)
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
continue;
+ }
if (read_subvol == -1 || !readable[read_subvol]) {
read_subvol = i;
@@ -2992,6 +3005,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
}
}
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
if (read_subvol == -1)
goto error;
/* We now have a read_subvol, which is readable[] (if there
@@ -3050,7 +3069,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (read_subvol == -1)
goto cant_interpret;
if (ret) {
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
} else {
@@ -3103,7 +3122,7 @@ error:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ENODATA > ENOENT > ESTALE > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others
*/
int
@@ -3115,6 +3134,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno)
return ENOENT;
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
return new_errno;
}
@@ -3606,6 +3627,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int read_subvol = -1;
+ int ret = 0;
unsigned char *data_readable = NULL;
unsigned char *success_replies = NULL;
@@ -3627,7 +3649,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
if (!afr_has_quorum(success_replies, this, frame))
goto unwind;
- afr_replies_interpret(frame, this, local->inode, NULL);
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
data_readable);
@@ -3679,7 +3704,7 @@ afr_ta_id_file_check(void *opaque)
this = opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -3888,11 +3913,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
- else
- afr_discover_do(frame, this, 0);
+ afr_discover_do(frame, this, 0);
return 0;
out:
@@ -3993,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
return 0;
}
- if (__is_root_gfid(loc->parent->gfid)) {
- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
}
local = AFR_FRAME_INIT(frame, op_errno);
@@ -4033,11 +4053,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
- else
- afr_lookup_do(frame, this, 0);
+ afr_lookup_do(frame, this, 0);
return 0;
out:
@@ -5679,6 +5695,7 @@ afr_priv_dump(xlator_t *this)
priv->background_self_heal_count);
gf_proc_dump_write("healers", "%d", priv->healers);
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
if (priv->quorum_count == AFR_QUORUM_AUTO) {
gf_proc_dump_write("quorum-type", "auto");
} else if (priv->quorum_count == 0) {
@@ -6655,6 +6672,8 @@ afr_priv_destroy(afr_private_t *priv)
if (!priv)
goto out;
+
+ GF_FREE(priv->sh_domain);
GF_FREE(priv->last_event);
child_count = priv->child_count;
@@ -6670,6 +6689,7 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->local);
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
GF_FREE(priv->child_up);
GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
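
The afr-common.c hunks above also widen the error-precedence ladder that afr_higher_errno() applies when bricks return different errors. A minimal standalone sketch of the post-patch rule (an illustrative restatement, not part of the patch itself):

#include <errno.h>

/* Pick the error that wins when two bricks return different errnos:
 * ENODATA > ENOENT > ESTALE > ENOSPC > everything else. */
static int
higher_errno(int old_errno, int new_errno)
{
    if (old_errno == ENODATA || new_errno == ENODATA)
        return ENODATA;
    if (old_errno == ENOENT || new_errno == ENOENT)
        return ENOENT;
    if (old_errno == ESTALE || new_errno == ESTALE)
        return ESTALE;
    if (old_errno == ENOSPC || new_errno == ENOSPC)
        return ENOSPC;
    return new_errno;
}

For example, higher_errno(EIO, ENOSPC) now returns ENOSPC, so a genuine out-of-space condition is no longer masked by a lower-priority error from another brick.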
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 74f71fdc76a..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -67,7 +67,8 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -163,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
}
static void
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
int ret = -1;
gf_dirent_t *entry = NULL;
@@ -182,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
{
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
continue;
}
@@ -227,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
- local->fd);
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index e96b7d0798e..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -119,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
continue;
if (local->replies[i].op_ret < 0) {
if (local->inode)
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
if (local->parent)
- afr_inode_event_gen_reset(local->parent, this);
+ afr_inode_need_refresh_set(local->parent, this);
if (local->parent2)
- afr_inode_event_gen_reset(local->parent2, this);
+ afr_inode_need_refresh_set(local->parent2, this);
continue;
}
@@ -345,6 +345,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int pre_op_count = 0;
int failed_count = 0;
+ unsigned char *success_replies = NULL;
local = frame->local;
priv = this->private;
@@ -360,9 +361,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
failed_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
+ /* FOP succeeded on all bricks. */
if (pre_op_count == priv->child_count && !failed_count)
return;
+ /* Bail out if the FOP did not succeed on a quorum of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
+
if (priv->thin_arbiter_count) {
/*Mark new entry using ta file*/
local->is_new_entry = _gf_true;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index c01b4131d58..1d6e4f3570a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -2506,6 +2506,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
call_frame_t *transaction_frame = NULL;
int ret = -1;
int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
@@ -2516,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- if (xdata)
+ if (xdata) {
local->xdata_req = dict_copy_with_ref(xdata, NULL);
- else
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
local->xdata_req = dict_new();
+ }
if (!local->xdata_req)
goto out;
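
The fsync change above makes AFR honor a "last-fsync" hint in xdata: when an int8 value of 1 is present, delayed post-op is disabled for that transaction so changelog updates and locks are not held back. A hedged caller-side sketch (assumed usage, not taken from the patch; the syncop_fsync() signature here is the one in recent GlusterFS trees, so verify it against your branch):

dict_t *xdata = dict_new();
int ret = -ENOMEM;

if (xdata && dict_set_int8(xdata, "last-fsync", 1) == 0)
    /* final fsync on this fd: ask AFR to skip the delayed post-op */
    ret = syncop_fsync(subvol, fd, 1 /* datasync */, NULL, NULL, xdata, NULL);
if (xdata)
    dict_unref(xdata);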
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index a5b004f4258..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -137,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = 0;
+ int spb_subvol = 0;
int event_generation = 0;
int ret = 0;
int32_t op_errno = 0;
@@ -179,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = afr_inode_get_readable(frame, local->inode, this, NULL,
&event_generation, AFR_DATA_TRANSACTION);
if ((ret < 0) &&
- (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) ==
- 0) &&
- spb_choice < 0) {
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
afr_open_continue);
} else {
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 772b59f9a2f..6fc2c75145c 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -164,7 +164,7 @@ afr_ta_read_txn(void *opaque)
xdata_rsp = NULL;
/* It doesn't. So query thin-arbiter to see if it blames any data brick. */
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -272,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
int read_subvol = -1;
inode_t *inode = NULL;
int ret = -1;
- int spb_choice = -1;
+ int spb_subvol = -1;
local = frame->local;
inode = local->inode;
@@ -303,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
local->read_attempted[read_subvol] = 1;
readfn:
if (read_subvol == -1) {
- ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice);
- if ((ret == 0) && spb_choice >= 0)
- read_subvol = spb_choice;
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
}
if (read_subvol == -1) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index fdec66340ba..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -140,7 +140,7 @@ heal:
}
}
out:
- if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) {
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
ret = -afr_final_errno(local, priv);
}
loc_wipe(&loc);
@@ -1909,7 +1909,8 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
dict_t *dict = NULL;
local = frame->local;
- if (local && local->xattr_req)
+
+ if (local->xattr_req)
dict = local->xattr_req;
return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
@@ -2749,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
out:
return source;
}
+
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
+}
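
afr_anon_inode_create() above provisions a per-volume hidden directory with a fixed gfid on every up brick, so entry heals can park entries there instead of deleting them outright. The directory's name and gfid are derived from the volume id (see afr_handle_anon_inode_options() in the afr.c hunk later in this patch); a condensed sketch of that derivation, with illustrative variable names and the GlusterFS helpers assumed in scope:

char anon_name[NAME_MAX + 1];
uuid_t anon_gfid = {0};

/* name: "<AFR_ANON_DIR_PREFIX>-<volume-id>"; gfid: the volume id with one
 * bit flipped so the directory's gfid can never equal the volume id. */
snprintf(anon_name, sizeof(anon_name), "%s-%s", AFR_ANON_DIR_PREFIX,
         volume_id_str);
gf_uuid_parse(volume_id_str, anon_gfid);
anon_gfid[0] ^= 1;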
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index ac31751997f..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -16,54 +16,170 @@
#include <glusterfs/syncop-utils.h>
#include <glusterfs/events.h>
-static int
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
xlator_t *subvol = NULL;
int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
loc_t loc = {
0,
};
- char g[64];
priv = this->private;
-
subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
loc.parent = inode_ref(dir);
gf_uuid_copy(loc.pargfid, dir->gfid);
loc.name = name;
- loc.inode = inode_ref(inode);
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink(subvol, &loc, NULL, NULL);
- break;
- }
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
}
+out:
loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
return ret;
}
int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
const char *name, inode_t *inode,
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
loc_t srcloc = {
0,
};
+ loc_t anonloc = {
+ 0,
+ };
xlator_t *this = frame->this;
afr_private_t *priv = NULL;
dict_t *xdata = NULL;
@@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
0,
};
unsigned char *newentry = NULL;
- char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0};
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
priv = this->private;
iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
"Invalid ia_type (%d) or gfid(%s). source brick=%d, "
"pargfid=%s, name=%s",
- iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source,
+ iatt->ia_type, iatt_uuid_str, source,
uuid_utoa_r(dir->gfid, dir_uuid_str), name);
ret = -EINVAL;
goto out;
@@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
srcloc.inode = inode_ref(inode);
gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
- if (iatt->ia_type != IA_IFDIR)
- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
newentry[dst] = 1;
ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
sources, newentry);
if (ret)
goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
}
mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
@@ -166,6 +297,7 @@ out:
GF_FREE(linkname);
loc_wipe(&loc);
loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
return ret;
}
@@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
xattr = dict_new();
if (!xattr)
return -ENOMEM;
@@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
replies);
if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
inode->ia_type);
/* Why is ret force-set to 0? We do not care about
* index purge failing for full heal as it is quite
@@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
@@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
/* The name indices under the pgfid index dir are guaranteed
* to be regular files. Hence the hardcoding.
*/
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
ret = 0;
goto out;
}
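
Taken together, the entry-heal hunks above turn expunge into a two-step flow: entries whose gfid may still be live are first renamed into the anonymous-inode directory, and only clearly dead entries are unlinked or rmdir'd. A self-contained sketch of the parking decision (parameter names are illustrative; the authoritative logic is afr_selfheal_entry_anon_inode() above):

#include <stdbool.h>

static bool
should_park_in_anon_dir(bool is_dir, int quorum_count,
                        bool lookups_have_quorum, int lookup_success_count)
{
    if (is_dir)
        return true; /* keep the subtree around for later heals */
    if (quorum_count)
        return lookups_have_quorum; /* gfid still alive on a quorum */
    return lookup_success_count > 1; /* gfid visible on another brick */
}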
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index dace07131cb..834aac86d48 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies)
{
- loc_t loc = {
- 0,
- };
int i = 0;
afr_private_t *priv = NULL;
- char g[64];
int ret = 0;
priv = this->private;
- loc.parent = inode_ref(parent);
- gf_uuid_copy(loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref(inode);
-
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
if (replies[i].op_ret)
continue;
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
- break;
- }
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
}
- loc_wipe(&loc);
-
return ret;
}
@@ -381,7 +352,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
replies, gfid, locked_on, source, sources,
is_gfid_absent, &gfid_idx);
- if (ret)
+ if (ret || (gfid_idx < 0))
return ret;
ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 7a038fa7fe3..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -369,4 +369,9 @@ gf_boolean_t
afr_is_file_empty_on_all_children(afr_private_t *priv,
struct afr_reply *replies);
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 2219a53b277..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -94,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer)
priv = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + priv->shd.timeout;
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -222,7 +222,7 @@ out:
}
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type)
{
int ret = 0;
@@ -371,7 +371,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
event->split_brain_count = 0;
event->heal_failed_count = 0;
- time(&event->start_time);
+ event->start_time = gf_time();
event->end_time = 0;
_mask_cancellation();
}
@@ -386,7 +386,7 @@ afr_shd_sweep_done(struct subvol_healer *healer)
event = &healer->crawl_event;
shd = &(((afr_private_t *)healer->this->private)->shd);
- time(&event->end_time);
+ event->end_time = gf_time();
history = gf_memdup(event, sizeof(*event));
event->start_time = 0;
@@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = afr_shd_selfheal(healer, healer->subvol, gfid);
if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
if (ret == 2)
/* If bricks crashed in pre-op after creating indices/xattrop
@@ -843,6 +843,176 @@ out:
return need_heal;
}
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
+ }
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
+ ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
+}
+
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
+ }
+
if (ret == 0 && pre_crawl_xdata &&
!healer->crawl_event.heal_failed_count) {
afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
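
Besides adding the anonymous-inode cleanup crawl, the self-heald hunks replace raw time() calls with gf_time() for the healer's wake-up deadline and crawl timestamps. A condensed sketch of the absolute-deadline wait this feeds (structure names mirror the surrounding code; locking and error handling are elided):

struct timespec wait_till = {0};
int ret = 0;

wait_till.tv_sec = gf_time() + priv->shd.timeout;
while (!healer->rerun) {
    /* wakes up either on healer->cond or when the deadline passes */
    ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
    if (ret == ETIMEDOUT)
        break;
}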
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 687c28e6472..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
char **path_p);
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 78438f91331..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -124,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque)
this = (xlator_t *)opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -521,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
local->transaction.pre_op_sources[j] = 0;
}
-gf_boolean_t
-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t *this = NULL;
- gf_boolean_t fop_failed = _gf_false;
- unsigned char *pre_op_sources = NULL;
- int i = 0;
-
- local = frame->local;
- this = frame->this;
- priv = this->private;
- pre_op_sources = local->transaction.pre_op_sources;
-
- /* If the fop failed on the brick, it is not a source. */
- for (i = 0; i < priv->child_count; i++)
- if (local->transaction.failed_subvols[i])
- pre_op_sources[i] = 0;
-
- switch (AFR_COUNT(pre_op_sources, priv->child_count)) {
- case 1:
- if (pre_op_sources[ARBITER_BRICK_INDEX])
- fop_failed = _gf_true;
- break;
- case 0:
- fop_failed = _gf_true;
- break;
- }
-
- if (fop_failed)
- return _gf_false;
-
- return _gf_true;
-}
-
void
afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
{
@@ -971,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
priv->child_count)
return _gf_false;
- if (priv->arbiter_count) {
- if (!afr_has_arbiter_fop_cbk_quorum(frame))
- need_dirty = _gf_true;
- } else if (!afr_has_fop_cbk_quorum(frame)) {
+ if (!afr_has_fop_cbk_quorum(frame))
need_dirty = _gf_true;
- }
return need_dirty;
}
@@ -1026,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (priv->arbiter_count) {
- if (afr_has_arbiter_fop_cbk_quorum(frame))
- return;
- } else if (afr_has_fop_cbk_quorum(frame)) {
+ if (afr_has_fop_cbk_quorum(frame))
return;
- }
if (afr_need_dirty_marking(frame, this))
goto set_response;
@@ -1073,7 +1029,7 @@ set_response:
}
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc)
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
afr_private_t *priv = NULL;
@@ -1081,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
loc->parent = inode_ref(priv->root_inode);
gf_uuid_copy(loc->pargfid, loc->parent->gfid);
loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* Except afr_ta_id_file_check() which is path based, all other gluster
+ * FOPS need gfid.*/
+ return -EINVAL;
+ }
gf_uuid_copy(loc->gfid, priv->ta_gfid);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
@@ -1090,86 +1051,6 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
return 0;
}
-int
-afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local)
-{
- int ret = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int failed_count = 0;
- struct gf_flock flock = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- int i = 0;
-
- priv = this->private;
- if (!priv->thin_arbiter_count)
- return 0;
-
- failed_count = AFR_COUNT(local->transaction.failed_subvols,
- priv->child_count);
- if (!failed_count)
- return 0;
-
- GF_ASSERT(failed_count == 1);
- ret = afr_fill_ta_loc(this, &loc);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Failed to populate thin-arbiter loc for: %s.", loc.name);
- goto out;
- }
-
- xattr = dict_new();
- if (!xattr) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin(xattr, priv->pending_key[i],
- local->pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret)
- goto out;
- }
-
- flock.l_type = F_WRLCK;
- flock.l_start = 0;
- flock.l_len = 0;
-
- /*TODO: Convert to two domain locking. */
- ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
- AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL);
- if (ret)
- goto out;
-
- ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
-
- if (ret == -EINVAL) {
- gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
- "Thin-arbiter has denied post-op on %s for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
-
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
- }
- flock.l_type = F_UNLCK;
- syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY,
- &loc, F_SETLK, &flock, NULL, NULL);
-out:
- if (xattr)
- dict_unref(xattr);
-
- return ret;
-}
-
static int
afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
{
@@ -1264,9 +1145,9 @@ afr_ta_post_op_do(void *opaque)
this = local->transaction.frame->this;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -2466,8 +2347,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
goto out;
}
- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
- /*Only allow writes but shard does [f]xattrops on writes, so
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
* they are fine too*/
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index a38489d9932..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
}
}
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If the volume id is not present, don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure the volfile-id and anon-gfid are not the same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
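
The derivation above is deterministic, so every client and self-heal daemon computes the same anonymous-inode directory name and gfid from the volume id alone. A hedged standalone sketch using libuuid (build with -luuid; the volume id below is a made-up example, and the ".glusterfs-anonymous-inode" prefix is the AFR_ANON_DIR_PREFIX defined later in afr.h):

    #include <stdio.h>
    #include <uuid/uuid.h>

    int main(void)
    {
        const char *volume_id = "5a65e059-d9f6-4bee-9abd-203bb0f07bfb"; /* example only */
        char anon_name[256];
        uuid_t anon_gfid;
        char anon_gfid_str[37];

        /* Directory name: prefix + volume id, as in afr_handle_anon_inode_options(). */
        snprintf(anon_name, sizeof(anon_name),
                 ".glusterfs-anonymous-inode-%s", volume_id);

        uuid_parse(volume_id, anon_gfid);
        anon_gfid[0] ^= 1; /* flip one bit so the gfid can never equal the volume id */
        uuid_unparse(anon_gfid, anon_gfid_str);

        printf("dir  : %s\ngfid : %s\n", anon_name, anon_gfid_str);
        return 0;
    }
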
+
int
reconfigure(xlator_t *this, dict_t *options)
{
@@ -168,7 +189,8 @@ reconfigure(xlator_t *this, dict_t *options)
bool, out);
GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
- gf_string2boolean(data_self_heal, &priv->data_self_heal);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
out);
@@ -289,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
if (priv->shd.enabled) {
if ((priv->shd.enabled != enabled_old) ||
(timeout_old != priv->shd.timeout))
@@ -485,7 +511,8 @@ init(xlator_t *this)
GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
- gf_string2boolean(data_self_heal, &priv->data_self_heal);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
out);
@@ -539,7 +566,9 @@ init(xlator_t *this)
GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
if (priv->quorum_count != 0)
priv->consistent_io = _gf_false;
@@ -551,6 +580,9 @@ init(xlator_t *this)
goto out;
}
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
@@ -559,7 +591,8 @@ init(xlator_t *this)
priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
ret = -ENOMEM;
goto out;
}
@@ -1284,6 +1317,14 @@ struct volume_options options[] = {
.tags = {"replicate"},
.description = "This option exists only for backward compatibility "
"and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
{.key = {NULL}},
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 7f50a27e6c9..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -42,6 +42,7 @@
#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
#define PFLAG_PENDING (1 << 0)
#define PFLAG_SBRAIN (1 << 1)
@@ -190,6 +191,7 @@ typedef struct _afr_private {
struct list_head ta_waitq;
struct list_head ta_onwireq;
+ unsigned char *anon_inode;
unsigned char *child_up;
unsigned char *halo_child_up;
int64_t *child_latency;
@@ -275,10 +277,15 @@ typedef struct _afr_private {
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
/*For lock healing.*/
struct list_head saved_locks;
struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
typedef enum {
@@ -901,7 +908,7 @@ typedef struct _afr_local {
gf_boolean_t uninherit_done;
gf_boolean_t uninherit_value;
- /* post-op hook */
+ gf_boolean_t disable_delayed_post_op;
} transaction;
syncbarrier_t barrier;
@@ -997,7 +1004,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
int event_generation);
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
+
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
@@ -1268,8 +1278,8 @@ int
afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
int spb_choice);
int
-afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
- int *spb_choice);
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
int
afr_get_child_index_from_name(xlator_t *this, char *name);
@@ -1354,7 +1364,7 @@ int
afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc);
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
int
afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
@@ -1402,4 +1412,12 @@ afr_is_lock_mode_mandatory(dict_t *xdata);
void
afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 22ef8200911..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -17,6 +17,7 @@
#include <glusterfs/quota-common-utils.h>
#include <glusterfs/upcall-utils.h>
#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
@@ -43,15 +44,6 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
static int
dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-char *xattrs_to_heal[] = {"user.",
- POSIX_ACL_ACCESS_XATTR,
- POSIX_ACL_DEFAULT_XATTR,
- QUOTA_LIMIT_KEY,
- QUOTA_LIMIT_OBJECTS_KEY,
- GF_SELINUX_XATTR_KEY,
- GF_XATTR_MDATA_KEY,
- NULL};
-
static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
/* Check the xdata to make sure EBADF has been set by client xlator */
@@ -84,6 +76,8 @@ dht_set_fixed_dir_stat(struct iatt *stat)
static gf_boolean_t
dht_match_xattr(const char *key)
{
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
}
@@ -388,7 +382,7 @@ out:
/* Code to save hashed subvol on inode ctx as a mds subvol
*/
-static int
+int
dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol)
{
dht_inode_ctx_t *ctx = NULL;
@@ -619,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
if (local->need_xattr_heal && !heal_path) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for "
"directory gfid is %s ",
gfid_local);
+ }
}
}
@@ -695,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
dht_conf_t *conf = 0;
dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
@@ -702,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
conf = this->private;
layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
if (op_ret) {
gf_msg_debug(this->name, op_ret,
@@ -722,7 +719,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
layout);
}
out:
- if (local && local->mds_heal_fresh_lookup)
+ if (mds_heal_fresh_lookup)
DHT_STACK_DESTROY(frame);
return 0;
}
@@ -1256,7 +1253,7 @@ err:
to non-hashed subvol
*/
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
{
dht_local_t *copy_local = NULL;
call_frame_t *copy = NULL;
@@ -1268,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"No gfid exists for path %s "
"so healing xattr is not possible",
local->loc.path);
+ *op_errno = EIO;
goto out;
}
@@ -1281,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Memory allocation failed "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
} else {
copy_local->stbuf = local->stbuf;
@@ -1295,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Synctask creation failed to heal xattr "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
}
}
@@ -1651,7 +1651,7 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uint32_t vol_commit_hash = 0;
xlator_t *subvol = NULL;
int32_t check_mds = 0;
- int errst = 0;
+ int errst = 0, i = 0;
int32_t mds_xattr_val[1] = {0};
GF_VALIDATE_OR_GOTO("dht", frame, err);
@@ -1718,6 +1718,14 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->need_lookup_everywhere = 1;
} else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
+ }
+ }
+
local->need_selfheal = 1;
}
}
@@ -2153,31 +2161,18 @@ static int
dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
{
int ret = 0;
- xlator_t *this = NULL;
- char *linktoskip_key = NULL;
- this = THIS;
- GF_VALIDATE_OR_GOTO("dht", this, err);
-
- if (dht_is_tier_xlator(this))
- linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK;
- else
- linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK;
-
- ret = dict_set_int32(dict, linktoskip_key, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
- ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
return 0;
-
-err:
- return -1;
}
static int32_t
@@ -4306,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
index = conf->local_subvols_cnt;
uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
uuid_str = next_uuid_str) {
@@ -4596,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dict_del(xattr, conf->xattr_name);
dict_del(xattr, conf->mds_xattr_key);
- /* filter out following two xattrs that need not
- * be visible on the mount point for geo-rep -
- * trusted.tier.fix.layout.complete and
- * trusted.tier.tier-dht.commithash
- */
-
dict_del(xattr, conf->commithash_xattr_name);
- if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) {
- dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- }
-
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
@@ -5446,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int call_cnt = 0;
dht_local_t *local = NULL;
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
if (!gf_uuid_is_null(local->gfid)) {
gf_uuid_unparse(local->gfid, gfid_local);
@@ -5883,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- tmp = dict_get(xattr, TIERING_MIGRATION_KEY);
- if (tmp)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
ret = dht_start_rebalance_task(this, frame);
if (!ret)
@@ -6710,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
layout = local->layout;
- /* We have seen crashes in while running "rm -rf" on tier volumes
- when the layout was NULL on the hot tier. This will skip the
- entries on the subvol without a layout, hence preventing the crash
- but rmdir might fail with "directory not empty" errors*/
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
if (layout == NULL)
goto done;
@@ -10840,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...)
int had_heard_from_all = 0;
int have_heard_from_all = 0;
- struct timeval time = {
- 0,
- };
gf_defrag_info_t *defrag = NULL;
dict_t *dict = NULL;
gf_defrag_type cmd = 0;
dict_t *output = NULL;
va_list ap;
- dht_methods_t *methods = NULL;
struct gf_upcall *up_data = NULL;
struct gf_upcall_cache_invalidation *up_ci = NULL;
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- methods = &(conf->methods);
-
/* had all subvolumes reported status once till now? */
had_heard_from_all = 1;
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -10886,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...)
break;
}
- gettimeofday(&time, NULL);
LOCK(&conf->subvolume_lock);
{
conf->subvolume_status[cnt] = 1;
conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
+ conf->subvol_up_time[cnt] = gf_time();
}
UNLOCK(&conf->subvolume_lock);
@@ -10999,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
if (defrag->is_exiting)
goto unlock;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
- (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_DETACH_STATUS))
gf_defrag_status_get(conf, output);
- else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- gf_defrag_start_detach_tier(defrag);
else if (cmd == GF_DEFRAG_CMD_DETACH_START)
defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
else if (cmd == GF_DEFRAG_CMD_STOP ||
- cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||
cmd == GF_DEFRAG_CMD_DETACH_STOP)
gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
- else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
- ret = gf_defrag_pause_tier(this, defrag);
- else if (cmd == GF_DEFRAG_CMD_RESUME_TIER)
- ret = gf_defrag_resume_tier(this, defrag);
}
unlock:
UNLOCK(&defrag->lock);
@@ -11088,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
* thread has already started.
*/
if (conf->defrag && !run_defrag) {
- if (methods->migration_needed(this)) {
- run_defrag = 1;
- ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
- this, "dhtdg");
- if (ret) {
- GF_FREE(conf->defrag);
- conf->defrag = NULL;
- kill(getpid(), SIGTERM);
- }
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
}
}
}
@@ -11241,28 +11197,6 @@ out:
return ret;
}
-int32_t
-dht_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO("dht", conf, out);
- GF_VALIDATE_OR_GOTO("dht", conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-
-out:
- return ret;
-}
-
/*
This function should not be called more than once during a FOP
handling path. It is valid only for ops on files
@@ -11297,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
return 0;
}
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this)
-{
- if (strcmp(this->type, "cluster/tier") == 0)
- return _gf_true;
- return _gf_false;
-}
-
int32_t
dht_release(xlator_t *this, fd_t *fd)
{
@@ -11444,3 +11370,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
return 0;
}
+
+/* The job of this function is to check if all the xlators have updated
+ * the error in the layout. */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ layout = dht_layout_get(this, inode);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == 0) {
+ return 0;
+ }
+ }
+
+ /* Returning the first xlator error as all xlators have errors */
+ return layout->list[0].err;
+}
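
A self-contained model of the check above, with a hypothetical struct standing in for dht_layout_t's per-subvol slots: success on any one subvol clears the error, and only when every entry has failed is the first errno surfaced.

    #include <errno.h>
    #include <stdio.h>

    struct layout_entry { int err; }; /* stand-in for a dht_layout_t list entry */

    static int layout_error_check(const struct layout_entry *list, int cnt)
    {
        for (int i = 0; i < cnt; i++) {
            if (list[i].err == 0)
                return 0; /* at least one subvol is healthy */
        }
        return list[0].err; /* every subvol failed; report one representative errno */
    }

    int main(void)
    {
        struct layout_entry ok[2] = {{ENOENT}, {0}};
        struct layout_entry bad[2] = {{ENOENT}, {ESTALE}};
        printf("%d %d\n", layout_error_check(ok, 2), layout_error_check(bad, 2));
        return 0;
    }
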
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 871b7aed9b3..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -24,7 +24,6 @@
#define _DHT_H
#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
#define DHT_MDS_STR "mds"
#define GF_DHT_LOOKUP_UNHASHED_OFF 0
@@ -36,7 +35,6 @@
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
/* Namespace synchronization */
#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
-#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
@@ -52,10 +50,6 @@
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-/* Array to hold custom xattr keys
- */
-extern char *xattrs_to_heal[];
-
/* Rebalance nodeuuid flags */
#define REBAL_NODEUUID_MINE 0x01
@@ -246,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
dht_layout_t **inmem,
dht_layout_t **ondisk);
-typedef struct {
- uint64_t blocks_used;
- uint64_t pblocks_used;
- uint64_t files_used;
- uint64_t pfiles_used;
- uint64_t unhashed_blocks_used;
- uint64_t unhashed_pblocks_used;
- uint64_t unhashed_files_used;
- uint64_t unhashed_pfiles_used;
- uint64_t unhashed_fsid;
- uint64_t hashed_fsid;
-} tier_statvfs_t;
-
struct dht_local {
loc_t loc;
loc_t loc2;
@@ -276,7 +257,6 @@ struct dht_local {
struct iatt preparent;
struct iatt postparent;
struct statvfs statvfs;
- tier_statvfs_t tier_statvfs;
fd_t *fd;
inode_t *inode;
dict_t *params;
@@ -409,14 +389,7 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_STATUS = 1 + 2,
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
GF_DEFRAG_CMD_START_FORCE = 1 + 4,
- GF_DEFRAG_CMD_START_TIER = 1 + 5,
- GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
- GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
- GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
- GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
GF_DEFRAG_CMD_DETACH_START = 1 + 13,
GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
@@ -467,75 +440,6 @@ struct dht_container {
int local_subvol_index;
};
-typedef enum tier_mode_ {
- TIER_MODE_NONE = 0,
- TIER_MODE_TEST,
- TIER_MODE_WM
-} tier_mode_t;
-
-typedef enum tier_pause_state_ {
- TIER_RUNNING = 0,
- TIER_REQUEST_PAUSE,
- TIER_PAUSED
-} tier_pause_state_t;
-
-/* This Structure is only used in tiering fixlayout */
-typedef struct gf_tier_fix_layout_arg {
- xlator_t *this;
- dict_t *fix_layout;
- pthread_t thread_id;
-} gf_tier_fix_layout_arg_t;
-
-typedef struct gf_tier_conf {
- int is_tier;
- int watermark_hi;
- int watermark_low;
- int watermark_last;
- unsigned long block_size;
- fsblkcnt_t blocks_total;
- fsblkcnt_t blocks_used;
- uint64_t max_migrate_bytes;
- int max_migrate_files;
- int query_limit;
- tier_mode_t mode;
- int percent_full;
- /* These flags are only used for tier-compact */
- gf_boolean_t compact_active;
- /* These 3 flags are set to true when the client changes the */
- /* compaction mode on the command line. */
- /* When they are set, the daemon will trigger compaction as */
- /* soon as possible to activate or deactivate compaction. */
- /* If in the middle of a compaction, then the switches take */
- /* effect on the next compaction, not the current one. */
- /* If the user switches it off, we want to avoid needless */
- /* compactions. */
- /* If the user switches it on, they want to compact as soon */
- /* as possible. */
- gf_boolean_t compact_mode_switched;
- gf_boolean_t compact_mode_switched_hot;
- gf_boolean_t compact_mode_switched_cold;
- int tier_max_promote_size;
- int tier_promote_frequency;
- int tier_demote_frequency;
- int tier_compact_hot_frequency;
- int tier_compact_cold_frequency;
- uint64_t st_last_promoted_size;
- uint64_t st_last_demoted_size;
- struct synctask *pause_synctask;
- gf_timer_t *pause_timer;
- pthread_mutex_t pause_mutex;
- int promote_in_progress;
- int demote_in_progress;
- /* This Structure is only used in tiering fixlayout */
- gf_tier_fix_layout_arg_t tier_fix_layout_arg;
- /* Indicates the index of the first queryfile picked
- * in the last cycle of promote or demote */
- int32_t last_promote_qfile_index;
- int32_t last_demote_qfile_index;
- tier_pause_state_t pause_state;
- char volname[GD_VOLUME_NAME_MAX + 1];
-} gf_tier_conf_t;
-
typedef struct nodeuuid_info {
char info; /* Set to 1 if this is my node's uuid*/
uuid_t uuid; /* Store the nodeuuid as well for debugging*/
@@ -563,17 +467,10 @@ struct gf_defrag_info_ {
int cmd;
inode_t *root_inode;
uuid_t node_uuid;
- struct timeval start_time;
+ time_t start_time;
uint32_t new_commit_hash;
gf_defrag_status_t defrag_status;
gf_defrag_pattern_list_t *defrag_pattern;
- gf_tier_conf_t tier_conf;
-
- /*Data Tiering params for scanner*/
- uint64_t total_files_promoted;
- uint64_t total_files_demoted;
- int write_freq_threshold;
- int read_freq_threshold;
pthread_cond_t parallel_migration_cond;
pthread_mutex_t dfq_mutex;
@@ -609,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t;
struct dht_methods_s {
int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
- int32_t (*migration_needed)(xlator_t *this);
xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
const char *name);
};
@@ -630,7 +526,7 @@ struct dht_conf {
int subvolume_cnt;
int32_t refresh_interval;
gf_lock_t subvolume_lock;
- struct timeval last_stat_fetch;
+ time_t last_stat_fetch;
gf_lock_t layout_lock;
dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
@@ -752,6 +648,8 @@ struct dir_dfmeta {
struct list_head **head;
struct list_head **iterator;
int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
};
typedef struct dht_migrate_info {
@@ -1238,24 +1136,6 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int
gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf);
-
-int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *defrag);
-
-int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag);
-
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
@@ -1336,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout);
int
dht_refresh_layout(call_frame_t *frame);
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this);
-
int
dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
int32_t *op_errno);
@@ -1451,7 +1328,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
dict_t *src, int *uret, int *uflag);
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local);
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
int
dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
@@ -1499,4 +1376,9 @@ dht_create_lock(call_frame_t *frame, xlator_t *subvol);
int
dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 27097ca2475..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -151,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {
- 0,
- };
loc_t tmp_loc = {
0,
};
+ time_t now;
conf = this->private;
-
- gettimeofday(&tv, NULL);
-
+ now = gf_time();
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) {
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
statfs_frame = copy_frame(frame);
if (!statfs_frame) {
goto err;
@@ -198,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = now;
}
return 0;
err:
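
The gettimeofday-to-gf_time() change above also simplifies the throttle: statfs refreshes are now gated purely on second granularity. A minimal model of the pattern, with time(NULL) standing in for gf_time() and a static standing in for conf->last_stat_fetch:

    #include <stdio.h>
    #include <time.h>

    static time_t last_stat_fetch; /* models conf->last_stat_fetch */

    /* Returns 1 when a fresh statfs sweep is due. */
    static int du_refresh_due(int refresh_interval)
    {
        time_t now = time(NULL);
        if (now > refresh_interval + last_stat_fetch) {
            last_stat_fetch = now;
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", du_refresh_due(10)); /* 1: first call always refreshes */
        printf("%d\n", du_refresh_due(10)); /* 0: still within the interval */
        return 0;
    }
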
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 73a89399efd..3f2fe43d5f3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -2083,6 +2083,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_local_t *local = NULL;
xlator_t *this = NULL;
int ret = -1;
+ int op_errno = 0;
local = heal_frame->local;
main_frame = local->main_frame;
@@ -2092,11 +2093,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_set_fixed_dir_stat(&local->postparent);
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_smsg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
NULL);
+ }
}
DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
@@ -2265,6 +2267,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
int luret = -1;
int luflag = -1;
int i = 0;
+ char **xattrs_to_heal;
if (!src || !dst) {
gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
@@ -2279,6 +2282,9 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
and set it in the dst dict; here the index starts from 1 because
user xattrs were already checked in the previous statement
*/
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
for (i = 1; xattrs_to_heal[i]; i++) {
keyval = dict_get(src, xattrs_to_heal[i]);
if (keyval) {
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index eda2491e0ff..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
- if (!dht_is_tier_xlator(this)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
- "insufficient memory");
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
-
- ret = dict_set_uint32(local->xattr_req,
- GF_PROTECT_FROM_EXTERNAL_WRITES, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
- "Failed to set key %s in dictionary",
- GF_PROTECT_FROM_EXTERNAL_WRITES);
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "insufficient memory");
local->op_errno = ENOMEM;
local->op_ret = -1;
goto out;
}
}
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
dht_iatt_merge(this, &local->stbuf, postbuf);
dht_iatt_merge(this, &local->prebuf, prebuf);
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 33f9832395b..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -30,10 +30,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
- gf_tier_mt_bricklist_t,
- gf_tier_mt_ipc_ctr_params_t,
gf_dht_mt_fd_ctx_t,
- gf_tier_mt_qfile_array_t,
gf_dht_ret_cache_t,
gf_dht_nodeuuids_t,
gf_dht_mt_end
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index 026879e14af..601f8dad78b 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -38,12 +38,11 @@ GLFS_MSGID(
DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
- DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR,
- DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED,
- DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED,
- DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED,
- DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO,
- DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
@@ -69,8 +68,7 @@ GLFS_MSGID(
DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
- DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR,
- DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
@@ -96,15 +94,13 @@ GLFS_MSGID(
DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
- DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED,
- DHT_MSG_TIER_FIX_LAYOUT_STARTED, DHT_MSG_FIX_NOT_COMP,
- DHT_MSG_REMOVE_TIER_FAILED, DHT_MSG_SUBVOL_DETER_FAILED,
- DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, DHT_MSG_SIZE_FILE,
- DHT_MSG_GET_DATA_SIZE_FAILED, DHT_MSG_PTHREAD_JOIN_FAILED,
- DHT_MSG_COUNTER_THREAD_CREATE_FAILED, DHT_MSG_MIGRATION_INIT_QUEUE_FAILED,
- DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, DHT_MSG_ABORT_REBALANCE,
- DHT_MSG_CREATE_TASK_REBAL_FAILED, DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL,
- DHT_MSG_MIG_TIER_PAUSED, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
@@ -180,7 +176,6 @@ GLFS_MSGID(
"adding bricks"
#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
-#define DHT_MSG_MIG_TIER_PAUSED_STR "Migrate file paused"
#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
@@ -222,17 +217,14 @@ GLFS_MSGID(
#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
-#define DHT_MSG_LOG_TIER_STATUS_STR "lookup to cold tier on attach heal failed"
#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
-#define DHT_MSG_TIER_FIX_LAYOUT_STARTED_STR "Tiering fix layout started"
#define DHT_MSG_FIX_NOT_COMP_STR \
"Unable to retrieve fixlayout xattr. Assume background fix layout not " \
"complete"
-#define DHT_MSG_REMOVE_TIER_FAILED_STR "Failed removing tier fix layout xattr"
#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
"local subvolume determination failed with error"
#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
@@ -248,8 +240,6 @@ GLFS_MSGID(
#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
"Failed to initialise migration queue"
#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
-#define DHT_MSG_TIER_RESUME_STR "Pause end. Resume tiering"
-#define DHT_MSG_TIER_PAUSED_STR "Pause tiering"
#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
#define DHT_MSG_WOKE_STR "woken"
#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index abe2afae5dc..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -16,8 +16,8 @@
#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
#define MAX_REBAL_TYPE_SIZE 16
@@ -45,7 +45,10 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
if (meta) {
for (i = 0; i < local_subvols_cnt; i++) {
- gf_dirent_free(&meta->equeue[i]);
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
}
GF_FREE(meta->equeue);
@@ -53,6 +56,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
GF_FREE(meta->iterator);
GF_FREE(meta->offset_var);
GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
GF_FREE(meta);
}
}
@@ -84,26 +88,6 @@ dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
return;
}
-static gf_boolean_t
-dht_is_tier_command(int cmd)
-{
- gf_boolean_t is_tier = _gf_false;
-
- switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
- case GF_DEFRAG_CMD_STATUS_TIER:
- case GF_DEFRAG_CMD_START_DETACH_TIER:
- case GF_DEFRAG_CMD_STOP_DETACH_TIER:
- case GF_DEFRAG_CMD_PAUSE_TIER:
- case GF_DEFRAG_CMD_RESUME_TIER:
- is_tier = _gf_true;
- break;
- default:
- break;
- }
- return is_tier;
-}
-
static int
dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
{
@@ -112,8 +96,6 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
char *tmpstr = NULL;
char *ptr = NULL;
char *suffix = "-dht";
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
int len = 0;
eventtypes_t event = EVENT_LAST;
@@ -132,21 +114,14 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
break;
}
- if (dht_is_tier_command(cmd)) {
- /* We should have the tier volume name*/
- conf = this->private;
- defrag = conf->defrag;
- volname = defrag->tier_conf.volname;
- } else {
- /* DHT volume */
- len = strlen(this->name) - strlen(suffix);
- tmpstr = gf_strdup(this->name);
- if (tmpstr) {
- ptr = tmpstr + len;
- if (!strcmp(ptr, suffix)) {
- tmpstr[len] = '\0';
- volname = tmpstr;
- }
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
}
}
@@ -172,75 +147,6 @@ dht_strip_out_acls(dict_t *dict)
}
}
-static int
-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref,
- int *fop_errno)
-{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
-
- if (write_needed) {
- ret = syncop_write(
- to, fd, (buf + tmp_offset), (start_idx - tmp_offset),
- (offset + tmp_offset), iobref, 0, NULL, NULL);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
-
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write(to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset), (offset + tmp_offset),
- iobref, 0, NULL, NULL);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
- }
-
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
- }
-
- ret = size;
-out:
- return ret;
-}
-
/*
return values:
-1 : failure
@@ -648,7 +554,7 @@ out:
static int
__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
- int *fop_errno)
+ int *fop_errno, int file_has_holes)
{
int ret = -1;
int ret2 = -1;
@@ -703,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- if (!!dht_is_tier_xlator(this)) {
- xdata = dict_new();
- if (!xdata) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)",
- loc->path);
- goto out;
- }
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
- ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
- if (ret) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = %s ", loc->path,
- GF_CLEAN_WRITE_PROTECTION);
- goto out;
- }
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
}
ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
@@ -817,7 +720,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
/* No need to bother about 0 byte size files */
if (stbuf->ia_size > 0) {
- if (conf->use_fallocate) {
+ if (conf->use_fallocate && !file_has_holes) {
ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
if (ret < 0) {
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
@@ -844,9 +747,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
}
- }
-
- if (!conf->use_fallocate) {
+ } else {
ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
NULL);
if (ret < 0) {
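
The intent of the hunk above: dense files get their blocks preallocated with fallocate, while files detected as sparse only get their size set with ftruncate, so the destination keeps its holes. A hedged POSIX-only sketch of the same choice (the temp file and sizes are illustrative):

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        char path[] = "/tmp/dht-dst-XXXXXX";
        int fd = mkstemp(path);
        off_t size = 1 << 20;   /* pretend the source file is 1 MiB */
        int file_has_holes = 1; /* pretend ia_size > ia_blocks * 512 */

        if (fd < 0)
            return 1;

        if (!file_has_holes) {
            /* Dense file: reserve all blocks up front. */
            int rc = posix_fallocate(fd, 0, size);
            if (rc != 0)
                fprintf(stderr, "fallocate: %s\n", strerror(rc));
        } else {
            /* Sparse file: only set the size; unwritten ranges stay holes. */
            if (ftruncate(fd, size) != 0)
                perror("ftruncate");
        }

        close(fd);
        unlink(path);
        return 0;
    }
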
@@ -1097,22 +998,90 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
int ret = 0;
int count = 0;
off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
uint64_t total = 0;
size_t read_size = 0;
+ size_t data_block_size = 0;
dict_t *xdata = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
+
/* if file size is '0', no need to enter this loop */
while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
- ? DHT_REBALANCE_BLKSIZE
- : (ia_size - total));
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file */
+
+ /* If the previous data block is fully copied, find the next data
+ * segment, starting at the offset of the last byte read and written. */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
+ }
+
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* An error here is a real one: if the seek for a data segment
+ * succeeded, another hole must necessarily exist (EOF counts
+ * as a hole). */
+ *fop_errno = -ret;
+ break;
+ }
+
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
+ }
ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
&iobref, NULL, NULL, NULL);
+
if (!ret || (ret < 0)) {
if (!ret) {
/* File was probably truncated*/
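
The loop added in the hunk above follows the classic SEEK_DATA/SEEK_HOLE copy pattern that syncop_seek() exposes over the subvolumes. A standalone Linux sketch of the same idea (assuming a filesystem with seek-hole support; error handling trimmed to the essentials):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Copy only the allocated segments of src into dst, preserving holes. */
    static int copy_data_segments(int src, int dst)
    {
        static char buf[1 << 16];
        off_t hole = 0;

        for (;;) {
            off_t data = lseek(src, hole, SEEK_DATA);
            if (data < 0)
                return errno == ENXIO ? 0 : -1; /* ENXIO: no data past this point */

            /* EOF counts as a hole, so this seek must succeed after SEEK_DATA. */
            hole = lseek(src, data, SEEK_HOLE);
            if (hole < 0)
                return -1;

            for (off_t off = data; off < hole;) {
                size_t want = (size_t)(hole - off) < sizeof(buf)
                                  ? (size_t)(hole - off)
                                  : sizeof(buf);
                ssize_t n = pread(src, buf, want, off);
                if (n <= 0 || pwrite(dst, buf, (size_t)n, off) != n)
                    return -1;
                off += n;
            }
        }
    }

    int main(int argc, char **argv)
    {
        if (argc != 3)
            return fprintf(stderr, "usage: %s SRC DST\n", argv[0]), 1;
        int src = open(argv[1], O_RDONLY);
        int dst = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (src < 0 || dst < 0)
            return 1;
        return copy_data_segments(src, dst) ? 1 : 0;
    }
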
@@ -1124,57 +1093,42 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
break;
}
- if (hole_exists) {
- ret = dht_write_with_holes(to, dst, vector, count, ret, offset,
- iobref, fop_errno);
- } else {
- if (!conf->force_migration && !dht_is_tier_xlator(this)) {
+ if (!conf->force_migration) {
+ if (!xdata) {
+ xdata = dict_new();
if (!xdata) {
- xdata = dict_new();
- if (!xdata) {
- gf_msg("dht", GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "insufficient memory");
- ret = -1;
- *fop_errno = ENOMEM;
- break;
- }
+ gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "insufficient memory");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
- /* Fail this write and abort rebalance if we
- * detect a write from client since migration of
- * this file started. This is done to avoid
- * potential data corruption due to out of order
- * writes from rebalance and client to the same
- * region (as compared between src and dst
- * files). See
- * https://github.com/gluster/glusterfs/issues/308
- * for more details.
- */
- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1);
- if (ret) {
- gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
- "failed to set dict");
- ret = -1;
- *fop_errno = ENOMEM;
- break;
- }
+ /* Fail this write and abort rebalance if we
+ * detect a write from client since migration of
+ * this file started. This is done to avoid
+ * potential data corruption due to out of order
+ * writes from rebalance and client to the same
+ * region (as compared between src and dst
+ * files). See
+ * https://github.com/gluster/glusterfs/issues/308
+ * for more details.
+ */
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
+ "failed to set dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
}
}
- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
- NULL, xdata, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- }
- }
-
- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) &&
- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused");
- ret = -1;
}
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
if (ret < 0) {
+ *fop_errno = -ret;
break;
}
@@ -1568,6 +1522,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
xlator_t *old_target = NULL;
xlator_t *hashed_subvol = NULL;
fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
if (from == to) {
gf_msg_debug(this->name, 0,
@@ -1578,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* If defrag is NULL, it should be assumed that migration is triggered
- * from client using the trusted.distribute.migrate-data virtual xattr
- */
- defrag = conf->defrag;
-
- /* migration of files from clients is restricted to non-tiered clients
- * for now */
- if (!defrag && dht_is_tier_xlator(this)) {
- ret = ENOTSUP;
- goto out;
- }
-
- if (defrag && defrag->tier_conf.is_tier)
- log_level = GF_LOG_TRACE;
-
gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -1739,9 +1679,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
- fop_errno);
+ fop_errno, file_has_holes);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"Create dst failed"
@@ -1785,8 +1729,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
* destination. We need to do update this only post migration
* as in case of failure the linkto needs to point to the source
* subvol */
- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf,
- &dst_fd, fop_errno);
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
"Create dst failed"
@@ -1873,9 +1817,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = 0;
goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes,
@@ -1890,7 +1831,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* TODO: Sync the locks */
- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
loc->path, to->name, strerror(-ret));
@@ -2333,14 +2282,12 @@ out:
}
}
- if (!dht_is_tier_xlator(this)) {
- lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES,
- NULL, NULL);
- if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
- gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
- "%s: removexattr failed key %s", loc->path,
- GF_PROTECT_FROM_EXTERNAL_WRITES);
- }
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
}
if (dict)
@@ -2353,11 +2300,15 @@ out:
if (dst_fd)
syncop_close(dst_fd);
+
if (src_fd)
syncop_close(src_fd);
if (linkto_fd)
syncop_close(linkto_fd);
+ if (xdata)
+ dict_unref(xdata);
+
loc_wipe(&tmp_loc);
loc_wipe(&parent_loc);
@@ -2587,10 +2538,10 @@ out:
* all hardlinks.
*/
-int
+gf_boolean_t
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = 0;
+ gf_boolean_t ret = _gf_false;
int i = local_subvol_index;
char *str = NULL;
uint32_t hashval = 0;
@@ -2612,12 +2563,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
}
str = uuid_utoa_r(gfid, buf);
- ret = dht_hash_compute(this, 0, str, &hashval);
- if (ret == 0) {
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
index = (hashval % entry->count);
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
@@ -2630,12 +2580,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
/* None of the bricks in the subvol are up.
* CHILD_DOWN will kill the process soon */
- return 0;
+ return _gf_false;
}
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
}
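
The ownership rule in gf_defrag_should_i_migrate() is consistent hashing over the gfid: every rebalance daemon computes the same hash, and only the node whose index matches migrates the file, so each file is handled exactly once. A toy model of the agreement property (djb2 is a stand-in for gluster's actual dht_hash_compute(), so the distribution differs, but the mechanism is the same):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in hash; gluster computes this with dht_hash_compute(). */
    static uint32_t djb2(const char *s)
    {
        uint32_t h = 5381;
        while (*s)
            h = h * 33 + (unsigned char)*s++;
        return h;
    }

    static int should_i_migrate(const char *gfid_str, uint32_t node_count,
                                uint32_t my_index)
    {
        return (djb2(gfid_str) % node_count) == my_index;
    }

    int main(void)
    {
        const char *gfid = "f47ac10b-58cc-4372-a567-0e02b2c3d479"; /* made-up gfid */
        for (uint32_t n = 0; n < 3; n++) /* exactly one node prints 1 */
            printf("node %u migrates: %d\n", n, should_i_migrate(gfid, 3, n));
        return 0;
    }
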
@@ -2684,6 +2634,7 @@ gf_defrag_migrate_single_file(void *opaque)
struct iatt *iatt_ptr = NULL;
gf_boolean_t update_skippedcount = _gf_true;
int i = 0;
+ gf_boolean_t should_i_migrate = 0;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2738,11 +2689,29 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
+
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2763,12 +2732,6 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
- entry->d_stat.ia_gfid)) {
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
- goto out;
- }
-
iatt_ptr = &iatt;
hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
@@ -2911,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque)
if (defrag->stats == _gf_true) {
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
+ elapsed = gf_tvdiff(&start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration of "
"file:%s size:%" PRIu64
@@ -3091,9 +3053,9 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
dht_conf_t *conf, gf_defrag_info_t *defrag,
fd_t *fd, dict_t *migrate_data,
struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
- int *should_commit_hash, int *perrno)
+ int *perrno)
{
- int ret = -1;
+ int ret = 0;
char is_linkfile = 0;
gf_dirent_t *df_entry = NULL;
struct dht_container *tmp_container = NULL;
@@ -3109,6 +3071,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
}
if (dir_dfmeta->fetch_entries[i] == 1) {
+ if (!fd) {
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
dir_dfmeta->offset_var[i].offset,
&(dir_dfmeta->equeue[i]), xattr_req, NULL);
@@ -3268,7 +3237,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *migrate_data, int *perrno)
{
int ret = -1;
- fd_t *fd = NULL;
dht_conf_t *conf = NULL;
gf_dirent_t entries;
dict_t *xattr_req = NULL;
@@ -3289,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
int dfc_index = 0;
int throttle_up = 0;
struct dir_dfmeta *dir_dfmeta = NULL;
- int should_commit_hash = 1;
+ xlator_t *old_THIS = NULL;
gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
gettimeofday(&dir_start, NULL);
@@ -3302,28 +3270,53 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}
- fd = fd_create(loc->inode, defrag->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ old_THIS = THIS;
+ THIS = this;
+
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
ret = -1;
goto out;
}
- ret = syncop_opendir(this, loc, fd, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED,
- "Migrate data failed: Failed to open dir %s", loc->path);
- *perrno = -ret;
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
ret = -1;
+ *perrno = ENOMEM;
goto out;
}
- fd_bind(fd);
- dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
- if (!dir_dfmeta) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
- ret = -1;
- goto out;
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
+ }
}
dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
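The single shared fd is replaced by one fd per local subvolume, each opened directly against its subvolume. A hedged sketch of the matching teardown; the real cleanup lives in gf_defrag_free_dir_dfmeta(), whose body is not part of this patch:

    /* Hypothetical helper mirroring the allocation loop above. */
    static void
    dir_dfmeta_close_lfds(struct dir_dfmeta *meta, int subvol_cnt)
    {
        int i;

        if (!meta || !meta->lfd)
            return;
        for (i = 0; i < subvol_cnt; i++) {
            if (meta->lfd[i])
                fd_unref(meta->lfd[i]); /* drop the fd_create reference */
        }
        GF_FREE(meta->lfd);
        meta->lfd = NULL;
    }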
@@ -3358,6 +3351,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
}
+
ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -3370,7 +3364,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
gf_common_mt_int);
if (!dir_dfmeta->fetch_entries) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
ret = -1;
goto out;
}
@@ -3440,8 +3435,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
- defrag, fd, migrate_data, dir_dfmeta,
- xattr_req, &should_commit_hash, perrno);
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
goto out;
@@ -3485,27 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
+ elapsed = gf_tvdiff(&dir_start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration operation on dir %s took "
"%.2f secs",
loc->path, elapsed / 1e6);
ret = 0;
out:
-
+ THIS = old_THIS;
gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
if (xattr_req)
dict_unref(xattr_req);
- if (fd)
- fd_unref(fd);
-
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
/* It does not matter if it errored out - this number is
* used to calculate rebalance estimated time to complete.
* No locking required as dirs are processed by a single thread.
@@ -3513,6 +3501,7 @@ out:
defrag->num_dirs_processed++;
return ret;
}
+
int
gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout)
@@ -3527,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
* rebalance is complete.
*/
if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
- defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
return 0;
}
@@ -3573,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
return 0;
}
-/* Function for doing a named lookup on file inodes during an attach tier
- * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal
- * happens on pre-existing data. This is required so that the ctr database has
- * hardlinks of all the exisitng file in the volume. CTR xlator on the
- * brick/server side does db update/insert of the hardlink on a namelookup.
- * Currently the namedlookup is done synchronous to the fixlayout that is
- * triggered by attach tier. This is not performant, adding more time to
- * fixlayout. The performant approach is record the hardlinks on a compressed
- * datastore and then do the namelookup asynchronously later, giving the ctr db
- * eventual consistency
- * */
-int
-gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc,
- gf_dirent_t *file_dentry)
-{
- int ret = -1;
- dict_t *lookup_xdata = NULL;
- dht_conf_t *conf = NULL;
- loc_t file_loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, parent_loc, out);
-
- GF_VALIDATE_OR_GOTO(this->name, file_dentry, out);
-
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- if (!parent_loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- conf = this->private;
-
- loc_wipe(&file_loc);
-
- if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s gfid not present", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid);
-
- if (gf_uuid_is_null(parent_loc->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s"
- " gfid not present",
- parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.pargfid, parent_loc->gfid);
-
- ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Child loc build failed");
- ret = -1;
- goto out;
- }
-
- lookup_xdata = dict_new();
- if (!lookup_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed creating lookup dict for %s", file_dentry->d_name);
- goto out;
- }
-
- ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set lookup flag");
- goto out;
- }
-
- gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid);
-
- /* Sending lookup to cold tier only */
- ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL,
- lookup_xdata, NULL);
- if (ret) {
- /* If the file does not exist on the cold tier than it must */
- /* have been discovered on the hot tier. This is not an error. */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s lookup to cold tier on attach heal failed", file_loc.path);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- loc_wipe(&file_loc);
-
- if (lookup_xdata)
- dict_unref(lookup_xdata);
-
- return ret;
-}
-
int
gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout, dict_t *migrate_data)
@@ -3700,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
};
inode_t *linked_inode = NULL, *inode = NULL;
dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
int perrno = 0;
conf = this->private;
@@ -3803,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
if (!IA_ISDIR(entry->d_stat.ia_type)) {
- /* If its a fix layout during the attach
- * tier operation do lookups on files
- * on cold subvolume so that there is a
- * CTR DB Lookup Heal triggered on existing
- * data.
- * */
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- gf_fix_layout_tier_attach_lookup(this, loc, entry);
- }
-
continue;
}
loc_wipe(&entry_loc);
@@ -3829,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
} else {
- should_commit_hash = 0;
-
continue;
}
}
@@ -3893,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
} else {
- should_commit_hash = 0;
continue;
}
}
@@ -3906,11 +3772,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
migrate_data);
- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
goto out;
}
- if (ret && ret != 2) {
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
"Fix layout failed for %s", entry_loc.path);
@@ -3941,6 +3808,17 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
*/
ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+ /* In case of a race where the directory is deleted just before
+ * layout setxattr, the errors are updated in the layout structure.
+ * We can use this information to decide whether the directory
+ * has been deleted entirely.
+ */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
if (ret) {
if (-ret == ENOENT || -ret == ESTALE) {
gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
@@ -3966,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
}
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- if (ret && (ret != 2)) {
+ if (ret) {
if (perrno == ENOENT || perrno == ESTALE) {
ret = 0;
goto out;
@@ -3986,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (conf->decommission_in_progress) {
goto out;
}
-
- should_commit_hash = 0;
}
- } else if (ret == 2) {
- should_commit_hash = 0;
}
}
gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
defrag->total_failures++;
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
@@ -4021,245 +3893,34 @@ out:
if (fd)
fd_unref(fd);
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
return ret;
}
-/******************************************************************************
- * Tier background Fix layout functions
- ******************************************************************************/
-/* This is the background tier fixlayout thread */
-void *
-gf_tier_do_fix_layout(void *args)
-{
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args;
- int ret = -1;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- loc_t loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
- struct iatt parent = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out);
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out);
- this = tier_fix_layout_arg->this;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out);
-
- GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out);
-
- /* Get Root loc_t */
- dht_build_root_loc(defrag->root_inode, &loc);
- ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
- "Lookup on root failed.");
- ret = -1;
- goto out;
- }
-
- /* Start the crawl */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering Fixlayout started");
-
- ret = gf_defrag_fix_layout(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout, NULL);
- if (ret && ret != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Tiering fixlayout failed.");
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- dict = dict_new();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Failed to set dictionary value: key = %s",
- GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- ret = -1;
- goto out;
- }
-
- /* Marking the completion of tiering fix layout via a xattr on root */
- ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to set tiering fix "
- "layout completed xattr on %s",
- loc.path);
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- if (ret && defrag)
- defrag->total_failures++;
-
- if (dict)
- dict_unref(dict);
-
- return NULL;
-}
-
-int
-gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag,
- dict_t *fix_layout)
-{
- int ret = -1;
- dict_t *tier_dict = NULL;
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL;
-
- tier_dict = dict_new();
- if (!tier_dict) {
- gf_log("tier", GF_LOG_ERROR,
- "Tier fix layout failed :"
- "Creation of tier_dict failed");
- ret = -1;
- goto out;
- }
-
- /* Check if layout is fixed already */
- ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret != 0) {
- tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
-
- /*Fill crawl arguments */
- tier_fix_layout_arg->this = this;
- tier_fix_layout_arg->fix_layout = fix_layout;
-
- /* Spawn the fix layout thread so that its done in the
- * background */
- ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL,
- gf_tier_do_fix_layout, tier_fix_layout_arg,
- "tierfixl");
- if (ret) {
- gf_log("tier", GF_LOG_ERROR,
- "Thread creation failed. "
- "Background fix layout for tiering will not "
- "work.");
- defrag->total_failures++;
- goto out;
- }
- }
- ret = 0;
-out:
- if (tier_dict)
- dict_unref(tier_dict);
-
- return ret;
-}
-
-void
-gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- /* Check if background fixlayout is completed. This is not
- * multi-process safe i.e there is a possibility that by the time
- * we move to remove the xattr there it might have been cleared by some
- * other detach process from other node. We ignore the error if such
- * a thing happens */
- ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret) {
- /* Background fixlayout not complete - nothing to clear*/
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to retrieve fixlayout xattr."
- "Assume background fix layout not complete");
- goto out;
- }
-
- ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Failed removing tier fix layout "
- "xattr from %s",
- loc->path);
- goto out;
- }
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-}
-
-void
-gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag)
-{
- if (defrag->tier_conf.tier_fix_layout_arg.thread_id) {
- pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL);
- }
-}
-/******************Tier background Fix layout functions END********************/
-
int
dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
loc_t *loc)
{
dict_t *dict = NULL;
- gf_defrag_info_t *defrag = NULL;
uuid_t *uuid_ptr = NULL;
int ret = -1;
int i = 0;
int j = 0;
- defrag = conf->defrag;
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) {
- /* Find local subvolumes */
- ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL,
- NULL, NULL);
- if (ret && (ret != -ENODATA)) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
- "local "
- "subvolume determination failed with error: %d",
- -ret);
- ret = -1;
- goto out;
- }
-
- if (!ret)
- goto out;
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
}
+ if (!ret)
+ goto out;
+
ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
NULL, NULL);
if (ret) {
@@ -4350,9 +4011,6 @@ dht_file_counter_thread(void *args)
struct timespec time_to_wait = {
0,
};
- struct timeval now = {
- 0,
- };
uint64_t tmp_size = 0;
if (!args)
@@ -4362,9 +4020,8 @@ dht_file_counter_thread(void *args)
dht_build_root_loc(defrag->root_inode, &root_loc);
while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
- gettimeofday(&now, NULL);
- time_to_wait.tv_sec = now.tv_sec + 600;
- time_to_wait.tv_nsec = 0;
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
pthread_mutex_lock(&defrag->fc_mutex);
pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
@@ -4437,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
goto out;
}
- ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread,
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
(void *)defrag, "dhtfcnt");
if (ret) {
@@ -4494,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
/*Spawn Threads Here*/
while (index < thread_spawn_count) {
- ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task,
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
(void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
if (ret != 0) {
gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
@@ -4568,7 +4225,6 @@ gf_defrag_start_crawl(void *data)
dict_t *migrate_data = NULL;
dict_t *status = NULL;
glusterfs_ctx_t *ctx = NULL;
- dht_methods_t *methods = NULL;
call_frame_t *statfs_frame = NULL;
xlator_t *old_THIS = NULL;
int ret = -1;
@@ -4584,7 +4240,6 @@ gf_defrag_start_crawl(void *data)
int thread_index = 0;
pthread_t *tid = NULL;
pthread_t filecnt_thread;
- gf_boolean_t is_tier_detach = _gf_false;
gf_boolean_t fc_thread_started = _gf_false;
this = data;
@@ -4603,7 +4258,8 @@ gf_defrag_start_crawl(void *data)
if (!defrag)
goto exit;
- gettimeofday(&defrag->start_time, NULL);
+ defrag->start_time = gf_time();
+
dht_build_root_inode(this, &defrag->root_inode);
if (!defrag->root_inode)
goto out;
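Several hunks replace gettimeofday() with gf_time() where only second granularity is needed. A minimal sketch of the assumed helper:

    #include <time.h>

    /* Assumed semantics: wall-clock seconds as a time_t, turning the
     * elapsed-time computations below into integer subtraction. */
    static inline time_t
    gf_time(void)
    {
        return time(NULL);
    }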
@@ -4737,43 +4393,17 @@ gf_defrag_start_crawl(void *data)
}
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Fix layout for attach tier */
- ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout);
- if (ret) {
- goto out;
- }
-
- methods = &(conf->methods);
-
- /* Calling tier_start of tier.c */
- methods->migration_other(this, defrag);
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- if (ret)
- goto out;
- }
- } else {
- ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START)
- is_tier_detach = _gf_true;
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
@@ -4787,19 +4417,6 @@ out:
defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Wait for the tier fixlayout to
- * complete if its was started.*/
- gf_tier_wait_fix_lookup(defrag);
- }
-
- if (is_tier_detach && ret == 0) {
- /* If it was a detach remove the tier fix-layout
- * xattr on root. Ignoring the failure, as nothing has to be
- * done, logging is done in gf_tier_clear_fix_layout */
- gf_tier_clear_fix_layout(this, &loc, defrag);
- }
-
gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
@@ -4898,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
uint64_t total_processed = 0;
uint64_t tmp_count = 0;
uint64_t time_to_complete = 0;
- struct timeval now = {
- 0,
- };
double elapsed = 0;
defrag = conf->defrag;
@@ -4908,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
if (!g_totalsize)
goto out;
- gettimeofday(&now, NULL);
- elapsed = now.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* Don't calculate the estimates for the first 10 minutes.
* It is unlikely to be accurate and estimates are not required
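For reference, the size-based estimate reduces to simple proportionality over the variables shown above (g_totalsize, total_processed, elapsed); this is an illustration, not the exact function body:

    /* Extrapolate total runtime from current throughput. */
    if (elapsed > 0 && total_processed > 0) {
        double rate = total_processed / elapsed; /* bytes per second */
        time_to_complete = (uint64_t)(g_totalsize / rate);
    }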
@@ -4959,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
uint64_t lookup = 0;
uint64_t failures = 0;
uint64_t skipped = 0;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
char *status = "";
double elapsed = 0;
- struct timeval end = {
- 0,
- };
uint64_t time_to_complete = 0;
uint64_t time_left = 0;
gf_defrag_info_t *defrag = conf->defrag;
@@ -4982,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
lookup = defrag->num_files_lookedup;
failures = defrag->total_failures;
skipped = defrag->skipped;
- promoted = defrag->total_files_promoted;
- demoted = defrag->total_files_demoted;
-
- gettimeofday(&end, NULL);
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* The rebalance is still in progress */
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
if (time_to_complete && (time_to_complete > elapsed))
@@ -5007,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
if (!dict)
goto log;
- ret = dict_set_uint64(dict, "promoted", promoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count");
-
- ret = dict_set_uint64(dict, "demoted", demoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count");
-
ret = dict_set_uint64(dict, "files", files);
if (ret)
gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
@@ -5080,159 +4675,6 @@ out:
return 0;
}
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state)
-{
- pthread_mutex_lock(&tier_conf->pause_mutex);
- tier_conf->pause_state = state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-}
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf)
-{
- int state;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- state = tier_conf->pause_state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf)
-{
- int woke = 0;
- int state = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
-
- if (tier_conf->pause_state == TIER_RUNNING)
- goto out;
-
- if (tier_conf->pause_state == TIER_PAUSED)
- goto out;
-
- if (tier_conf->promote_in_progress || tier_conf->demote_in_progress)
- goto out;
-
- tier_conf->pause_state = TIER_PAUSED;
-
- if (tier_conf->pause_synctask) {
- synctask_wake(tier_conf->pause_synctask);
- tier_conf->pause_synctask = 0;
- woke = 1;
- }
-
- gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke);
-
- gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname);
-out:
- state = tier_conf->pause_state;
-
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-void
-gf_defrag_pause_tier_timeout(void *data)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- this = (xlator_t *)data;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause timer timeout");
-
- gf_defrag_check_pause_tier(&defrag->tier_conf);
-
-out:
- return;
-}
-
-int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- int ret = 0;
- struct timespec delta = {
- 0,
- };
- int delay = 2;
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
- goto out;
-
- /*
- * Set flag requesting to pause tiering. Wait 'delay' seconds for
- * tiering to actually stop as indicated by the pause state
- * before returning success or failure.
- */
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- /*
- * If migration is not underway, can pause immediately.
- */
- gf_defrag_check_pause_tier(&defrag->tier_conf);
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause tier");
-
- defrag->tier_conf.pause_synctask = synctask_get();
- delta.tv_sec = delay;
- delta.tv_nsec = 0;
- defrag->tier_conf.pause_timer = gf_timer_call_after(
- this->ctx, delta, gf_defrag_pause_tier_timeout, this);
-
- synctask_yield(defrag->tier_conf.pause_synctask);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = -1;
-out:
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Pause tiering ret=%d", ret);
-
- return ret;
-}
-
-int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME,
- "Pause end. Resume tiering");
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname);
-
- return 0;
-}
-
-int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag)
-{
- defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
-
- return 0;
-}
-
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 8657a22fd82..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1271,10 +1271,6 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
local->call_cnt = conf->subvolume_cnt;
if (op_ret < 0) {
- /* We get this error when the directory entry was not created
- * on a newky attached tier subvol. Hence proceed and do mkdir
- * on the tier subvol.
- */
if (op_errno == EINVAL) {
local->call_cnt = 1;
dht_selfheal_dir_mkdir_lookup_done(frame, this);
@@ -1326,12 +1322,15 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
{
int missing_dirs = 0;
int i = 0;
+ int op_errno = 0;
int ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
local->selfheal.force_mkdir = force;
local->selfheal.hole_cnt = 0;
@@ -1348,11 +1347,12 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_smsg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
} else {
if (!gf_uuid_is_null(local->gfid))
gf_uuid_copy(loc->gfid, local->gfid);
@@ -1370,15 +1370,44 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
return 0;
}
- if (local->hashed_subvol == NULL)
- local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ /* The MDS xattr is populated only when DHT has more than one
+ subvolume. In case of a graph switch while adding more DHT
+ subvolumes, treat the hashed subvolume as the MDS to avoid an
+ MDS check failure when running fops on the directory.
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
if (local->hashed_subvol == NULL) {
- local->op_errno = EINVAL;
- gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
- "name=%s", loc->name, "path=%s", loc->path, NULL);
- goto err;
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
}
local->current = &local->lock[0];
@@ -2154,6 +2183,15 @@ dht_dir_heal_xattrs(void *data)
if (subvol == mds_subvol)
continue;
if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
NULL);
if (ret) {
@@ -2162,6 +2200,8 @@ dht_dir_heal_xattrs(void *data)
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"set-user-xattr-failed path=%s", local->loc.path,
"subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
}
}
}
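The hunk above sets a one-shot hint in xdata before winding the setxattr and removes it afterwards. A condensed sketch of the pattern (simplified: the patch as shown deletes the key only on the success path):

    /* Ask posix to sync backend xattrs for this one setxattr, then
     * drop the hint so the same xdata can be reused for the next
     * subvolume. */
    ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
    if (ret == 0) {
        ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
                              NULL);
        dict_del(xdata, "sync_backend_xattrs");
    }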
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index d85b4d1ce13..bb72b0ffbb5 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -140,9 +140,9 @@ dht_priv_dump(xlator_t *this)
}
}
- if (conf->last_stat_fetch.tv_sec)
+ if (conf->last_stat_fetch)
gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
+ ctime(&conf->last_stat_fetch));
UNLOCK(&conf->subvolume_lock);
@@ -537,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
pattern_str = strtok_r(data, ",", &tmp_str);
while (pattern_str) {
dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
if (!pattern_list) {
goto out;
@@ -596,7 +598,6 @@ dht_init_methods(xlator_t *this)
methods = &(conf->methods);
methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
- methods->migration_needed = dht_migration_needed;
methods->migration_other = NULL;
methods->layout_search = dht_layout_search;
@@ -1045,84 +1046,6 @@ struct volume_options dht_options[] = {
/* NUFA option */
{.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
- /* tier options */
- {
- .key = {"tier-pause"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- {
- .key = {"tier-promote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- },
-
- {
- .key = {"tier-demote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "3600",
- },
-
- {
- .key = {"write-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
-
- {
- .key = {"read-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"watermark-hi"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "90",
- },
- {
- .key = {"watermark-low"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "75",
- },
- {
- .key = {"tier-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "test",
- },
- {
- .key = {"tier-compact"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- {.key = {"tier-hot-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on hot tier in system"},
- {.key = {"tier-cold-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on cold tier in system"},
- {
- .key = {"tier-max-mb"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4000",
- },
- {
- .key = {"tier-max-promote-file-size"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"tier-max-files"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "10000",
- },
- {
- .key = {"tier-query-limit"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "100",
- },
/* switch option */
{.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 59313639c45..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -595,7 +595,6 @@ nufa_init(xlator_t *this)
dht_methods_t dht_methods = {
.migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
.layout_search = dht_layout_search,
};
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index 9d712b359a0..703a30e2485 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -343,9 +343,8 @@ out:
}
static int32_t
-ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
- char *key, char *new_key, const char *def,
- gf_boolean_t global, ...)
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
{
ec_t *ec = cbk->fop->xl->private;
data_t *data[ec->nodes];
@@ -357,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
ec_dict_list(data, cbk, which, key, global);
- va_start(args, global);
+ va_start(args, fmt);
err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
va_end(args);
@@ -730,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
- return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key,
- NULL, NULL, _gf_false,
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "(<EC:%s> { })",
data->cbk->fop->xl->name);
}
if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
- return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL,
- NULL, _gf_false);
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
}
if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
@@ -767,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if (XATTR_IS_NODE_UUID(key)) {
if (data->cbk->fop->int32) {
/* List of node uuid is requested */
- return ec_dict_data_concat("{ }", data->cbk, data->which, key,
+ return ec_dict_data_concat(data->cbk, data->which, key,
GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
- _gf_true);
+ _gf_true, "{ }");
} else {
return ec_dict_data_uuid(data->cbk, data->which, key);
}
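The ec_dict_data_concat() refactor moves fmt to the last named parameter, which is what va_start() requires for a portable printf-style variadic function. A self-contained illustration (not GlusterFS code):

    #include <stdarg.h>
    #include <stdio.h>

    /* fmt must be the last named parameter so that va_start(args, fmt)
     * is well-defined; the old signature named 'global' there instead. */
    static void
    log_concat(const char *prefix, const char *fmt, ...)
    {
        va_list args;

        va_start(args, fmt);
        printf("%s: ", prefix);
        vprintf(fmt, args);
        va_end(args);
        printf("\n");
    }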
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index ded34b81aa2..b955efd8c2d 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
int32_t
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
- uintptr_t bad, dict_t *xdata)
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
@@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s, %s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes),
- ec_msg_str(fop));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
static int32_t
ec_child_select(ec_fop_data_t *fop)
{
@@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
@@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop)
(fop->locks[0].update[EC_DATA_TXN] ||
fop->locks[0].update[EC_METADATA_TXN])) {
if (ec->quorum_count && (num < ec->quorum_count)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, ec->quorum_count, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
return 0;
}
}
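Both new log paths lean on ec_bin() to render brick bitmasks. A sketch of the assumed rendering, consistent with the "least significant bit represents first client/brick" note in the message above:

    /* Sketch: write 'bits' binary digits of 'mask' into 'str', with the
     * LSB (first brick) as the rightmost digit. */
    static char *
    ec_bin_sketch(char *str, size_t size, uintptr_t mask, int32_t bits)
    {
        int32_t i;

        if (size < (size_t)bits + 1)
            return "<?>";
        for (i = 0; i < bits; i++)
            str[bits - 1 - i] = (mask & ((uintptr_t)1 << i)) ? '1' : '0';
        str[bits] = '\0';
        return str;
    }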
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index ef6b06fa4dd..f71dcfac293 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -386,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
/* Return error if opendir has not been successfully called on
* any subvolume. */
ctx = ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) || (ctx->open == 0)) {
- fop->error = EINVAL;
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
return EC_STATE_REPORT;
}
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 81f6add5bb0..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -70,6 +70,7 @@ struct ec_name_data {
char *name;
inode_t *parent;
default_args_cbk_t *replies;
+ uint32_t heal_pending;
};
static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -994,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
ret = -ENOTCONN;
goto out;
}
+
out:
if (xattr)
dict_unref(xattr);
@@ -1172,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
dict_t *xdata = NULL;
char *linkname = NULL;
ec_config_t config;
+
/* There should be just one gfid key */
EC_REPLIES_ALLOC(replies, ec->nodes);
if (gfid_db->count != 1) {
@@ -1416,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -1493,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
name_on);
- if (ret < 0)
+ if (ret < 0) {
memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
for (i = 0; i < ec->nodes; i++)
if (name_data->participants[i] && !name_on[i])
name_data->failed_on[i] = 1;
+
return 0;
}
int
ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *participants)
+ unsigned char *participants, uint32_t *pending)
{
int i = 0;
int j = 0;
@@ -1517,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
name_data.frame = frame;
name_data.participants = participants;
name_data.failed_on = alloca0(ec->nodes);
- ;
+ name_data.heal_pending = 0;
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
@@ -1536,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
break;
}
}
+ *pending += name_data.heal_pending;
+
loc_wipe(&loc);
return ret;
}
@@ -1543,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
int
__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *heal_on, unsigned char *sources,
- unsigned char *healed_sinks)
+ unsigned char *healed_sinks, uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *output = NULL;
@@ -1588,7 +1602,7 @@ unlock:
if (sources[i] || healed_sinks[i])
participants[i] = 1;
}
- ret = ec_heal_names(frame, ec, inode, participants);
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
goto out;
@@ -1609,7 +1623,8 @@ out:
int
ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *sources, unsigned char *healed_sinks)
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -1640,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
goto unlock;
}
ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
- healed_sinks);
+ healed_sinks, pending);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1961,14 +1976,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
- NULL);
+ 0, NULL);
}
return EC_STATE_END;
case -EC_STATE_REPORT:
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
- fop->error, 0, 0, 0, NULL);
+ fop->error, 0, 0, 0, 0, NULL);
}
return EC_STATE_END;
@@ -2005,14 +2020,15 @@ out:
if (fop != NULL) {
ec_manager(fop, error);
} else {
- func(frame, heal, this, -1, error, 0, 0, 0, NULL);
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
}
int32_t
ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
ec_heal_t *heal = cookie;
@@ -2481,6 +2497,58 @@ out:
return ret;
}
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
void
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
{
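ec_heal_purge_stale_index() depends on the add-array no-op trick spelled out in the next hunk: GF_XATTROP_ADD_ARRAY64 is assumed to add each element of the supplied array to the stored value, so an all-zero delta leaves the dirty counters untouched while still letting the brick-side index translator drop the now-clean entry:

    /* Assumed xattrop semantics: stored[i] += delta[i] for each i.
     * A zero delta changes nothing on disk but still triggers the
     * brick's index maintenance for this inode. */
    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0}; /* zero delta */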
@@ -2498,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t mbad = 0;
intptr_t good = 0;
intptr_t bad = 0;
+ uint32_t pending = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2533,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
if (loc->name && strlen(loc->name)) {
ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
participants);
- if (ret == 0) {
+ if (ret >= 0) {
gf_msg_debug(this->name, 0,
"%s: name heal "
"successful on %" PRIXPTR,
@@ -2551,23 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
/* Mount triggers heal only when it detects that it must need heal, shd
* triggers heals periodically which need not be thorough*/
- if (ec->shd.iamshd) {
+ if (ec->shd.iamshd && (ret <= 0)) {
ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
&need_heal);
- if (need_heal == EC_HEAL_NONEED) {
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send a zero-xattrop so that the stale index entry
+ * can be removed. We need not take a lock on this entry to do so,
+ * as xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
"Heal is not required for : %s ", uuid_utoa(loc->gfid));
goto out;
}
}
+
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
if (IA_ISREG(loc->inode->ia_type)) {
ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
healed_sinks);
} else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
} else {
ret = 0;
memcpy(sources, participants, ec->nodes);
@@ -2597,10 +2677,11 @@ out:
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
- mgood & good, mbad & bad, NULL);
+ mgood & good, mbad & bad, pending, NULL);
}
if (frame)
STACK_DESTROY(frame->root);
+
return;
}
@@ -2648,7 +2729,7 @@ ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
{
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
- 0, NULL);
+ 0, 0, NULL);
}
ec_fop_data_release(fop);
}
@@ -2835,7 +2916,7 @@ fail:
if (fop)
ec_fop_data_release(fop);
if (func)
- func(frame, data, this, -1, err, 0, 0, 0, NULL);
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
}
int
@@ -2964,6 +3045,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
goto out;
}
}
+ /* If the lock count is 0, all dirty flags are 0 and all the
+ * versions match, there is nothing left to heal here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing repeated lookups. */
+ *need_heal = EC_HEAL_PURGE_INDEX;
} else {
for (i = 0; i < ec->nodes; i++) {
/* Since each lock can only increment the dirty
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 956e73c2088..5c1586bc9c5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -62,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer)
ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + ec->shd.timeout;
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -156,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /* Logic:
+ * Status is of the form "Good: <binary>, Bad: <binary>".
+ * If the heal completed, the first '0' found by strchr() must
+ * appear after 'Bad:', i.e. after the last ':' found by strrchr().
+ */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
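Two worked inputs for the parser above, using the status format from its comment (illustrative strings, not actual shd output):

    #include <assert.h>

    /* First '0' lies after the last ':' -> heal completed. */
    assert(ec_is_heal_completed("Good: 111, Bad: 000") == _gf_true);
    /* First '0' lies inside the Good map, before the last ':'. */
    assert(ec_is_heal_completed("Good: 101, Bad: 010") == _gf_false);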
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
- * other index entries have appeared to be healed. We put a
- * mark so that we can check it later and restart a scan
- * without delay. */
- healer->rerun = _gf_true;
+ * other index entries to be healed have appeared. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%u more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (dict) {
+ dict_unref(dict);
}
return ret;
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index a891ccd0952..dad5f4d7018 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -390,7 +390,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
int32_t
ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
fop_getxattr_cbk_t func = cookie;
ec_t *ec = xl->private;
@@ -398,6 +399,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
char *str;
char bin1[65], bin2[65];
+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
if (op_ret >= 0) {
dict = dict_new();
if (dict == NULL) {
@@ -431,11 +451,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
}
out:
- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
if (dict != NULL) {
dict_unref(dict);
}
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
return 0;
}
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index 8e84977d2b3..601960d6154 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -24,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
ec_t *ec = fop->xl->private;
ec_cbk_data_t *ans = NULL;
ec_cbk_data_t *cbk = NULL;
- uintptr_t locked = 0, notlocked = 0;
+ uintptr_t locked = 0;
+ int32_t good = 0;
+ int32_t eagain = 0;
+ int32_t estale = 0;
int32_t error = -1;
+ /* There are some errors that we'll handle in a special way while trying
+ * to acquire a lock.
+ *
+ * EAGAIN: If it's found during a parallel non-blocking lock request, we
+ * consider that there's contention on the inode, so we consider
+ * the acquisition a failure and try again with a sequential
+ * blocking lock request. This will ensure that we get a lock on
+ * as many bricks as possible (ignoring EAGAIN here would cause
+ * unnecessary triggers of self-healing).
+ *
+ * If it's found during a sequential blocking lock request, it's
+ * considered an error. Lock will only succeed if there are
+ * enough other bricks locked.
+ *
+ * ESTALE: This can appear during a parallel or sequential lock request
+ * if the inode has just been unlinked. We consider this error
+ * unrecoverable, but we also don't consider it fatal. So,
+ * if it happens during parallel lock, we won't attempt a
+ * sequential one unless there are EAGAIN errors on other
+ * bricks (and are enough to form a quorum), but if we reach
+ * quorum counting the ESTALE bricks, we consider the whole
+ * result of the operation is ESTALE instead of EIO.
+ */
+
list_for_each_entry(ans, &fop->cbk_list, list)
{
if (ans->op_ret >= 0) {
@@ -34,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
error = EIO;
}
locked |= ans->mask;
+ good = ans->count;
cbk = ans;
- } else {
- if (ans->op_errno == EAGAIN) {
- switch (fop->uint32) {
- case EC_LOCK_MODE_NONE:
- case EC_LOCK_MODE_ALL:
- /* Goal is to treat non-blocking lock as failure
- * even if there is a single EAGAIN*/
- notlocked |= ans->mask;
- break;
- }
- }
+ } else if (ans->op_errno == ESTALE) {
+ estale += ans->count;
+ } else if ((ans->op_errno == EAGAIN) &&
+ (fop->uint32 != EC_LOCK_MODE_INC)) {
+ eagain += ans->count;
}
}
if (error == -1) {
- if (gf_bits_count(locked | notlocked) >= ec->fragments) {
- if (notlocked == 0) {
+ /* If we have enough quorum with succeeded and EAGAIN answers, we
+ * ignore for now any ESTALE answer. If there are EAGAIN answers,
+ * we retry with a sequential blocking lock request if needed.
+ * Otherwise we succeed. */
+ if ((good + eagain) >= ec->fragments) {
+ if (eagain == 0) {
if (fop->answer == NULL) {
fop->answer = cbk;
}
@@ -64,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
case EC_LOCK_MODE_NONE:
error = EAGAIN;
break;
-
case EC_LOCK_MODE_ALL:
fop->uint32 = EC_LOCK_MODE_INC;
break;
-
default:
+ /* This shouldn't happen because eagain cannot be > 0
+ * when fop->uint32 is EC_LOCK_MODE_INC. */
error = EIO;
break;
}
}
} else {
- if (fop->answer && fop->answer->op_ret < 0)
+ /* We have been unable to find enough candidates that will be able
+ * to take the lock. If we have quorum on some answer, we return
+ * it. Otherwise we check if ESTALE answers allow us to reach
+ * quorum. If so, we return ESTALE. */
+ if (fop->answer && fop->answer->op_ret < 0) {
error = fop->answer->op_errno;
- else
+ } else if ((good + eagain + estale) >= ec->fragments) {
+ error = ESTALE;
+ } else {
error = EIO;
+ }
}
}
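
For reference, a standalone sketch of the quorum decision ec_lock_check() implements after this change. The names below are illustrative stand-ins (the real code walks the fop's answer list and distinguishes lock modes), and the recorded-answer shortcut on failure is omitted:

#include <errno.h>
#include <stdio.h>

/* Standalone sketch of the quorum decision in ec_lock_check(). 'blocking'
 * stands for fop->uint32 == EC_LOCK_MODE_INC; 'fragments' is the minimum
 * number of bricks needed (ec->fragments). Returns 0 on success, -EAGAIN
 * to request a retry with sequential blocking locks, -ESTALE when quorum
 * is only reachable by counting ESTALE bricks, and -EIO otherwise. */
static int
lock_quorum_decision(int good, int eagain, int estale, int fragments,
                     int blocking)
{
    if (good + eagain >= fragments) {
        if (eagain == 0)
            return 0; /* enough bricks locked: proceed */
        if (!blocking)
            return -EAGAIN; /* contention: retry sequentially */
        return -EIO; /* shouldn't happen: EAGAIN isn't counted when blocking */
    }
    if (good + eagain + estale >= fragments)
        return -ESTALE; /* the inode was most likely just unlinked */
    return -EIO;
}

int
main(void)
{
    /* A 4+2 dispersed volume needs 4 fragments. */
    printf("%d\n", lock_quorum_decision(4, 0, 0, 4, 0)); /* 0 */
    printf("%d\n", lock_quorum_decision(2, 2, 0, 4, 0)); /* -EAGAIN */
    printf("%d\n", lock_quorum_decision(2, 0, 2, 4, 0)); /* -ESTALE */
    return 0;
}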
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 7829b8c27b3..de9b89bb2c9 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};
enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
@@ -186,10 +191,10 @@ struct _ec_inode {
typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
union _ec_cbk {
fop_access_cbk_t access;
@@ -621,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /* Number of heals attempted on
+ files/directories. */
+ gf_atomic_t completed; /* Number of heals completed on
+ files/directories. */
+ } shd;
};
struct _ec {
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 66b4e634911..7344be4968d 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
void
ec_up(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 1;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
void
ec_down(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 0;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -700,6 +710,8 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
}
static int
@@ -1569,6 +1581,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
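
The enriched UP/DOWN messages rely on ec_bin() to render the xl_up and xl_notify bitmasks as binary strings, one character per brick. A hedged approximation of that rendering (bitmap_to_bin() is a hypothetical stand-in; the digit order of the real helper may differ):

#include <stdint.h>
#include <stdio.h>

/* Render the low 'digits' bits of 'mask' as a '0'/'1' string, highest
 * brick first. Truncates if the buffer is too small. */
static char *
bitmap_to_bin(char *dst, size_t len, uintptr_t mask, int digits)
{
    int i;

    if ((size_t)digits >= len)
        digits = (int)len - 1;
    for (i = 0; i < digits; i++)
        dst[i] = ((mask >> (digits - 1 - i)) & 1) ? '1' : '0';
    dst[digits] = '\0';
    return dst;
}

int
main(void)
{
    char str[32];

    /* Six bricks, bricks 0-3 up: prints "Child UP = 001111". */
    printf("Child UP = %s\n", bitmap_to_bin(str, sizeof(str), 0x0f, 6));
    return 0;
}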
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index 1b210d9adc1..6f6de6d5981 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -18,6 +18,7 @@
#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
#define EC_STRIPE_CACHE_MAX_SIZE 10
#define EC_VERSION_SIZE 2
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index ff993f7b5e5..d45655ef4c3 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -31,9 +31,9 @@
sys_error_t error_no_list[] = {
[GF_FOP_LOOKUP] = {.error_no_count = 4,
.error_no = {ENOENT, ENOTDIR, ENAMETOOLONG, EAGAIN}},
- [GF_FOP_STAT] = {.error_no_count = 7,
- .error_no = {EACCES, EBADF, EFAULT, ENAMETOOLONG, ENOENT,
- ENOMEM, ENOTDIR}},
+ [GF_FOP_STAT] = {.error_no_count = 6,
+ .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOENT, ENOMEM,
+ ENOTDIR}},
[GF_FOP_READLINK] = {.error_no_count = 8,
.error_no = {EACCES, EFAULT, EINVAL, EIO, ENAMETOOLONG,
ENOENT, ENOMEM, ENOTDIR}},
@@ -79,21 +79,20 @@ sys_error_t error_no_list[] = {
[GF_FOP_WRITE] = {.error_no_count = 7,
.error_no = {EINVAL, EBADF, EFAULT, EISDIR, ENAMETOOLONG,
ENOSPC, GF_ERROR_SHORT_WRITE}},
- [GF_FOP_STATFS] = {.error_no_count = 10,
- .error_no = {EACCES, EBADF, EFAULT, EINTR, EIO,
- ENAMETOOLONG, ENOENT, ENOMEM, ENOSYS,
- ENOTDIR}},
+ [GF_FOP_STATFS] = {.error_no_count = 9,
+ .error_no = {EACCES, EFAULT, EINTR, EIO, ENAMETOOLONG,
+ ENOENT, ENOMEM, ENOSYS, ENOTDIR}},
[GF_FOP_FLUSH] = {.error_no_count = 5,
.error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOSYS,
ENOENT}},
[GF_FOP_FSYNC] = {.error_no_count = 4,
.error_no = {EBADF, EIO, EROFS, EINVAL}},
- [GF_FOP_SETXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
- [GF_FOP_GETXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
- [GF_FOP_REMOVEXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_SETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
+ [GF_FOP_GETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
+ [GF_FOP_REMOVEXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
[GF_FOP_FSETXATTR] = {.error_no_count = 4,
.error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
[GF_FOP_FGETXATTR] = {.error_no_count = 4,
@@ -125,26 +124,25 @@ sys_error_t error_no_list[] = {
ENOENT}},
[GF_FOP_FXATTROP] = {.error_no_count = 4,
.error_no = {EBADF, EIO, EROFS, EINVAL}},
- [GF_FOP_INODELK] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
+ [GF_FOP_INODELK] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
[GF_FOP_FINODELK] = {.error_no_count = 4,
.error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
- [GF_FOP_ENTRYLK] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_ENTRYLK] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
[GF_FOP_FENTRYLK] = {.error_no_count = 10,
.error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE,
ENAMETOOLONG, ENFILE, ENODEV, ENOENT,
ENOMEM}},
- [GF_FOP_SETATTR] = {.error_no_count = 11,
+ [GF_FOP_SETATTR] = {.error_no_count = 10,
.error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
- ENOMEM, ENOTDIR, EPERM, EROFS, EBADF,
- EIO}},
+ ENOMEM, ENOTDIR, EPERM, EROFS, EIO}},
[GF_FOP_FSETATTR] = {.error_no_count = 11,
.error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
ENOMEM, ENOTDIR, EPERM, EROFS, EBADF,
EIO}},
- [GF_FOP_GETSPEC] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}}};
+ [GF_FOP_GETSPEC] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}}};
int
generate_rand_no(int op_no)
@@ -1509,8 +1507,8 @@ init(xlator_t *this)
this->private = pvt;
- /* Give some seed value here */
- srand(time(NULL));
+ /* Seed the PRNG used to pick the injected errors. */
+ srand(gf_time());
ret = 0;
out:
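
The table edits above consistently drop EBADF from path-based fops (stat, statfs, setxattr, getxattr, removexattr, inodelk, entrylk, setattr, getspec), which never receive a file descriptor, while the fd-based variants keep it. A self-contained sketch of how error-gen then picks a fault, assuming the per-fop table is indexed with rand() seeded once at init (the struct here is a simplified stand-in):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef struct {
    int error_no_count;
    int error_no[10];
} sys_error_t;

/* The trimmed GF_FOP_STAT entry from the table above. */
static sys_error_t stat_errors = {
    .error_no_count = 6,
    .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOENT, ENOMEM, ENOTDIR}};

static int
pick_injected_errno(const sys_error_t *tbl)
{
    return tbl->error_no[rand() % tbl->error_no_count];
}

int
main(void)
{
    srand((unsigned)time(NULL)); /* stands in for srand(gf_time()) */
    printf("injected errno: %d\n", pick_injected_errno(&stat_errors));
    return 0;
}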
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 4e1f6e5af07..aa00c446e5a 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -135,7 +135,7 @@ struct ios_global_stats {
gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE];
gf_atomic_t fop_hits[GF_FOP_MAXVALUE];
gf_atomic_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE];
- struct timeval started_at;
+ time_t started_at;
struct ios_lat latency[GF_FOP_MAXVALUE];
uint64_t nr_opens;
uint64_t max_nr_opens;
@@ -292,9 +292,7 @@ is_fop_latency_started(call_frame_t *frame)
begin = &frame->begin; \
end = &frame->end; \
\
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \
- (end->tv_nsec - begin->tv_nsec)) / \
- 1000; \
+ elapsed = gf_tsdiff(begin, end) / 1000.0; \
throughput = op_ret / elapsed; \
\
conf = this->private; \
@@ -678,10 +676,7 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this,
FILE *logfp, ios_stats_thru_t type)
{
struct ios_stat_list *entry = NULL;
- struct timeval time = {
- 0,
- };
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -689,12 +684,9 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this,
{
list_for_each_entry(entry, &list_head->iosstats->list, list)
{
- gf_time_fmt(timestr, sizeof timestr,
- entry->iosstat->thru_counters[type].time.tv_sec,
- gf_timefmt_FT);
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS,
- time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &entry->iosstat->thru_counters[type].time,
+ gf_timefmt_FT);
ios_log(this, logfp, "%s \t %-10.2f \t %s", timestr, entry->value,
entry->iosstat->filename);
@@ -773,9 +765,8 @@ err:
int
io_stats_dump_global_to_json_logfp(xlator_t *this,
- struct ios_global_stats *stats,
- struct timeval *now, int interval,
- FILE *logfp)
+ struct ios_global_stats *stats, time_t now,
+ int interval, FILE *logfp)
{
int i = 0;
int j = 0;
@@ -801,10 +792,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
};
dict_t *xattr = NULL;
- interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) -
- (stats->started_at.tv_sec * 1000000.0 +
- stats->started_at.tv_usec)) /
- 1000000.0;
+ interval_sec = (double)(now - stats->started_at);
conf = this->private;
@@ -956,8 +944,8 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.uptime\": %" PRId64 ",", key_prefix,
- str_prefix, (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, "\"%s.%s.uptime\": %" PRIu64 ",", key_prefix,
+ str_prefix, (uint64_t)(now - stats->started_at));
ios_log(this, logfp,
"\"%s.%s.bytes_read\": "
"%" GF_PRI_ATOMIC ",",
@@ -1209,14 +1197,14 @@ out:
int
io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, FILE *logfp)
+ time_t now, int interval, FILE *logfp)
{
int i = 0;
int per_line = 0;
int index = 0;
struct ios_stat_head *list_head = NULL;
struct ios_conf *conf = NULL;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char str_header[128] = {0};
@@ -1232,8 +1220,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
ios_log(this, logfp, "\n=== Cumulative stats ===");
else
ios_log(this, logfp, "\n=== Interval %d stats ===", interval);
- ios_log(this, logfp, " Duration : %" PRId64 " secs",
- (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, " Duration : %" PRIu64 " secs",
+ (uint64_t)(now - stats->started_at));
ios_log(this, logfp, " BytesRead : %" GF_PRI_ATOMIC,
GF_ATOMIC_GET(stats->data_read));
ios_log(this, logfp, " BytesWritten : %" GF_PRI_ATOMIC "\n",
@@ -1325,11 +1313,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
if (interval == -1) {
LOCK(&conf->lock);
{
- gf_time_fmt(timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec, gf_timefmt_FT);
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time, gf_timefmt_FT);
ios_log(this, logfp,
"Current open fd's: %" PRId64 " Max open fd's: %" PRId64
" time %s",
@@ -1381,7 +1366,7 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
int
io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, dict_t *dict)
+ time_t now, int interval, dict_t *dict)
{
int ret = 0;
char key[64] = {0};
@@ -1407,7 +1392,7 @@ io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
interval);
snprintf(key, sizeof(key), "%d-duration", interval);
- sec = (uint64_t)(now->tv_sec - stats->started_at.tv_sec);
+ sec = now - stats->started_at;
ret = dict_set_uint64(dict, key, sec);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -1530,9 +1515,8 @@ out:
}
int
-io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval,
- struct ios_dump_args *args)
+io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, time_t now,
+ int interval, struct ios_dump_args *args)
{
int ret = -1;
@@ -1590,13 +1574,13 @@ ios_dump_args_init(struct ios_dump_args *args, ios_dump_type_t type,
}
static void
-ios_global_stats_clear(struct ios_global_stats *stats, struct timeval *now)
+ios_global_stats_clear(struct ios_global_stats *stats, time_t now)
{
GF_ASSERT(stats);
GF_ASSERT(now);
memset(stats, 0, sizeof(*stats));
- stats->started_at = *now;
+ stats->started_at = now;
}
int
@@ -1607,7 +1591,7 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
struct ios_global_stats cumulative = {};
struct ios_global_stats incremental = {};
int increment = 0;
- struct timeval now;
+ time_t now = 0;
GF_ASSERT(this);
GF_ASSERT(args);
@@ -1615,8 +1599,8 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
GF_ASSERT(args->type < IOS_DUMP_TYPE_MAX);
conf = this->private;
+ now = gf_time();
- gettimeofday(&now, NULL);
LOCK(&conf->lock);
{
if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
@@ -1629,17 +1613,17 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
if (!is_peek) {
increment = conf->increment++;
- ios_global_stats_clear(&conf->incremental, &now);
+ ios_global_stats_clear(&conf->incremental, now);
}
}
}
UNLOCK(&conf->lock);
if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
- io_stats_dump_global(this, &cumulative, &now, -1, args);
+ io_stats_dump_global(this, &cumulative, now, -1, args);
if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL)
- io_stats_dump_global(this, &incremental, &now, increment, args);
+ io_stats_dump_global(this, &incremental, now, increment, args);
return 0;
}
@@ -1649,9 +1633,8 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
{
struct ios_conf *conf = NULL;
struct timeval now;
- uint64_t sec = 0;
- uint64_t usec = 0;
int i = 0;
+ double usecs = 0;
uint64_t data_read = 0;
uint64_t data_written = 0;
uint64_t block_count_read = 0;
@@ -1666,23 +1649,15 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
return 0;
gettimeofday(&now, NULL);
-
- if (iosfd->opened_at.tv_usec > now.tv_usec) {
- now.tv_usec += 1000000;
- now.tv_usec--;
- }
-
- sec = now.tv_sec - iosfd->opened_at.tv_sec;
- usec = now.tv_usec - iosfd->opened_at.tv_usec;
+ usecs = gf_tvdiff(&iosfd->opened_at, &now);
gf_log(this->name, GF_LOG_INFO, "--- fd stats ---");
if (iosfd->filename)
gf_log(this->name, GF_LOG_INFO, " Filename : %s", iosfd->filename);
- if (sec)
- gf_log(this->name, GF_LOG_INFO,
- " Lifetime : %" PRId64 "secs, %" PRId64 "usecs", sec, usec);
+ if (usecs)
+ gf_log(this->name, GF_LOG_INFO, " Lifetime : %lf usecs", usecs);
data_read = GF_ATOMIC_GET(iosfd->data_read);
if (data_read)
@@ -1785,9 +1760,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
begin = &frame->begin;
end = &frame->end;
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec)) /
- 1000;
+ elapsed = gf_tsdiff(begin, end) / 1000.0;
update_ios_latency_stats(&conf->cumulative, elapsed, op);
update_ios_latency_stats(&conf->incremental, elapsed, op);
@@ -1808,7 +1781,7 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
struct ios_stat_list *entry = NULL;
int ret = -1;
ios_stats_thru_t index = IOS_STATS_THRU_MAX;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char *dict_timestr = NULL;
@@ -1827,14 +1800,9 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
ret = dict_set_uint64(resp, "max-open",
conf->cumulative.max_nr_opens);
- gf_time_fmt(timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec,
- gf_timefmt_FT);
- if (conf->cumulative.max_openfd_time.tv_sec)
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time,
+ gf_timefmt_FT);
dict_timestr = gf_strdup(timestr);
if (!dict_timestr)
@@ -3606,26 +3574,21 @@ ios_destroy_top_stats(struct ios_conf *conf)
return;
}
-static int
+static void
io_stats_clear(struct ios_conf *conf)
{
- struct timeval now;
- int ret = -1;
+ time_t now = 0;
GF_ASSERT(conf);
+ now = gf_time();
- if (!gettimeofday(&now, NULL)) {
- LOCK(&conf->lock);
- {
- ios_global_stats_clear(&conf->cumulative, &now);
- ios_global_stats_clear(&conf->incremental, &now);
- conf->increment = 0;
- }
- UNLOCK(&conf->lock);
- ret = 0;
+ LOCK(&conf->lock);
+ {
+ ios_global_stats_clear(&conf->cumulative, now);
+ ios_global_stats_clear(&conf->incremental, now);
+ conf->increment = 0;
}
-
- return ret;
+ UNLOCK(&conf->lock);
}
int32_t
@@ -3866,7 +3829,7 @@ ios_conf_destroy(struct ios_conf *conf)
_ios_destroy_dump_thread(conf);
ios_destroy_sample_buf(conf->ios_sample_buf);
LOCK_DESTROY(&conf->lock);
- GF_FREE(conf->dnscache);
+ gf_dnscache_deinit(conf->dnscache);
GF_FREE(conf);
}
@@ -3889,7 +3852,7 @@ ios_init_stats(struct ios_global_stats *stats)
for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++)
GF_ATOMIC_INIT(stats->upcall_hits[i], 0);
- gettimeofday(&stats->started_at, NULL);
+ stats->started_at = gf_time();
}
int
@@ -3978,11 +3941,14 @@ init(xlator_t *this)
gf_log(this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- ret = -1;
GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32,
out);
conf->dnscache = gf_dnscache_init(conf->ios_dnscache_ttl_sec);
+ if (!conf->dnscache) {
+ ret = -1;
+ goto out;
+ }
GF_OPTION_INIT("sys-log-level", sys_log_str, str, out);
if (sys_log_str) {
@@ -4133,12 +4099,9 @@ notify(xlator_t *this, int32_t event, void *data, ...)
}
if (GF_IOS_INFO_CLEAR == op) {
- ret = io_stats_clear(this->private);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to clear info stats");
+ io_stats_clear(this->private);
- ret = dict_set_int32(output, "stats-cleared", ret ? 0 : 1);
+ ret = dict_set_int32(output, "stats-cleared", 1);
if (ret)
gf_log(this->name, GF_LOG_ERROR,
"Failed to set stats-cleared"
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 3db2e263524..6ed0ca00342 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -22,13 +22,13 @@
static void
trace_stat_to_str(struct iatt *buf, char *str, size_t len)
{
- char atime_buf[200] = {
+ char atime_buf[GF_TIMESTR_SIZE] = {
0,
};
- char mtime_buf[200] = {
+ char mtime_buf[GF_TIMESTR_SIZE] = {
0,
};
- char ctime_buf[200] = {
+ char ctime_buf[GF_TIMESTR_SIZE] = {
0,
};
@@ -64,7 +64,7 @@ trace_stat_to_str(struct iatt *buf, char *str, size_t len)
int
dump_history_trace(circular_buffer_t *cb, void *data)
{
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -72,9 +72,7 @@ dump_history_trace(circular_buffer_t *cb, void *data)
gettimeofday () fails, it's safe to check tm and then dump the time
at which the entry was added to the buffer */
- gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_Ymd_T);
- snprintf(timestr + strlen(timestr), 256 - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_Ymd_T);
gf_proc_dump_write("TIME", "%s", timestr);
gf_proc_dump_write("FOP", "%s\n", (char *)cb->data);
@@ -2209,10 +2207,10 @@ int
trace_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {
+ char actime_str[GF_TIMESTR_SIZE] = {
0,
};
- char modtime_str[256] = {
+ char modtime_str[GF_TIMESTR_SIZE] = {
0,
};
trace_conf_t *conf = NULL;
@@ -2278,10 +2276,10 @@ int
trace_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {
+ char actime_str[GF_TIMESTR_SIZE] = {
0,
};
- char modtime_str[256] = {
+ char modtime_str[GF_TIMESTR_SIZE] = {
0,
};
trace_conf_t *conf = NULL;
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 194634b003d..c57897f11ea 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC
CLOUDSYNC_DIR = cloudsync
endif
+if BUILD_METADISP
+ METADISP_DIR = metadisp
+endif
+
SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
compress changelog gfid-access snapview-client snapview-server trash \
shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
- utime
+ utime $(METADISP_DIR)
CLEANFILES =
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
index 34e20f9df11..5cef2ffa5e5 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat)
}
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv)
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time)
{
if (!scrub_stat)
return;
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_start_time = time;
}
pthread_mutex_unlock(&scrub_stat->lock);
}
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv)
+ time_t time)
{
int lst_size = 0;
@@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_end_time = time;
- scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec -
- scrub_stat->scrub_start_tv.tv_sec;
+ scrub_stat->scrub_duration = scrub_stat->scrub_end_time -
+ scrub_stat->scrub_start_time;
snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr);
}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
index 24128b90a66..f022aa831eb 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -15,20 +15,22 @@
#include <sys/time.h>
#include <pthread.h>
+#include <glusterfs/common-utils.h>
+
struct br_scrub_stats {
- uint64_t scrubbed_files; /* Total number of scrubbed file */
+ uint64_t scrubbed_files; /* Total number of scrubbed files. */
- uint64_t unsigned_files; /* Total number of unsigned file */
+ uint64_t unsigned_files; /* Total number of unsigned files. */
- uint64_t scrub_duration; /* Duration of last scrub */
+ uint64_t scrub_duration; /* Duration of last scrub. */
- char last_scrub_time[1024]; /*last scrub completion time */
+ char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */
- struct timeval scrub_start_tv; /* Scrubbing starting time*/
+ time_t scrub_start_time; /* Scrubbing starting time. */
- struct timeval scrub_end_tv; /* Scrubbing finishing time */
+ time_t scrub_end_time; /* Scrubbing finishing time. */
- int8_t scrub_running; /* Scrub running or not */
+ int8_t scrub_running; /* Whether the scrub is running or not. */
pthread_mutex_t lock;
};
@@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat);
void
br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat);
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv);
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time);
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv);
+ time_t time);
#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
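
With plain time_t stamps replacing the timevals, duration bookkeeping reduces to a subtraction under the stats lock. A self-contained sketch of the logic now in br_update_scrub_finish_time() (simplified struct, timestr handling omitted):

#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Simplified model of br_scrub_stats after the struct change above. */
struct scrub_stats {
    time_t scrub_start_time; /* set when a scrub pass starts */
    time_t scrub_end_time;   /* set when it finishes */
    uint64_t scrub_duration; /* end - start, in seconds */
    pthread_mutex_t lock;
};

static void
update_finish_time(struct scrub_stats *s, time_t now)
{
    pthread_mutex_lock(&s->lock);
    s->scrub_end_time = now;
    s->scrub_duration = (uint64_t)(s->scrub_end_time - s->scrub_start_time);
    pthread_mutex_unlock(&s->lock);
}

int
main(void)
{
    struct scrub_stats s = {.lock = PTHREAD_MUTEX_INITIALIZER};

    s.scrub_start_time = time(NULL) - 42; /* pretend the scrub took 42s */
    update_finish_time(&s, time(NULL));
    printf("duration: %" PRIu64 " secs\n", s.scrub_duration);
    return 0;
}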
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index d20ecc7cdbe..289dd53f610 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -601,25 +601,23 @@ br_fsscan_deactivate(xlator_t *this)
static void
br_scrubber_log_time(xlator_t *this, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
br_private_t *priv = NULL;
+ time_t now = 0;
+ now = gf_time();
priv = this->private;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
- br_update_scrub_start_time(&priv->scrub_stat, &tv);
+ br_update_scrub_start_time(&priv->scrub_stat, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
"Scrubbing %s at %s", sfx, timestr);
} else {
- br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv);
+ br_update_scrub_finish_time(&priv->scrub_stat, timestr, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
"Scrubbing %s at %s", sfx, timestr);
}
@@ -628,15 +626,13 @@ br_scrubber_log_time(xlator_t *this, const char *sfx)
static void
br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ now = gf_time();
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
@@ -919,10 +915,7 @@ br_fsscan_schedule(xlator_t *this)
{
uint32_t timo = 0;
br_private_t *priv = NULL;
- struct timeval tv = {
- 0,
- };
- char timestr[1024] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
struct br_scrubber *fsscrub = NULL;
@@ -933,8 +926,7 @@ br_fsscan_schedule(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&tv, NULL);
- scrub_monitor->boot = tv.tv_sec;
+ scrub_monitor->boot = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
@@ -975,12 +967,10 @@ int32_t
br_fsscan_activate(xlator_t *this)
{
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -989,7 +979,7 @@ br_fsscan_activate(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1003,7 +993,7 @@ br_fsscan_activate(xlator_t *this)
}
pthread_mutex_unlock(&scrub_monitor->donelock);
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
(void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo);
_br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
@@ -1020,12 +1010,10 @@ br_fsscan_reschedule(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -1037,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this)
if (!fsscrub->frequency_reconf)
return 0;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1045,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this)
return -1;
}
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1073,23 +1061,19 @@ br_fsscan_ondemand(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_monitor *scrub_monitor = NULL;
priv = this->private;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
-
+ now = gf_time();
timo = BR_SCRUB_ONDEMAND;
-
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1799,7 +1783,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
tmp_count = total_count;
for (j = 0; j < count; j++) {
- len = snprintf(key, PATH_MAX, "quarantine-%d", j);
+ len = snprintf(key, sizeof(key), "quarantine-%d", j);
ret = dict_get_strn(child_dict, key, len, &entry);
if (ret)
continue;
@@ -1810,7 +1794,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
if ((len < 0) || (len >= PATH_MAX)) {
continue;
}
- snprintf(main_key, PATH_MAX, "quarantine-%d", tmp_count);
+ snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count);
ret = dict_set_dynstr_with_alloc(dict, main_key, tmp);
if (!ret)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 8d2b7f051da..6c15a166f18 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL,
BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED,
BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED,
- BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED);
+ BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED,
+ BRS_MSG_VERSION_PREPARE_FAIL);
#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed"
@@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
"daemon. Unwinding the fop"
#define BRS_MSG_SIGN_PREPARE_FAIL_STR \
"failed to prepare the signature. Unwinding the fop"
+#define BRS_MSG_VERSION_PREPARE_FAIL_STR \
+ "failed to prepare the version. Unwinding the fop"
#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding"
#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad"
#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 605a5e4c3e4..447dd47ff41 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict,
priv = this->private;
br_set_ongoingversion(obuf, oversion, priv->boot);
- return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
- sizeof(br_version_t));
+ return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
+ sizeof(br_version_t));
}
static int
@@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf,
br_set_signature(sbuf, sign, signaturelen, &size);
- return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf,
- size);
+ return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size);
}
/**
@@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame,
op_errno = ENOMEM;
dict = dict_new();
if (!dict)
- goto done;
+ goto out;
ret = br_stub_alloc_versions(&obuf, NULL, 0);
- if (ret)
- goto dealloc_dict;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ br_stub_dealloc_versions(obuf);
+ goto out;
+ }
ret = br_stub_fd_versioning(
this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk,
writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE);
-
-dealloc_versions:
- br_stub_dealloc_versions(obuf);
-dealloc_dict:
- dict_unref(dict);
-done:
+out:
+ if (dict)
+ dict_unref(dict);
if (ret) {
if (local)
frame->local = NULL;
@@ -1025,31 +1028,36 @@ static int
br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode,
br_isignature_t *sign, int *fakesuccess)
{
- int32_t ret = 0;
+ int32_t ret = -1;
size_t signaturelen = 0;
br_signature_t *sbuf = NULL;
if (!br_is_signature_type_valid(sign->signaturetype))
- goto error_return;
+ goto out;
signaturelen = sign->signaturelen;
ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen);
- if (ret)
- goto error_return;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ goto out;
+ }
ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ br_stub_dealloc_versions(sbuf);
+ goto out;
+ }
+ /* At this point sbuf has been added to the dict, so the memory will
+ * be freed when the data from the dict is destroyed. */
ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess);
- if (ret)
- goto dealloc_versions;
-
- return 0;
-
-dealloc_versions:
- br_stub_dealloc_versions(sbuf);
-error_return:
- return -1;
+out:
+ return ret;
}
static void
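
The dict_set_static_bin() to dict_set_bin() switch in both hunks transfers ownership of obuf/sbuf to the dict, which is why the success paths above no longer call br_stub_dealloc_versions(). A toy model of that ownership contract (not the real dict API):

#include <stdlib.h>
#include <string.h>

/* Toy model: a dict entry that owns its binary value frees it when the
 * entry is destroyed (dict_set_bin), while a static entry leaves the
 * buffer to the caller (dict_set_static_bin). */
struct bin_entry {
    void *data;
    int owned; /* 1: free(data) at destroy time, 0: caller keeps it */
};

static void
entry_destroy(struct bin_entry *e)
{
    if (e->owned)
        free(e->data);
    e->data = NULL;
}

int
main(void)
{
    struct bin_entry e = {.data = malloc(16), .owned = 1};

    memset(e.data, 0, 16);
    entry_destroy(&e); /* the buffer is freed here, not by the caller */
    return 0;
}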
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
index 71fe1f032a0..e561997d858 100644
--- a/xlators/features/changelog/src/changelog-helpers.c
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -242,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len)
}
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer)
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer)
{
char changelog_path[PATH_MAX + 1] = {
0,
@@ -273,7 +272,7 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
goto out;
}
- len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts,
+ len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts,
priv->rollover_count);
if (len >= sizeof(x_value)) {
ret = -1;
@@ -382,8 +381,7 @@ out:
}
static int
-changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts)
+changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ret = -1;
int notify = 0;
@@ -421,16 +419,14 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
priv->changelog_fd = -1;
}
- time_t time = (time_t)ts;
-
- /* Get GMT time */
- gmt = gmtime(&time);
+ /* Get GMT time. */
+ gmt = gmtime(&ts);
strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt);
(void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
priv->changelog_dir);
- (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%lu",
+ (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld",
priv->changelog_dir, yyyymmdd, ts);
(void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd);
@@ -593,7 +589,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -723,7 +719,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -741,12 +737,12 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
int32_t len = 0;
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE,
- "name=%lu", ts, NULL);
+ "name=%ld", ts, NULL);
CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
/* get the htime file name in ht_file_path */
- len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path,
+ len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path,
HTIME_FILE_NAME, ts);
if ((len < 0) || (len >= PATH_MAX)) {
ret = -1;
@@ -792,7 +788,7 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
goto out;
}
- (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu",
+ (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld",
HTIME_FILE_NAME, ts);
if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
strlen(ht_file_bname), 0)) {
@@ -963,8 +959,8 @@ out:
}
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale)
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale)
{
int ret = -1;
@@ -985,21 +981,12 @@ changelog_entry_length()
return sizeof(changelog_log_data_t);
}
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last)
{
- struct timeval tv = {
- 0,
- };
-
cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
-
- if (gettimeofday(&tv, NULL))
- return -1;
-
- cld->cld_roll_time = (unsigned long)tv.tv_sec;
+ cld->cld_roll_time = gf_time();
cld->cld_finale = is_last;
- return 0;
}
int
@@ -1274,7 +1261,7 @@ changelog_rollover(void *data)
while (1) {
(void)pthread_testcancel();
- tv.tv_sec = time(NULL) + priv->rollover_time;
+ tv.tv_sec = gf_time() + priv->rollover_time;
tv.tv_nsec = 0;
ret = 0; /* Reset ret to zero */
@@ -1355,12 +1342,7 @@ changelog_rollover(void *data)
if (priv->explicit_rollover == _gf_true)
sleep(1);
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret) {
- gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, NULL);
- continue;
- }
+ changelog_fill_rollover_data(&cld, _gf_false);
_mask_cancellation();
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
index 0d06d98c9e1..38fa7590c32 100644
--- a/xlators/features/changelog/src/changelog-helpers.h
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -31,7 +31,7 @@
*/
typedef struct changelog_log_data {
/* rollover related */
- unsigned long cld_roll_time;
+ time_t cld_roll_time;
/* reopen changelog? */
gf_boolean_t cld_finale;
@@ -97,12 +97,6 @@ struct changelog_encoder {
typedef struct changelog_time_slice {
/**
- * just in case we need nanosecond granularity some day.
- * field is unused as of now (maybe we'd need it later).
- */
- struct timeval tv_start;
-
- /**
* version of changelog file, incremented each time changes
* rollover.
*/
@@ -423,11 +417,11 @@ changelog_local_t *
changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
int xtra_records, gf_boolean_t update_flag);
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale);
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale);
int
changelog_open_journal(xlator_t *this, changelog_priv_t *priv);
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last);
int
changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
@@ -451,12 +445,11 @@ changelog_fsync_thread(void *data);
int
changelog_forget(xlator_t *this, inode_t *inode);
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer);
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer);
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts);
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts);
/* Geo-Rep snapshot dependency changes */
void
diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h
index 4dd56b8ee97..cb0e16c85d8 100644
--- a/xlators/features/changelog/src/changelog-messages.h
+++ b/xlators/features/changelog/src/changelog-messages.h
@@ -59,12 +59,12 @@ GLFS_MSGID(
CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT,
CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR,
CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED,
- CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED,
+ CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0,
CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY,
CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED,
CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE,
CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET,
- CHANGELOG_MSG_GET_TIME_FAILURE, CHANGELOG_MSG_HTIME_FETCH_FAILED,
+ CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2,
CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED,
CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED,
@@ -123,8 +123,6 @@ GLFS_MSGID(
#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)"
#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify"
#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed"
-#define CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED_STR \
- "failed to fill rollover data"
#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event"
#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \
"changelog local initialization failed"
@@ -144,9 +142,7 @@ GLFS_MSGID(
#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found"
#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \
"Something went wrong in dict_get_str_boolean"
-#define CHANGELOG_MSG_GET_TIME_FAILURE_STR "gettimeofday() failure"
#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set"
-#define CHANGELOG_MSG_HTIME_FETCH_FAILED_STR "unable to fetch htime"
#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper"
#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \
"translator needs a single subvolume"
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 37916f40882..6a6e5af859e 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -2036,20 +2036,20 @@ notify(xlator_t *this, int event, void *data, ...)
priv->notify_down = _gf_true;
}
UNLOCK(&priv->lock);
- list_for_each_entry_safe(listener, next, &priv->rpc->listeners,
- list)
- {
- if (listener->trans) {
- rpc_transport_unref(listener->trans);
+ if (priv->rpc) {
+ list_for_each_entry_safe(listener, next,
+ &priv->rpc->listeners, list)
+ {
+ if (listener->trans) {
+ rpc_transport_unref(listener->trans);
+ }
}
+ rpcsvc_destroy(priv->rpc);
+ priv->rpc = NULL;
}
CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
UNIX_PATH_MAX);
sys_unlink(sockfile);
- if (priv->rpc) {
- rpcsvc_destroy(priv->rpc);
- priv->rpc = NULL;
- }
if (!cleanup_notify)
default_notify(this, GF_EVENT_PARENT_DOWN, data);
}
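
This notify() fix folds the listener walk and rpcsvc_destroy() into a single NULL check on priv->rpc, so cleanup can't dereference a NULL service when the RPC was never brought up. A minimal sketch of the guarded-teardown pattern with stand-in types:

#include <stdlib.h>

/* Stand-ins for the rpcsvc types; the point is the single NULL-guarded
 * block that both walks the listeners and destroys the service. */
struct listener {
    struct listener *next;
    void *trans;
};

struct rpcsvc {
    struct listener *listeners;
};

static void
teardown_rpc(struct rpcsvc **rpc)
{
    if (*rpc != NULL) { /* guard everything that dereferences *rpc */
        struct listener *l = (*rpc)->listeners;
        while (l != NULL) {
            struct listener *next = l->next;
            free(l->trans); /* rpc_transport_unref() in the real code */
            free(l);
            l = next;
        }
        free(*rpc); /* rpcsvc_destroy() in the real code */
        *rpc = NULL;
    }
}

int
main(void)
{
    struct rpcsvc *rpc = calloc(1, sizeof(*rpc));

    teardown_rpc(&rpc); /* safe with no listeners */
    teardown_rpc(&rpc); /* and idempotent: rpc is already NULL */
    return 0;
}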
@@ -2252,23 +2252,11 @@ static int
changelog_init(xlator_t *this, changelog_priv_t *priv)
{
int i = 0;
- int ret = -1;
- struct timeval tv = {
- 0,
- };
+ int ret = 0;
changelog_log_data_t cld = {
0,
};
- ret = gettimeofday(&tv, NULL);
- if (ret) {
- gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_GET_TIME_FAILURE,
- NULL);
- goto out;
- }
-
- priv->slice.tv_start = tv;
-
priv->maps[CHANGELOG_TYPE_DATA] = "D ";
priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M ";
@@ -2287,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv)
* in case there was an encoding change. so... things are kept
* simple here.
*/
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, _gf_false);
ret = htime_open(this, priv, cld.cld_roll_time);
/* call htime open with cld's rollover_time */
@@ -2470,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options)
char csnap_dir[PATH_MAX] = {
0,
};
- struct timeval tv = {
- 0,
- };
uint32_t timeout = 0;
priv = this->private;
@@ -2564,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options)
out);
if (active_now || active_earlier) {
- ret = changelog_fill_rollover_data(&cld, !active_now);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, !active_now);
slice = &priv->slice;
@@ -2585,13 +2566,7 @@ reconfigure(xlator_t *this, dict_t *options)
if (!active_earlier) {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE,
NULL);
- if (gettimeofday(&tv, NULL)) {
- gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_HTIME_FETCH_FAILED, NULL);
- ret = -1;
- goto out;
- }
- htime_create(this, priv, tv.tv_sec);
+ htime_create(this, priv, gf_time());
}
ret = changelog_spawn_helper_threads(this, priv);
}
diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am
index 0c3966c968b..e2a277e372b 100644
--- a/xlators/features/cloudsync/src/Makefile.am
+++ b/xlators/features/cloudsync/src/Makefile.am
@@ -21,9 +21,9 @@ cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources)
nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h
BUILT_SOURCES = cloudsync-autogen-fops.h
-cloudsync_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL)
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
-DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\"
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
index 7680260988b..23c3599825a 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
@@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid,
int date_len = -1;
int res_len = -1;
- ctime = time(NULL);
+ ctime = gf_time();
gtime = gmtime(&ctime);
date_len = strftime(httpdate, sizeof(httpdate),
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index 4ece7ff6fc8..4abb2c73ce5 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -2104,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
worker_enqueue(this, stub);
return 0;
normal:
- ret = dict_get_str(xattr_req, "link-count", &flag);
+ ret = dict_get_str_sizen(xattr_req, "link-count", &flag);
if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) {
STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -2592,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...)
if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) {
stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait for draining stub from queue before notify PARENT_DOWN */
diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c
index 67fdd53cee2..56dee244281 100644
--- a/xlators/features/leases/src/leases-internal.c
+++ b/xlators/features/leases/src/leases-internal.c
@@ -897,7 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
}
priv = this->private;
- recall_time = time(NULL);
+ recall_time = gf_time();
list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
lease_id_list)
{
@@ -1367,7 +1367,7 @@ expired_recall_cleanup(void *data)
gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread");
while (1) {
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&priv->mutex);
{
if (priv->fini) {
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 116aed68690..ab1eac68a53 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
if (plock->blocked) {
bcount++;
pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW,
- &plock->user_flock, -1, EAGAIN, NULL);
+ &plock->user_flock, -1, EINTR, NULL);
- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN,
+ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR,
&plock->user_flock, NULL);
} else {
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 4c6b78c2372..a2c6be93e03 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -460,11 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
INIT_LIST_HEAD(&pl_inode->blocked_calls);
INIT_LIST_HEAD(&pl_inode->metalk_list);
INIT_LIST_HEAD(&pl_inode->queued_locks);
+ INIT_LIST_HEAD(&pl_inode->waiting);
gf_uuid_copy(pl_inode->gfid, inode->gfid);
pl_inode->check_mlock_info = _gf_true;
pl_inode->mlock_enforced = _gf_false;
+ /* -2 means never looked up. -1 means something went wrong and link
+ * tracking is disabled. */
+ pl_inode->links = -2;
+
ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
if (ret) {
pthread_mutex_destroy(&pl_inode->mutex);
@@ -600,13 +605,11 @@ static void
__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock)
{
if (lock->blocked)
- gettimeofday(&lock->blkd_time, NULL);
+ lock->blkd_time = gf_time();
else
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add_tail(&lock->list, &pl_inode->ext_list);
-
- return;
}
/* Return true if the locks overlap, false otherwise */
@@ -1290,3 +1293,299 @@ pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client)
}
return _gf_true;
}
+
+static int32_t
+pl_inode_from_loc(loc_t *loc, inode_t **pinode)
+{
+ inode_t *inode = NULL;
+ int32_t error = 0;
+
+ if (loc->inode != NULL) {
+ inode = inode_ref(loc->inode);
+ goto done;
+ }
+
+ if (loc->parent == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (!gf_uuid_is_null(loc->gfid)) {
+ inode = inode_find(loc->parent->table, loc->gfid);
+ if (inode != NULL) {
+ goto done;
+ }
+ }
+
+ if (loc->name == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ inode = inode_grep(loc->parent->table, loc->parent, loc->name);
+ if (inode == NULL) {
+ /* We haven't found any inode. This means that the file doesn't exist
+ * or that even if it exists, we don't have any knowledge about it, so
+ * we don't have locks on it either, which is fine for our purposes. */
+ goto done;
+ }
+
+done:
+ *pinode = inode;
+
+ return error;
+}
+
+static gf_boolean_t
+pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *lock;
+ gf_boolean_t has_owners = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ /* If the lock belongs to the same client, we assume it's related
+ * to the same operation, so we allow the removal to continue. */
+ if (lock->client == client) {
+ continue;
+ }
+ /* If the lock belongs to an internal process, we don't block the
+ * removal. */
+ if (lock->client_pid < 0) {
+ continue;
+ }
+ if (contend == NULL) {
+ return _gf_true;
+ }
+ has_owners = _gf_true;
+ inodelk_contention_notify_check(xl, lock, now, contend);
+ }
+ }
+
+ return has_owners;
+}
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend)
+{
+ struct timespec now;
+ inode_t *inode;
+ pl_inode_t *pl_inode;
+ int32_t error;
+
+ pl_inode = NULL;
+
+ error = pl_inode_from_loc(loc, &inode);
+ if ((error != 0) || (inode == NULL)) {
+ goto done;
+ }
+
+ pl_inode = pl_inode_get(xl, inode, NULL);
+ if (pl_inode == NULL) {
+ inode_unref(inode);
+ error = ENOMEM;
+ goto done;
+ }
+
+ /* pl_inode_from_loc() already increments ref count for inode, so
+ * we only assign here our reference. */
+ pl_inode->inode = inode;
+
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (pl_inode->removed) {
+ error = ESTALE;
+ goto unlock;
+ }
+
+ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
+ error = -1;
+ /* We skip the unlock here because when we return -1 the caller must
+ * create a stub and call pl_inode_remove_complete(), which assumes
+ * the lock is still acquired and will release it once everything
+ * else is prepared. */
+ goto done;
+ }
+
+ pl_inode->is_locked = _gf_true;
+ pl_inode->remove_running++;
+
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+done:
+ *ppl_inode = pl_inode;
+
+ return error;
+}
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend)
+{
+ pl_inode_lock_t *lock;
+ int32_t error = -1;
+
+ if (stub != NULL) {
+ list_add_tail(&stub->list, &pl_inode->waiting);
+ pl_inode->is_locked = _gf_true;
+ } else {
+ error = ENOMEM;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, list);
+ list_del_init(&lock->list);
+ __pl_inodelk_unref(lock);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (error < 0) {
+ inodelk_contention_notify(xl, contend);
+ }
+
+ inode_unref(pl_inode->inode);
+
+ return error;
+}
+
+void
+pl_inode_remove_wake(struct list_head *list)
+{
+ call_stub_t *stub;
+
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
+
+ call_resume(stub);
+ }
+}
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
+{
+ struct list_head contend, granted;
+ struct timespec now;
+ pl_dom_list_t *dom;
+
+ if (pl_inode == NULL) {
+ return;
+ }
+
+ INIT_LIST_HEAD(&contend);
+ INIT_LIST_HEAD(&granted);
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (error == 0) {
+ if (pl_inode->links >= 0) {
+ pl_inode->links--;
+ }
+ if (pl_inode->links == 0) {
+ pl_inode->removed = _gf_true;
+ }
+ }
+
+ pl_inode->remove_running--;
+
+ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
+ pl_inode->is_locked = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
+ &contend);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(xl, pl_inode, &granted);
+
+ inodelk_contention_notify(xl, &contend);
+
+ inode_unref(pl_inode->inode);
+}
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list)
+{
+ call_stub_t *stub, *tmp;
+
+ if (!pl_inode->is_locked) {
+ return;
+ }
+
+ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list)
+ {
+ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL,
+ NULL)) {
+ list_move_tail(&stub->list, list);
+ }
+ }
+}
+
+/* This function determines if an inodelk attempt can be done now or it needs
+ * to wait.
+ *
+ * Possible return values:
+ * < 0: An error occurred. Currently only -ESTALE can be returned if the
+ * inode has been deleted previously by unlink/rmdir/rename
+ * = 0: The lock can be attempted.
+ * > 0: The lock needs to wait because a conflicting remove operation is
+ * ongoing.
+ */
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *ilock;
+
+ /* If the inode has been deleted, we won't allow any lock. */
+ if (pl_inode->removed) {
+ return -ESTALE;
+ }
+
+ /* We only synchronize with locks made for regular operations coming from
+ * the user. Locks done for internal purposes are hard to control and could
+ * lead to long delays or deadlocks quite easily. */
+ if (lock->client_pid < 0) {
+ return 0;
+ }
+ if (!pl_inode->is_locked) {
+ return 0;
+ }
+ if (pl_inode->remove_running > 0) {
+ return 1;
+ }
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(ilock, &dom->inodelk_list, list)
+ {
+ /* If a lock from the same client is already granted, we allow this
+ * one to continue. This is necessary to prevent deadlocks when
+ * multiple locks are taken for the same operation.
+ *
+             * On the other hand, it's unlikely that the same client sends
+             * completely unrelated locks for the same inode.
+ */
+ if (ilock->client == lock->client) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
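Taken together, the helpers above implement the whole remove/lock synchronization lifecycle. As a reading aid, here is a simplified sketch of how a remove-style fop is expected to drive them; it mirrors the PL_INODE_REMOVE macro added to posix.c further down in this patch, with the stub resume function being whatever fop is retried (pl_unlink here, chosen for illustration):

/* Sketch only: simplified from the PL_INODE_REMOVE macro in posix.c. */
static int32_t
remove_fop_sketch(xlator_t *xl, call_frame_t *frame, loc_t *loc, dict_t *xdata)
{
    struct list_head contend;
    pl_inode_t *pl_inode;
    call_stub_t *stub;
    int32_t error;

    INIT_LIST_HEAD(&contend);

    error = pl_inode_remove_prepare(xl, frame, loc, &pl_inode, &contend);
    if (error < 0) {
        /* Conflicting locks are held: queue a stub to retry later. The
         * inode mutex is still held here; pl_inode_remove_complete()
         * releases it (and returns ENOMEM if the stub is NULL). */
        stub = fop_unlink_stub(frame, pl_unlink, loc, 0, xdata);
        error = pl_inode_remove_complete(xl, pl_inode, stub, &contend);
    } else if (error == 0) {
        /* No conflict: wind the fop; its cbk must call
         * pl_inode_remove_cbk() to update counters and wake waiters. */
    }

    /* error > 0 is an errno (e.g. ESTALE); the caller unwinds with it. */
    return error;
}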
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 0916c299e84..281223bf3b8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -105,6 +105,15 @@ void
__pl_inodelk_unref(pl_inode_lock_t *lock);
void
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend);
+
+void
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted);
+
+void
grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
pl_dom_list_t *dom, struct timespec *now,
struct list_head *contend);
@@ -204,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode);
void
__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+void
+inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
gf_boolean_t
pl_does_monkey_want_stuck_lock();
@@ -218,4 +237,26 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd);
gf_boolean_t
pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client);
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend);
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend);
+
+void
+pl_inode_remove_wake(struct list_head *list);
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error);
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list);
+
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 0911659b437..fd772c850dd 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -121,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
@@ -129,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
* chance? Or just the locks we are attempting to acquire?
*/
if (names_conflict(candidate_lock->basename, requested_lock->basename)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -204,9 +202,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
- struct timespec *now)
+void
+entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -216,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -225,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -238,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -332,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
break;
}
}
- if (__entrylk_needs_contention_notify(this, tmp, now)) {
- list_add_tail(&tmp->contend, contend);
- }
+ entrylk_contention_notify_check(this, tmp, now, contend);
}
}
@@ -546,14 +542,10 @@ static int
__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
pl_entry_lock_t *lock, int nonblock)
{
- struct timeval now;
-
if (nonblock)
goto out;
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
@@ -614,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
}
__pl_entrylk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->domain_list, &dom->entrylk_list);
ret = 0;
@@ -697,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend);
if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
/* Grants locks if possible which are blocked on a lock */
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index e0e3b8f1f2d..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -140,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
/* Question: Should we just prune them all given the
* chance? Or just the locks we are attempting to acquire?
*/
if (inodelk_conflict(candidate_lock, requested_lock)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -229,9 +227,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
- struct timespec *now)
+void
+inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -241,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -250,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -263,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -351,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
break;
}
}
- if (__inodelk_needs_contention_notify(this, l, now)) {
- list_add_tail(&l->contend, contend);
- }
+ inodelk_contention_notify_check(this, l, now, contend);
}
}
@@ -399,15 +395,11 @@ static int
__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
int can_block)
{
- struct timeval now;
-
if (can_block == 0) {
goto out;
}
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
gf_msg_trace(this->name, 0,
@@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
struct list_head *contend)
{
pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
+ int ret;
- conf = __inodelk_grantable(this, dom, lock, now, contend);
- if (conf) {
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ ret = pl_inode_remove_inodelk(pl_inode, lock);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret == 0) {
+ conf = __inodelk_grantable(this, dom, lock, now, contend);
+ }
+ if ((ret > 0) || (conf != NULL)) {
+ return __lock_blocked_add(this, dom, lock, can_block);
}
/* To prevent blocked locks starvation, check if there are any blocked
@@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
"starvation");
}
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ return __lock_blocked_add(this, dom, lock, can_block);
}
__pl_inodelk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->list, &dom->inodelk_list);
- ret = 0;
-
-out:
- return ret;
+ return 0;
}
/* Return true if the two inodelks have exactly same lock boundaries */
@@ -527,12 +520,11 @@ out:
return conf;
}
-static void
+void
__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
struct list_head *granted, pl_dom_list_t *dom,
struct timespec *now, struct list_head *contend)
{
- int bl_ret = 0;
pl_inode_lock_t *bl = NULL;
pl_inode_lock_t *tmp = NULL;
@@ -545,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
{
list_del_init(&bl->blocked_locks);
- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
+ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
- if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ if (bl->status != -EAGAIN) {
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
-/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct timespec *now,
- struct list_head *contend)
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- struct list_head granted;
pl_inode_lock_t *lock;
pl_inode_lock_t *tmp;
+ int32_t op_ret;
+ int32_t op_errno;
- INIT_LIST_HEAD(&granted);
-
- pthread_mutex_lock(&pl_inode->mutex);
- {
- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
- contend);
- }
- pthread_mutex_unlock(&pl_inode->mutex);
-
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
- lkowner_utoa(&lock->owner), lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ if (lock->status == 0) {
+ op_ret = 0;
+ op_errno = 0;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ } else {
+ op_ret = -1;
+ op_errno = -lock->status;
+ }
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
- 0, 0, lock->volume);
+ op_ret, op_errno, lock->volume);
- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL);
+ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
lock->frame = NULL;
}
pthread_mutex_lock(&pl_inode->mutex);
{
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
list_del_init(&lock->blocked_locks);
__pl_inodelk_unref(lock);
@@ -599,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_unlock(&pl_inode->mutex);
}
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ struct list_head granted;
+
+ INIT_LIST_HEAD(&granted);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(this, pl_inode, &granted);
+}
+
static void
pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
{
@@ -660,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* and blocked lists, then this means that a parallel
* unlock on another inodelk (L2 say) may have 'granted'
* L1 and added it to 'granted' list in
- * __grant_blocked_node_locks() (although using the
+ * __grant_blocked_inode_locks() (although using the
* 'blocked_locks' member). In that case, the cleanup
* codepath must try and grant other overlapping
* blocked inodelks from other clients, now that L1 is
@@ -745,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
gf_boolean_t need_inode_unref = _gf_false;
struct list_head *pcontend = NULL;
struct list_head contend;
+ struct list_head wake;
struct timespec now = {};
short fl_type;
@@ -796,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
timespec_now(&now);
}
+ INIT_LIST_HEAD(&wake);
+
if (ctx)
pthread_mutex_lock(&ctx->lock);
pthread_mutex_lock(&pl_inode->mutex);
@@ -818,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
- if (can_block)
+ if (can_block) {
unref = _gf_false;
- /* For all but the case where a non-blocking
- * lock attempt fails, the extra ref taken at
- * the start of this function must be negated.
- */
- else
- need_inode_unref = _gf_true;
+ }
}
-
- if (ctx && (!ret || can_block))
+ /* For all but the case where a non-blocking lock attempt fails
+ * with -EAGAIN, the extra ref taken at the start of this function
+ * must be negated. */
+ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
+ if (ctx && !need_inode_unref) {
list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
+ }
} else {
/* Irrespective of whether unlock succeeds or not,
* the extra inode ref that was done at the start of
@@ -847,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
list_del_init(&retlock->client_list);
__pl_inodelk_unref(retlock);
+ pl_inode_remove_unlocked(this, pl_inode, &wake);
+
ret = 0;
}
out:
@@ -857,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
if (ctx)
pthread_mutex_unlock(&ctx->lock);
+ pl_inode_remove_wake(&wake);
+
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -1037,10 +1051,14 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
inode);
if (ret < 0) {
- if ((can_block) && (F_UNLCK != lock_type)) {
- goto out;
+ if (ret == -EAGAIN) {
+ if (can_block && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
}
- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
op_errno = -ret;
goto unwind;
}
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 3305350afb1..c868eb494a2 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -43,9 +43,8 @@ struct __posix_lock {
fd_t *fd;
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
/* These two together serve to uniquely identify each process
across nodes */
@@ -85,9 +84,9 @@ struct __pl_inode_lock {
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -102,6 +101,9 @@ struct __pl_inode_lock {
struct list_head client_list; /* list of all locks from a client */
short fl_type;
+
+ int32_t status; /* Error code when we try to grant a lock in blocked
+ state */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -136,9 +138,9 @@ struct __entry_lock {
const char *basename;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -164,13 +166,14 @@ struct __pl_inode {
struct list_head rw_list; /* list of waiting r/w requests */
struct list_head reservelk_list; /* list of reservelks */
struct list_head blocked_reservelks; /* list of blocked reservelks */
- struct list_head
- blocked_calls; /* List of blocked lock calls while a reserve is held*/
- struct list_head metalk_list; /* Meta lock list */
- /* This is to store the incoming lock
- requests while meta lock is enabled */
- struct list_head queued_locks;
- int mandatory; /* if mandatory locking is enabled */
+ struct list_head blocked_calls; /* List of blocked lock calls while a
+ reserve is held*/
+ struct list_head metalk_list; /* Meta lock list */
+ struct list_head queued_locks; /* This is to store the incoming lock
+ requests while meta lock is enabled */
+ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir
+ the inode. */
+ int mandatory; /* if mandatory locking is enabled */
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
@@ -197,7 +200,13 @@ struct __pl_inode {
*/
int fop_wind_count;
pthread_cond_t check_fop_wind_count;
+
gf_boolean_t track_fop_wind_count;
+
+ int32_t links; /* Number of hard links the inode has. */
+ uint32_t remove_running; /* Number of remove operations running. */
+ gf_boolean_t is_locked; /* Regular locks will be blocked. */
+ gf_boolean_t removed; /* The inode has been deleted. */
};
typedef struct __pl_inode pl_inode_t;
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 8b57627addf..cf0ae4c57dd 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -148,6 +148,29 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
} \
} while (0)
+#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \
+ _args...) \
+ ({ \
+ struct list_head contend; \
+ pl_inode_t *__pl_inode; \
+ call_stub_t *__stub; \
+ int32_t __error; \
+ INIT_LIST_HEAD(&contend); \
+ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \
+ &__pl_inode, &contend); \
+ if (__error < 0) { \
+ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \
+ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \
+ &contend); \
+ } else if (__error == 0) { \
+ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \
+ _loc2); \
+ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \
+ FIRST_CHILD(_xl)->fops->_fop, ##_args); \
+ } \
+ __error; \
+ })
+
gf_boolean_t
pl_has_xdata_requests(dict_t *xdata)
{
@@ -471,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
char *save_ptr = NULL;
tmp_key = gf_strdup(key);
+ if (!tmp_key)
+ return -1;
+
strtok_r(tmp_key, ":", &save_ptr);
if (!*save_ptr) {
if (tmp_key)
@@ -2962,11 +2988,85 @@ out:
return ret;
}
+static int32_t
+pl_request_link_count(dict_t **pxdata)
+{
+ dict_t *xdata;
+
+ xdata = *pxdata;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ return ENOMEM;
+ }
+ } else {
+ dict_ref(xdata);
+ }
+
+ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
+ dict_unref(xdata);
+ return ENOMEM;
+ }
+
+ *pxdata = xdata;
+
+ return 0;
+}
+
+static int32_t
+pl_check_link_count(dict_t *xdata)
+{
+ int32_t count;
+
+ /* In case we are unable to read the link count from xdata, we take a
+ * conservative approach and return -2, which will prevent the inode from
+ * being considered deleted. In fact it will cause link tracking for this
+ * inode to be disabled completely to avoid races. */
+
+ if (xdata == NULL) {
+ return -2;
+ }
+
+ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
+ return -2;
+ }
+
+ return count;
+}
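For reference, these are the link-count states that the code below relies on; the field's initialization to -2 happens when the pl_inode is created (presumably in pl_inode_get(), which is not part of this patch):

/* pl_inode->links, as used by this patch (a summary, not new code):
 *   -2  : count unknown; link tracking disabled for this inode. This is
 *         the conservative default, also used when xdata carries no count.
 *   >=0 : tracked hard-link count. Directories are normalized by
 *         subtracting their implicit extra link, so reaching 0 means
 *         "about to disappear" for files and directories alike.
 */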
+
int32_t
pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
+ pl_inode_t *pl_inode;
+
+ if (op_ret >= 0) {
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (pl_inode == NULL) {
+ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* We only update the link count if we previously didn't know it.
+         * Doing it unconditionally could lead to races, since lookup is not
+         * executed atomically most of the time. */
+ if (pl_inode->links == -2) {
+ pl_inode->links = pl_check_link_count(xdata);
+ if (buf->ia_type == IA_IFDIR) {
+ /* Directories have at least 2 links. To avoid special handling
+ * for directories, we simply decrement the value here to make
+ * them equivalent to regular files. */
+ pl_inode->links--;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
@@ -2975,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ int32_t error;
+
+ error = pl_request_link_count(&xdata);
+ if (error == 0) {
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ } else {
+ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
+ }
return 0;
}
@@ -3502,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner,
time_t *blkd_time, gf_boolean_t active)
{
char *type_str = NULL;
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
@@ -3556,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
int count = 0;
@@ -3579,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->entrylk_list, domain_list)
{
- gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec,
+ gf_time_fmt(granted, sizeof(granted), lock->granted_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count);
- if (lock->blkd_time.tv_sec == 0) {
+ if (lock->blkd_time == 0) {
snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
: "ENTRYLK_WRLCK",
@@ -3590,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
lkowner_utoa(&lock->owner), lock->client,
lock->connection_id, granted);
} else {
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
@@ -3607,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
{
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count);
@@ -3659,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id,
- &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
- _gf_true);
+ lock->client, lock->connection_id, &lock->granted_time,
+ &lock->blkd_time, _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3673,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
count);
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id, 0,
- &lock->blkd_time.tv_sec, _gf_false);
+ lock->client, lock->connection_id, 0, &lock->blkd_time,
+ _gf_false);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3707,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode)
gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->client_uid, &lock->granted_time.tv_sec,
- &lock->blkd_time.tv_sec,
- (lock->blocked) ? _gf_false : _gf_true);
+ lock->client, lock->client_uid, &lock->granted_time,
+ &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3793,6 +3899,10 @@ unlock:
gf_proc_dump_write("posixlk-count", "%d", count);
__dump_posixlks(pl_inode);
}
+
+ gf_proc_dump_write("links", "%d", pl_inode->links);
+ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
+ gf_proc_dump_write("removed", "%u", pl_inode->removed);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -4138,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
struct iatt *postoldparent, struct iatt *prenewparent,
struct iatt *postnewparent, dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent,
postoldparent, prenewparent, postnewparent, xdata);
+
return 0;
}
@@ -4147,10 +4260,15 @@ int32_t
pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename,
+ pl_rename_cbk, oldloc, newloc, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
}
@@ -4274,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -4283,9 +4404,14 @@ int32_t
pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink,
+ pl_unlink_cbk, loc, xflag, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -4352,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -4361,9 +4490,14 @@ int
pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir,
+ pl_rmdir_cbk, loc, xflags, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -4393,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ pl_inode_t *pl_inode = (pl_inode_t *)cookie;
+
+ if (op_ret >= 0) {
+ pthread_mutex_lock(&pl_inode->mutex);
+
+        /* TODO: can pl_inode->links == 0 happen here? */
+ if (pl_inode->links >= 0) {
+ pl_inode->links++;
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
preparent, postparent, xdata);
return 0;
@@ -4402,9 +4549,18 @@ int
pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
+ pl_inode_t *pl_inode;
+
+ pl_inode = pl_inode_get(this, oldloc->inode, NULL);
+ if (pl_inode == NULL) {
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
}
diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/metadisp/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am
new file mode 100644
index 00000000000..1520ad8c424
--- /dev/null
+++ b/xlators/features/metadisp/src/Makefile.am
@@ -0,0 +1,38 @@
+noinst_PYTHON = gen-fops.py
+
+EXTRA_DIST = fops-tmpl.c
+
+xlator_LTLIBRARIES = metadisp.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+nodist_metadisp_la_SOURCES = fops.c
+
+BUILT_SOURCES = fops.c
+
+metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+metadisp_la_SOURCES = metadisp.c \
+ metadisp-unlink.c \
+ metadisp-stat.c \
+ metadisp-lookup.c \
+ metadisp-readdir.c \
+ metadisp-create.c \
+ metadisp-open.c \
+ metadisp-fsync.c \
+ metadisp-setattr.c \
+ backend.c
+
+metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = metadisp.h metadisp-fops.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py
+ PYTHONPATH=$(top_srcdir)/libglusterfs/src \
+ $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@
+
+CLEANFILES = $(nodist_metadisp_la_SOURCES)
diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c
new file mode 100644
index 00000000000..ee2c25bfaa7
--- /dev/null
+++ b/xlators/features/metadisp/src/backend.c
@@ -0,0 +1,45 @@
+#define GFID_STR_LEN 37
+
+#include "metadisp.h"
+
+/*
+ * backend.c
+ *
+ * functions responsible for converting user-facing paths to backend-style
+ * "/$GFID" paths.
+ */
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc)
+{
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char gfid_buf[GFID_STR_LEN + 1] = {
+ 0,
+ };
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO("metadisp", src_loc, out);
+ GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out);
+
+ loc_copy(dst_loc, src_loc);
+ memcpy(dst_loc->pargfid, root, sizeof(root));
+ GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke
+ // whatever loc_copy gave us
+
+ uuid_utoa_r(gfid, gfid_buf);
+
+    path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char),
+                     gf_common_mt_char);  // freed via loc_wipe
+    if (!path)
+        goto out;  // avoid NULL dereference on allocation failure
+
+ path[0] = '/';
+ strncpy(path + 1, gfid_buf, GFID_STR_LEN);
+ path[GFID_STR_LEN] = 0;
+ dst_loc->path = path;
+ if (src_loc->name)
+ dst_loc->name = strrchr(dst_loc->path, '/');
+ if (dst_loc->name)
+ dst_loc->name++;
+ return 0;
+out:
+ return -1;
+}
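A quick worked example of the conversion (values are illustrative only):

/* Hypothetical input/output for build_backend_loc():
 *
 *   loc_t backend_loc = {0,};
 *   build_backend_loc(loc->gfid, loc, &backend_loc);
 *
 *   // given loc->gfid = 11111111-2222-3333-4444-555555555555:
 *   //   backend_loc.path    == "/11111111-2222-3333-4444-555555555555"
 *   //   backend_loc.pargfid == 00000000-0000-0000-0000-000000000001 (root)
 *   //   backend_loc.name    points at the gfid string (path + 1), when
 *   //                       loc->name was set on the input loc
 */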
diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c
new file mode 100644
index 00000000000..4385b7dd5b7
--- /dev/null
+++ b/xlators/features/metadisp/src/fops-tmpl.c
@@ -0,0 +1,10 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+#pragma generate
diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py
new file mode 100644
index 00000000000..8b5e120fdec
--- /dev/null
+++ b/xlators/features/metadisp/src/gen-fops.py
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+
+import sys
+from generator import fop_subs, generate
+
+FN_METADATA_CHILD_GENERIC = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ metadata");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_GENERIC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ generic");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATAFD_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ datafd");
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATALOC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ dataloc");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+"""
+
+FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@,"
+
+skipped = [
+ "readdir",
+ "readdirp",
+ "lookup",
+ "fsync",
+ "stat",
+ "open",
+ "create",
+ "unlink",
+ "setattr",
+ # TODO: implement "inodelk",
+]
+
+
+def gen_fops():
+ done = skipped
+
+ #
+ # these are fops that wind to the DATA_CHILD
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "writev",
+ "readv",
+ "ftruncate",
+ "zerofill",
+ "discard",
+ "seek",
+ "fstat",
+ ]:
+ done = done + [name]
+ print(generate(FN_DATAFD_TEMPLATE, name, fop_subs))
+
+ for name in ["truncate"]:
+ done = done + [name]
+ print(generate(FN_DATALOC_TEMPLATE, name, fop_subs))
+
+ # these are fops that operate solely on dentries, folders,
+ # or extended attributes. Therefore, they must always
+ # wind to METADATA_CHILD and should never perform
+ # any path rewriting
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "mkdir",
+ "symlink",
+ "link",
+ "rename",
+ "mknod",
+ "opendir",
+        # "readdir",  # special-cased
+        # "readdirp",  # special-cased
+ "fsyncdir",
+ # "setattr", # special-cased
+ "readlink",
+ "fentrylk",
+ "access",
+ # TODO: these wind to both,
+ # data for backend-attributes and metadata for the rest
+ "xattrop",
+ "setxattr",
+ "getxattr",
+ "removexattr",
+ "fgetxattr",
+ "fsetxattr",
+ "fremovexattr",
+ ]:
+
+ done = done + [name]
+ print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs))
+
+ print("struct xlator_fops fops = {")
+ for name in done:
+ print(generate(FOPS_LINE_TEMPLATE, name, fop_subs))
+
+ print("};")
+
+
+for l in open(sys.argv[1], "r").readlines():
+ if l.find("#pragma generate") != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_fops()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
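To make the templates above concrete, this is roughly what the generator emits for one DATA_CHILD fop; a sketch for writev, assuming generator.py's usual argument substitutions:

int32_t
metadisp_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
                struct iovec *vector, int32_t count, off_t off, uint32_t flags,
                struct iobref *iobref, dict_t *xdata)
{
    METADISP_TRACE("writev datafd");
    xlator_t *child = NULL;
    child = DATA_CHILD(this);
    /* wind straight to the data child; entries/metadata are untouched */
    STACK_WIND(frame, default_writev_cbk, child, child->fops->writev, fd,
               vector, count, off, flags, iobref, xdata);
    return 0;
}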
diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c
new file mode 100644
index 00000000000..f8c9798dd59
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-create.c
@@ -0,0 +1,101 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Create, like stat, is a two-step process. We send a create
+ * to the METADATA_CHILD, then send another create to the DATA_CHILD.
+ *
+ * We do the metadata child first to ensure that the ACLs are enforced.
+ */
+
+int32_t
+metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ // create the backend data inode
+ STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = cookie;
+ if (op_ret != 0) {
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+ }
+
+ if (stub == NULL) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+
+ loc_t backend_loc = {
+ 0,
+ };
+ call_stub_t *stub = NULL;
+ uuid_t *gfid_req = NULL;
+
+ RESOLVE_GFID_REQ(xdata, gfid_req, out);
+
+ if (build_backend_loc(*gfid_req, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ frame->local = loc;
+
+ stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags,
+ mode, umask, fd, xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+out:
+ return -1;
+}
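In summary, the create path winds twice, in the order the comment at the top of this file describes:

/* create control flow (summary of the code above, not new code):
 *
 *   metadisp_create(loc)
 *     -> create(loc) on METADATA_CHILD        (ACLs enforced here)
 *        -> metadisp_create_cbk: on success, resume the stored stub
 *           -> metadisp_create_resume
 *              -> create(backend_loc) on DATA_CHILD
 *                 -> metadisp_create_dentry_cbk -> unwind to the client
 */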
diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h
new file mode 100644
index 00000000000..56dd427cf34
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fops.h
@@ -0,0 +1,51 @@
+#ifndef GF_METADISP_FOPS_H_
+#define GF_METADISP_FOPS_H_
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/glusterfs.h>
+
+#include <sys/types.h>
+
+/* fops in here are defined in their own file. Every other fop is just defined
+ * inline in fops.c. */
+
+int
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata);
+
+int
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+#endif
diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c
new file mode 100644
index 00000000000..2e46fa84eac
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fsync.c
@@ -0,0 +1,54 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+    if (!stub) {
+        goto unwind;
+    }
+
+    if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c
new file mode 100644
index 00000000000..27d90c9f746
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-lookup.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Lookup, like stat, is a two-step process for grabbing the metadata details
+ * as well as the data details.
+ */
+
+int32_t
+metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ METADISP_TRACE("backend_lookup_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("backend_lookup_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->lookup, &backend_loc, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+
+ // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ METADISP_TRACE("lookup");
+ call_stub_t *stub = NULL;
+ stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c
new file mode 100644
index 00000000000..64814afe636
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-open.c
@@ -0,0 +1,70 @@
+#include <glusterfs/call-stub.h>
+#include "metadisp.h"
+
+int32_t
+metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE("got open results %d %d", op_ret, op_errno);
+
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!stub) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd,
+ xdata);
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c
new file mode 100644
index 00000000000..5f840b1e88f
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-readdir.c
@@ -0,0 +1,65 @@
+#include "metadisp.h"
+
+/**
+ * With a change to the posix xlator, readdir and readdirp are shockingly
+ * simple.
+ *
+ * The issue with separating the backend data of the files
+ * with the metadata is that readdirs must now read from multiple sources
+ * from the metadata is that readdir must now read from multiple sources
+ *
+ * The way we do this is to tell the METADATA_CHILD that when it's
+ * running readdirp, each file entry should have a stat wound to
+ * 'stat-source-of-truth'.
+ *
+ * see metadisp_stat for how it handles winds _from_posix.
+ */
+
+int32_t
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ /*
+ * Always use readdirp, even if the original was readdir. Why? Because NFS.
+ * There are multiple translations between Gluster, UNIX, and NFS stat
+ * structures in that path. One of them uses the type etc. from the stat
+ * structure, which is only filled in by readdirp. If we use readdir, the
+ * entries do actually go all the way back to the client and are visible in
+ * getdents, but then the readdir throws them away because of the
+ * uninitialized type.
+ */
+ GF_UNUSED int32_t ret;
+ if (!xdata) {
+ xdata = dict_new();
+ }
+
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
+}
+
+int32_t
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ GF_UNUSED int32_t ret;
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+}
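The net effect of both entry points is the same single wind; a summary of the flow (the posix-side behavior is the separate change referred to in the comment at the top of this file):

/* readdir/readdirp control flow (summary, not new code):
 *
 *   metadisp_readdir(p)
 *     -> readdirp on METADATA_CHILD, with
 *        xdata["stat-source-of-truth"] = this xlator
 *        -> (patched posix winds a stat back up to this xlator for each
 *            entry; metadisp_stat then routes it to the data child)
 */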
diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c
new file mode 100644
index 00000000000..6991cf644f3
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-setattr.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid,
+ xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(statpost->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ METADISP_TRACE("setattr");
+ call_stub_t *stub = NULL;
+ stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf,
+ valid, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c
new file mode 100644
index 00000000000..b06d0dbcddd
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-stat.c
@@ -0,0 +1,124 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The stat flow in METADISP is complicated because we must
+ * ensure a few things:
+ * 1. stat, on the path within the metadata layer,
+ * MUST get the backend FD of the data layer.
+ * --- we wind to the metadata layer, then the data layer.
+ *
+ * 2. the metadata layer MUST be able to ask the data
+ * layer for stat information.
+ * --- this is 'syncop-internal-from-posix'
+ *
+ * 3. when the metadata exists BUT the data is missing,
+ * we MUST mark the backend file as bad and heal it.
+ */
+
+int32_t
+metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno);
+ if (op_errno == ENOENT) {
+        STACK_UNWIND_STRICT(stat, frame, -1, ENODATA, NULL, NULL);
+ return 0;
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("winding stat to path %s", loc->path);
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("bad object, sending EUCLEAN");
+        STACK_UNWIND_STRICT(stat, frame, -1, EUCLEAN, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this),
+ SECOND_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ METADISP_TRACE("got stat results %d %d", op_ret, op_errno);
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // only use the stub for the files
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ }
+
+    if (!stub) {
+        goto unwind;
+    }
+
+    if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int32_t ret = 0;
+ loc_t backend_loc = {
+ 0,
+ };
+ METADISP_FILTER_ROOT(stat, loc, xdata);
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) {
+ // if we've just been sent a stat from posix, then we know
+ // that we must send down a stat for a file to the second child.
+ //
+ // that means we can skip the stat for the first child and just
+ // send to the data disk.
+ METADISP_TRACE("got syncop-internal-from-posix");
+ STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->stat, &backend_loc, xdata);
+ return 0;
+ }
+
+ // we do not know if the request is for a file, folder, etc. wind
+ // to first child to find out.
+ stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata);
+ METADISP_TRACE("winding stat to first child %s", loc->path);
+ STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
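Putting the pieces together, stat takes one of three paths:

/* stat control flow (summary of the code above, not new code):
 *
 *   metadisp_stat(loc)
 *     - xdata has "syncop-internal-from-posix":
 *         -> stat(backend_loc) on DATA_CHILD, unwind directly
 *     - otherwise:
 *         -> stat(loc) on METADATA_CHILD
 *            -> metadisp_stat_cbk:
 *                 - not a regular file (or error): unwind as-is
 *                 - regular file: resume stub -> metadisp_stat_resume
 *                     -> stat(backend_loc) on the data child, with
 *                        ENOENT mapped to ENODATA to flag healing
 */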
diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c
new file mode 100644
index 00000000000..1f6a8eb35ce
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-unlink.c
@@ -0,0 +1,160 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The unlink flow in metadisp is complicated because we must
+ * do ensure that UNLINK causes both the metadata objects
+ * ensure that UNLINK removes both the metadata objects
+ * and the data objects.
+
+int32_t
+metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
+{
+ METADISP_TRACE("winding backend unlink to path %s", loc->path);
+ STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE(". %d %d", op_ret, op_errno);
+
+ int ret = 0;
+ call_stub_t *stub = NULL;
+    uint32_t nlink = 0; /* dict_get_uint32() expects a uint32_t */
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+ if (ret != 0) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto unwind;
+ }
+ METADISP_TRACE("frontend hardlink count %d %d", ret, nlink);
+ if (nlink > 1) {
+ goto unwind;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ call_stub_t *stub = NULL;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // fail fast on empty gfid so we don't loop forever
+ if (gf_uuid_is_null(buf->ia_gfid)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto unwind;
+ }
+
+ // fill gfid since the stub is incomplete
+ memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t));
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("winding lookup for unlink to path %s", loc->path);
+
+ // loop back to ourselves after a lookup
+ stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub,
+ METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+ }
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ //
+ // ensure we get the link count on the unlink response, so we can
+ // account for hardlinks before winding to the backend.
+ // NOTE:
+ // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation
+ // is needed to ensure that multiple requests will work in the same
+ // xlator stack.
+ //
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+
+ METADISP_TRACE("winding frontend unlink to path %s", loc->path);
+ stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag,
+ xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
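
The accounting in metadisp_unlink_cbk above boils down to: unlink the frontend name first while asking for the remaining link count (GF_RESPONSE_LINK_COUNT_XDATA), and remove the data object only when the last name is gone. A standalone sketch of just that decision, with illustrative names:

#include <stdio.h>
#include <stdint.h>

/* Models metadisp_unlink_cbk: the metadata unlink reply carries the
 * remaining link count; the data object is removed only when the last
 * name has been unlinked. */
static void
on_metadata_unlink_reply(uint32_t remaining_links)
{
    if (remaining_links > 1) {
        /* other hardlinks still reference the data object: unwind
         * here without touching the data child */
        printf("kept data object (%u links remain)\n", remaining_links);
        return;
    }
    /* last name gone: resume the parked stub and unlink on the
     * data child */
    printf("removing data object\n");
}

int
main(void)
{
    on_metadata_unlink_reply(3); /* one hardlink removed, data kept */
    on_metadata_unlink_reply(1); /* last link gone, data removed */
    return 0;
}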
diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c
new file mode 100644
index 00000000000..3c8f150cebc
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.c
@@ -0,0 +1,46 @@
+#include <glusterfs/call-stub.h>
+
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+int32_t
+init(xlator_t *this)
+{
+ if (!this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "not configured with children. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+/* defined in fops.c */
+struct xlator_fops fops;
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .op_version = {1},
+ .identifier = "metadisp",
+ .category = GF_EXPERIMENTAL,
+};
diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h
new file mode 100644
index 00000000000..c8fd7a13c04
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef GF_METADISP_H_
+#define GF_METADISP_H_
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#define METADATA_CHILD(_this) FIRST_CHILD(_this)
+#define DATA_CHILD(_this) SECOND_CHILD(_this)
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc);
+
+#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args)
+
+#define METADISP_FILTER_ROOT(_op, _args...) \
+ if (strcmp(loc->path, "/") == 0) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) \
+ if (__is_root_gfid(_gfid)) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \
+ VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \
+ _lbl)
+
+#endif /* GF_METADISP_H_ */
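
For reference, METADISP_FILTER_ROOT(stat, loc, xdata) in metadisp-stat.c hand-expands to the following (frame, this and loc are in scope inside the fop; this is manual substitution of the macro above, not generated output):

if (strcmp(loc->path, "/") == 0) {
    STACK_WIND(frame, default_stat_cbk, METADATA_CHILD(this),
               METADATA_CHILD(this)->fops->stat, loc, xdata);
    return 0;
}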
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
index 097439d86d6..480d64ade27 100644
--- a/xlators/features/quota/src/quota-enforcer-client.c
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -32,12 +32,6 @@
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
#include "quota.h"
#include "quota-messages.h"
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 73c008a2c00..18df9ae6d19 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -586,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -626,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* loop of validation and checking
* limit when timeout is zero.
*/
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -644,27 +640,10 @@ unwind:
return 0;
}
-static uint64_t
-quota_time_elapsed(struct timeval *now, struct timeval *then)
-{
- return (now->tv_sec - then->tv_sec);
-}
-
-int32_t
-quota_timeout(struct timeval *tv, int32_t timeout)
+static inline gf_boolean_t
+quota_timeout(time_t t, uint32_t timeout)
{
- struct timeval now = {
- 0,
- };
- int32_t timed_out = 0;
-
- gettimeofday(&now, NULL);
-
- if (quota_time_elapsed(&now, tv) >= timeout) {
- timed_out = 1;
- }
-
- return timed_out;
+ return (gf_time() - t) >= timeout;
}
/* Return: 1 if new entry added
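
The new quota_timeout() above reduces the old gettimeofday()-based elapsed-time helper to plain second arithmetic on time_t. A standalone check of the predicate's behavior; gf_time() is modeled here with time(NULL), assuming the real helper has the same second-level granularity:

#include <stdio.h>
#include <stdint.h>
#include <time.h>

typedef int gf_boolean_t;

/* gf_time() modeled with time(NULL) for this demo */
static time_t
gf_time(void)
{
    return time(NULL);
}

/* copy of the new predicate from quota.c */
static inline gf_boolean_t
quota_timeout(time_t t, uint32_t timeout)
{
    return (gf_time() - t) >= timeout;
}

int
main(void)
{
    time_t validate_time = gf_time() - 7; /* validated 7 seconds ago */

    printf("timeout=5  -> %d (stale, revalidate)\n",
           quota_timeout(validate_time, 5));
    printf("timeout=30 -> %d (still fresh)\n",
           quota_timeout(validate_time, 30));
    return 0;
}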
@@ -1128,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if ((object_aggr_count) > ctx->object_hard_lim) {
hard_limit_exceeded = 1;
@@ -1195,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if (wouldbe_size >= ctx->hard_lim) {
hard_limit_exceeded = 1;
@@ -4314,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -4348,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
}
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -4873,7 +4848,7 @@ off:
void
quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
- char **path, struct timeval *cur_time)
+ char **path, time_t *cur_time)
{
xlator_t *this = THIS;
@@ -4892,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
if (!(*path))
*path = uuid_utoa(inode->gfid);
- gettimeofday(cur_time, NULL);
+ *cur_time = gf_time();
}
/* Logs if
@@ -4903,9 +4878,7 @@ void
quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
int64_t delta)
{
- struct timeval cur_time = {
- 0,
- };
+ time_t cur_time = 0;
char *usage_str = NULL;
char *path = NULL;
int64_t cur_size = 0;
@@ -4931,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
/* Usage is above soft limit */
else if (cur_size > ctx->soft_lim &&
- quota_timeout(&ctx->prev_log, priv->log_timeout)) {
+ quota_timeout(ctx->prev_log_time, priv->log_timeout)) {
quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time);
gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
@@ -4947,7 +4920,7 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
if (path)
@@ -5184,9 +5157,9 @@ quota_priv_dump(xlator_t *this)
if (ret)
goto out;
else {
- gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout);
- gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout);
- gf_proc_dump_write("alert-time", "%d", priv->log_timeout);
+ gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout);
+ gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout);
+ gf_proc_dump_write("alert-time", "%u", priv->log_timeout);
gf_proc_dump_write("quota-on", "%d", priv->is_quota_on);
gf_proc_dump_write("statfs", "%d", priv->consider_statfs);
gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid);
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 8a3dc7a77f5..0395d78c9ef 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -153,8 +153,8 @@ struct quota_inode_ctx {
int64_t object_soft_lim;
struct iatt buf;
struct list_head parents;
- struct timeval tv;
- struct timeval prev_log;
+ time_t validate_time;
+ time_t prev_log_time;
gf_boolean_t ancestry_built;
gf_lock_t lock;
};
@@ -199,6 +199,7 @@ struct quota_local {
typedef struct quota_local quota_local_t;
struct quota_priv {
+ /* FIXME: consider time_t for timeouts. */
uint32_t soft_timeout;
uint32_t hard_timeout;
uint32_t log_timeout;
diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c
index 25fbd4aa748..df45f2a940b 100644
--- a/xlators/features/read-only/src/worm-helper.c
+++ b/xlators/features/read-only/src/worm-helper.c
@@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
GF_VALIDATE_OR_GOTO("worm", this, out);
GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
- start_time = time(NULL);
+ start_time = gf_time();
dict = dict_new();
if (!dict) {
gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
@@ -94,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
if (ret)
goto out;
stbuf->ia_mtime = stpre.ia_mtime;
- stbuf->ia_atime = time(NULL) + retention_state->ret_period;
+ stbuf->ia_atime = gf_time() + retention_state->ret_period;
if (fop_with_fd)
ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
@@ -286,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
{
int op_errno = EROFS;
int ret = -1;
+ time_t now = 0;
uint64_t com_period = 0;
uint64_t start_time = 0;
dict_t *dict = NULL;
@@ -337,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
goto out;
}
- if (ret == -1 && (time(NULL) - start_time) >= com_period) {
- if ((time(NULL) - stbuf.ia_mtime) >= com_period) {
+ now = gf_time();
+
+ if (ret == -1 && (now - start_time) >= com_period) {
+ if ((now - stbuf.ia_mtime) >= com_period) {
ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state,
&stbuf);
if (ret) {
@@ -352,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
op_errno = 0;
goto out;
}
- } else if (ret == -1 && (time(NULL) - start_time) < com_period) {
+ } else if (ret == -1 && (now - start_time) < com_period) {
op_errno = 0;
goto out;
- } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) {
+ } else if (reten_state.retain && ((now >= stbuf.ia_atime))) {
gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf);
}
if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable &&
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index e4042117427..e5f93063943 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -513,6 +513,9 @@ shard_local_wipe(shard_local_t *local)
loc_wipe(&local->int_entrylk.loc);
loc_wipe(&local->newloc);
+ if (local->name)
+ GF_FREE(local->name);
+
if (local->int_entrylk.basename)
GF_FREE(local->int_entrylk.basename);
if (local->fd)
@@ -1001,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
}
int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
shard_post_resolve_fop_handler_t post_res_handler)
{
@@ -1017,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
shard_local_t *local = NULL;
+ uint64_t resolve_count = 0;
priv = this->private;
local = frame->local;
local->call_count = 0;
shard_idx_iter = local->first_block;
res_inode = local->resolver_base_inode;
+
+ if ((local->op_ret < 0) || (local->resolve_not))
+ goto out;
+
+ /* If this prealloc FOP is for fresh file creation, the size of the
+ * file will be 0 and no shards are associated with it yet. So we can
+ * skip the lookup for shards that do not exist and directly issue
+ * mknod to create them.
+ *
+ * If the prealloc fop extends the preallocated file to a bigger size,
+ * just look up and populate the inodes of the existing shards and
+ * update the create count. (A worked example follows this hunk.)
+ */
+ if (local->fop == GF_FOP_FALLOCATE) {
+ if (!local->prebuf.ia_size) {
+ local->inode_list[0] = inode_ref(res_inode);
+ local->create_count = local->last_block;
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ return 0;
+ }
+ if (local->prebuf.ia_size < local->total_size)
+ local->create_count = local->last_block -
+ ((local->prebuf.ia_size - 1) /
+ local->block_size);
+ }
+
+ resolve_count = local->last_block - local->create_count;
+
if (res_inode)
gf_uuid_copy(gfid, res_inode->gfid);
else
gf_uuid_copy(gfid, local->base_gfid);
- if ((local->op_ret < 0) || (local->resolve_not))
- goto out;
-
- while (shard_idx_iter <= local->last_block) {
+ while (shard_idx_iter <= resolve_count) {
i++;
if (shard_idx_iter == 0) {
local->inode_list[i] = inode_ref(res_inode);
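
To make the create-count arithmetic above concrete: with 4 MiB shards, extending a 10 MiB preallocated file to 50 MiB should create only the shards past the current tail. A standalone rerun of the expression from this hunk; get_highest_block() is modeled as (offset + size - 1) / block_size, an assumption about that helper:

#include <stdio.h>
#include <stdint.h>

/* assumed model of shard.c's get_highest_block() */
static uint64_t
get_highest_block(uint64_t offset, uint64_t size, uint64_t block_size)
{
    return (offset + size - 1) / block_size;
}

int
main(void)
{
    const uint64_t block_size = 4ULL * 1024 * 1024;  /* 4 MiB shards */
    const uint64_t ia_size = 10ULL * 1024 * 1024;    /* current size */
    const uint64_t total_size = 50ULL * 1024 * 1024; /* fallocate target */

    uint64_t last_block = get_highest_block(0, total_size, block_size);
    /* the expression from shard_common_resolve_shards() */
    uint64_t create_count = last_block - ((ia_size - 1) / block_size);

    printf("last_block=%llu create_count=%llu\n",
           (unsigned long long)last_block, (unsigned long long)create_count);
    /* prints last_block=12 create_count=10: shards 0-2 already exist
     * and are only looked up; shards 3-12 are created via mknod */
    return 0;
}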
@@ -1659,26 +1692,24 @@ err:
}
int
-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
+shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
int ret = -1;
int32_t mask = SHARD_INODE_WRITE_MASK;
- shard_local_t *local = NULL;
+ shard_local_t *local = frame->local;
shard_inode_ctx_t ctx = {
0,
};
- local = frame->local;
-
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
"Lookup on base file"
" failed : %s",
- loc_gfid_utoa(&(local->loc)));
+ uuid_utoa(inode->gfid));
local->op_ret = op_ret;
local->op_errno = op_errno;
goto unwind;
@@ -1712,18 +1743,57 @@ unwind:
}
int
-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
- shard_post_fop_handler_t handler)
+shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ shard_local_t *local = frame->local;
+
+ shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret,
+ op_errno, buf, xdata);
+ return 0;
+}
+
+int
+shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ /* When op_ret < 0 (e.g. op_errno = ENOENT), the inode passed to this
+ cbk will be NULL, so refer to the prefilled inode stored in local
+ instead.
+ Note: overriding the inode passed to this cbk with the one stored
+ in *struct shard_local_t* causes no issue, as on success both refer
+ to the same inode. */
+ inode = ((shard_local_t *)frame->local)->loc.inode;
+
+ shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+/* This function decides whether to do a path-based lookup or an
+ * fd-based lookup (fstat), depending on the 3rd and 4th args:
+ * if fd != NULL and loc == NULL, the call is an fstat;
+ * if fd == NULL and loc != NULL, it is a path-based lookup.
+ * Pass arguments accordingly. (A usage sketch follows this hunk.)
+ */
+int
+shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, shard_post_fop_handler_t handler)
{
int ret = -1;
+ inode_t *inode = NULL;
shard_local_t *local = NULL;
dict_t *xattr_req = NULL;
gf_boolean_t need_refresh = _gf_false;
local = frame->local;
local->handler = handler;
+ inode = fd ? fd->inode : loc->inode;
- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+ ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf,
&need_refresh);
/* By this time, inode ctx should have been created either in create,
* mknod, readdirp or lookup. If not it is a bug!
@@ -1732,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_msg_debug(this->name, 0,
"Skipping lookup on base file: %s"
"Serving prebuf off the inode ctx cache",
- uuid_utoa(loc->gfid));
+ uuid_utoa(inode->gfid));
goto out;
}
@@ -1743,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out);
- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ if (fd)
+ STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xattr_req);
+ else
+ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
dict_unref(xattr_req);
return 0;
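
Both call shapes of shard_refresh_base_file() appear later in this patch; side by side for reference (copied from the shard_truncate and shard_ftruncate hunks below):

/* path-based refresh (loc set, fd NULL) -- from shard_truncate() */
shard_refresh_base_file(frame, this, &local->loc, NULL,
                        shard_post_lookup_truncate_handler);

/* fd-based refresh via fstat (loc NULL, fd set) -- from shard_ftruncate() */
shard_refresh_base_file(frame, this, NULL, fd,
                        shard_post_lookup_truncate_handler);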
@@ -2015,8 +2089,8 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
*/
if (!inode) {
gf_msg_debug(this->name, 0,
- "Last shard to be truncated absent in backend: %d of "
- "gfid %s. Directly proceeding to update file size",
+ "Last shard to be truncated absent in backend: %" PRIu64
+ " of gfid %s. Directly proceeding to update file size",
local->first_block, uuid_utoa(local->loc.inode->gfid));
shard_update_file_size(frame, this, NULL, &local->loc,
shard_post_update_size_truncate_handler);
@@ -2399,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
int count = 0;
int call_count = 0;
int32_t shard_idx_iter = 0;
- int last_block = 0;
+ int lookup_count = 0;
char path[PATH_MAX] = {
0,
};
@@ -2419,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
local = frame->local;
count = call_count = local->call_count;
shard_idx_iter = local->first_block;
- last_block = local->last_block;
+ lookup_count = local->last_block - local->create_count;
local->pls_fop_handler = handler;
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
@@ -2429,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
else
gf_uuid_copy(gfid, local->base_gfid);
- while (shard_idx_iter <= last_block) {
+ while (shard_idx_iter <= lookup_count) {
if (local->inode_list[i]) {
i++;
shard_idx_iter++;
@@ -2574,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
? local->loc.inode
: local->fd->inode;
@@ -2723,8 +2798,8 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
local->resolver_base_inode = loc->inode;
GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_truncate_handler);
return 0;
err:
@@ -2779,8 +2854,8 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->resolver_base_inode = fd->inode;
GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_truncate_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
@@ -2924,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!local->xattr_req)
goto err;
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_link_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_link_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
@@ -4254,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
switch (local->fop) {
case GF_FOP_UNLINK:
case GF_FOP_RENAME:
- shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
- shard_post_lookup_base_shard_rm_handler);
+ shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL,
+ shard_post_lookup_base_shard_rm_handler);
break;
default:
gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
@@ -4510,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->block_size) {
local->tmp_loc.inode = inode_new(this->itable);
gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
- shard_lookup_base_file(frame, this, &local->tmp_loc,
- shard_post_rename_lookup_handler);
+ shard_refresh_base_file(frame, this, &local->tmp_loc, NULL,
+ shard_post_rename_lookup_handler);
} else {
shard_rename_cbk(frame, this);
}
@@ -5150,6 +5225,7 @@ shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = local->loc.inode;
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
@@ -5246,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_readv_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_readv_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
@@ -5610,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
shard_common_lookup_shards(
frame, this, local->resolver_base_inode,
shard_common_inode_write_post_lookup_shards_handler);
+ } else if (local->create_count) {
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
} else {
shard_common_inode_write_do(frame, this);
}
@@ -5640,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
local->last_block = get_highest_block(local->offset, local->total_size,
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
gf_shard_mt_inode_list);
if (!local->inode_list) {
@@ -5648,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
}
gf_msg_trace(this->name, 0,
- "%s: gfid=%s first_block=%" PRIu32
+ "%s: gfid=%s first_block=%" PRIu64
" "
- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64
+ "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64
" total_size=%zu flags=%" PRId32 "",
gf_fop_list[local->fop],
uuid_utoa(local->resolver_base_inode->gfid),
@@ -6045,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_fsync_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_fsync_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
@@ -6238,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
int32_t
-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local,
+ char *key)
{
- int op_errno = EINVAL;
+ int ret = 0;
+ struct iatt *tmpbuf = NULL;
+ struct iatt *stbuf = NULL;
+ data_t *data = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, key);
+ if (!data)
+ return 0;
+
+ tmpbuf = data_to_iatt(data, key);
+ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+ if (stbuf == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
}
+ *stbuf = *tmpbuf;
+ stbuf->ia_size = local->prebuf.ia_size;
+ stbuf->ia_blocks = local->prebuf.ia_blocks;
+ ret = dict_set_iatt(xdata, key, stbuf, false);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ return 0;
- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+err:
+ GF_FREE(stbuf);
+ return -1;
+}
+
+int32_t
+shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, local->fd,
+ local->name, local->xattr_req);
+ else
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &local->loc,
+ local->name, local->xattr_req);
+ return 0;
+}
+
+int32_t
+shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ /* If shard's special xattrs are attempted to be removed,
+ * fail the fop with EPERM (except if the client is gsyncd).
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err);
}
+ /* Repeat the same check for bulk-removexattr */
if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+
+ if (name) {
+ local->name = gf_strdup(name);
+ if (!local->name)
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_remove_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name,
+ xdata);
return 0;
}
@@ -6360,38 +6601,164 @@ out:
}
int32_t
-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = EINVAL;
+ int ret = -1;
+ shard_local_t *local = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, local->fd,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ else
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ return 0;
+}
+
+int32_t
+shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+
+ /* Sharded or not, if an attempt is made to set shard's special
+ * xattrs, fail the fop with EPERM (except if the client is gsyncd).
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err);
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+ local->flags = flags;
+ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict
+ * and the xdata dict
+ */
+ if (dict)
+ local->xattr_req = dict_ref(dict);
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_set_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags,
+ xdata);
return 0;
}
@@ -6654,8 +7021,8 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_common_inode_write_post_lookup_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_common_inode_write_post_lookup_handler);
return 0;
out:
shard_common_failure_unwind(fop, frame, -1, ENOMEM);
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 04abd62c21c..4fe181b64d5 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame,
typedef struct shard_local {
int op_ret;
int op_errno;
- int first_block;
- int last_block;
- int num_blocks;
+ uint64_t first_block;
+ uint64_t last_block;
+ uint64_t num_blocks;
int call_count;
int eexist_count;
int create_count;
@@ -318,6 +318,7 @@ typedef struct shard_local {
uint32_t deletion_rate;
gf_boolean_t cleanup_required;
uuid_t base_gfid;
+ char *name;
} shard_local_t;
typedef struct shard_inode_ctx {
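
The widening of first_block/last_block/num_blocks from int to uint64_t matters because a shard index is essentially offset / block_size, which no longer fits in 32 bits for writes at very large sparse offsets. A standalone demonstration of the failure mode the change avoids, assuming the 4 MiB minimum shard block size:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int
main(void)
{
    const uint64_t block_size = 4ULL * 1024 * 1024; /* 4 MiB shards */
    const uint64_t offset = 10ULL << 50;            /* write at 10 PiB */

    uint64_t block_u64 = offset / block_size;
    int block_int = (int)block_u64; /* the old shard_local_t field width */

    printf("uint64_t index: %" PRIu64 "\n", block_u64); /* 2684354560 */
    /* out-of-range conversion is implementation-defined; on typical
     * two's-complement builds the index wraps negative */
    printf("int index:      %d\n", block_int);
    return 0;
}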
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index f44b11c6872..7d09cba3e9c 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -212,11 +212,11 @@ void
append_time_stamp(char *name, size_t name_size)
{
int i;
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS);
+ gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS);
/* removing white spaces in timestamp */
for (i = 0; i < strlen(timestr); i++) {
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
index 978825f6b56..c641bd6f432 100644
--- a/xlators/features/upcall/src/upcall-internal.c
+++ b/xlators/features/upcall/src/upcall-internal.c
@@ -316,7 +316,7 @@ upcall_reaper_thread(void *data)
priv = this->private;
GF_ASSERT(priv);
- time_now = time(NULL);
+ time_now = gf_time();
while (!priv->fini) {
list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list,
inode_ctx_list)
@@ -344,7 +344,7 @@ upcall_reaper_thread(void *data)
/* don't do a very busy loop */
timeout = get_cache_invalidation_timeout(this);
sleep(timeout / 2);
- time_now = time(NULL);
+ time_now = gf_time();
}
return NULL;
@@ -533,7 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
goto out;
}
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
@@ -670,13 +670,13 @@ upcall_cache_forget(xlator_t *this, inode_t *inode,
return;
}
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
&up_inode_ctx->client_list, client_list)
{
- /* Set the access time to time(NULL)
+ /* Set the access time to gf_time()
* to send notify */
up_client_entry->access_time = time_now;
diff --git a/xlators/meta/src/meta-helpers.c b/xlators/meta/src/meta-helpers.c
index 8b3d7b2f2f2..cb54f547468 100644
--- a/xlators/meta/src/meta-helpers.c
+++ b/xlators/meta/src/meta-helpers.c
@@ -182,14 +182,15 @@ meta_uuid_copy(uuid_t dst, uuid_t src)
}
static void
-default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
+default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type,
+ gf_boolean_t is_tunable)
{
struct timeval tv = {};
iatt->ia_type = type;
switch (type) {
case IA_IFDIR:
- iatt->ia_prot = ia_prot_from_st_mode(0755);
+ iatt->ia_prot = ia_prot_from_st_mode(0555);
iatt->ia_nlink = 2;
break;
case IA_IFLNK:
@@ -197,7 +198,7 @@ default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
iatt->ia_nlink = 1;
break;
default:
- iatt->ia_prot = ia_prot_from_st_mode(0644);
+ iatt->ia_prot = ia_prot_from_st_mode(is_tunable ? 0644 : 0444);
iatt->ia_nlink = 1;
break;
}
@@ -225,7 +226,7 @@ meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
return;
if (!ops->iatt_fill)
- default_meta_iatt_fill(iatt, inode, type);
+ default_meta_iatt_fill(iatt, inode, type, !!ops->file_write);
else
ops->iatt_fill(THIS, inode, iatt);
return;
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index eaa61c435e5..685beb42d27 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -25,13 +25,14 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \
glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \
- glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c
+ glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \
+ $(CONTRIBDIR)/mount/mntent.c
glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/libglusterd/src/libglusterd.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL)
+ $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD)
noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
@@ -46,7 +47,8 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
glusterd-shd-svc.h glusterd-shd-svc-helper.h \
glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \
- $(CONTRIBDIR)/userspace-rcu/rculist-extra.h
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h \
+ $(CONTRIBDIR)/mount/mntent_compat.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 3af2867b82a..e56cd0e6c74 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -21,7 +21,6 @@
#include "glusterd-messages.h"
#include "glusterd-server-quorum.h"
#include <glusterfs/run.h>
-#include "glusterd-volgen.h"
#include <glusterfs/syscall.h>
#include <sys/signal.h>
@@ -183,6 +182,9 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
{
int ret = -1;
int replica_nodes = 0;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
switch (volinfo->type) {
case GF_CLUSTER_TYPE_NONE:
@@ -191,8 +193,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"replica count (%d) option given for non replicate "
"volume %s",
replica_count, volinfo->volname);
- gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_REPLICA, "%s",
- err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ARGUMENT,
+ err_str, NULL);
goto out;
case GF_CLUSTER_TYPE_REPLICATE:
@@ -203,8 +205,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"than volume %s's replica count (%d)",
replica_count, volinfo->volname,
volinfo->replica_count);
- gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
- "%s", err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
if (replica_count == volinfo->replica_count) {
@@ -218,8 +220,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"(or %dxN)",
brick_count, volinfo->dist_leaf_count,
volinfo->dist_leaf_count);
- gf_msg(THIS->name, GF_LOG_WARNING, EINVAL,
- GD_MSG_INVALID_ENTRY, "%s", err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
ret = 1;
@@ -234,6 +236,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"need %d(xN) bricks for reducing replica "
"count of the volume from %d to %d",
replica_nodes, volinfo->replica_count, replica_count);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
break;
@@ -283,6 +287,7 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req)
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -510,6 +515,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
int i = 0;
int ret = 0;
int count = volinfo->replica_count - replica_count;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
if (replica_count && subvols) {
for (i = 0; i < volinfo->subvol_count; i++) {
@@ -519,6 +526,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
"Remove exactly %d"
" brick(s) from each subvolume.",
count);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
break;
}
}
@@ -532,6 +541,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
ret = -1;
snprintf(err_str, err_len, "Bricks not from same subvol for %s",
vol_type);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
break;
}
} while (++i < volinfo->subvol_count);
@@ -556,6 +567,9 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t *last = NULL;
char *arbiter_array = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
if (volinfo->type != GF_CLUSTER_TYPE_REPLICATE)
goto out;
@@ -574,6 +588,8 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
"Remove arbiter "
"brick(s) only when converting from "
"arbiter to replica 2 subvolume.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
goto out;
}
@@ -598,6 +614,8 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
"Removed bricks "
"must contain arbiter when converting"
" to plain distribute.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
break;
}
@@ -621,6 +639,7 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
char key[64] = "";
int keylen;
int i = 1;
+ glusterd_conf_t *conf = NULL;
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t **brickinfo_list = NULL;
@@ -639,12 +658,15 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
GF_ASSERT(req);
this = THIS;
GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
snprintf(err_str, sizeof(err_str), "Received garbage args");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -835,7 +857,17 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
if (ret)
goto out;
- ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict);
+ if (conf->op_version < GD_OP_VERSION_8_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. remove-brick operation"
+ "falling back to syncop framework.",
+ GD_OP_VERSION_8_0);
+ ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict);
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_REMOVE_BRICK,
+ dict);
+ }
out:
if (ret) {
@@ -991,6 +1023,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
gf_boolean_t is_valid_add_brick = _gf_false;
+ gf_boolean_t restart_shd = _gf_false;
struct statvfs brickstat = {
0,
};
@@ -1147,6 +1180,15 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
if (glusterd_is_volume_replicate(volinfo)) {
if (replica_count && conf->op_version >= GD_OP_VERSION_3_7_10) {
is_valid_add_brick = _gf_true;
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+ "Failed to stop shd for %s.", volinfo->volname);
+ }
+ restart_shd = _gf_true;
+ }
ret = generate_dummy_client_volfiles(volinfo);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -1221,6 +1263,14 @@ generate_volfiles:
out:
GF_FREE(free_ptr1);
GF_FREE(free_ptr2);
+ if (restart_shd) {
+ if (volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+ PROC_START_NO_WAIT)) {
+ gf_msg("glusterd", GF_LOG_CRITICAL, 0,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL,
+ "Failed to start shd for %s.", volinfo->volname);
+ }
+ }
gf_msg_debug("glusterd", 0, "Returning %d", ret);
return ret;
@@ -1309,14 +1359,14 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
"Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
"Unable to find volume: %s", volname);
goto out;
}
@@ -1328,13 +1378,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
&replica_count);
if (ret) {
- gf_msg_debug(THIS->name, 0, "Unable to get replica count");
- }
-
- ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
- &arbiter_count);
- if (ret) {
- gf_msg_debug(THIS->name, 0, "No arbiter count present in the dict");
+ gf_msg_debug(this->name, 0, "Unable to get replica count");
}
if (replica_count > 0) {
@@ -1348,18 +1392,20 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
}
- if (glusterd_is_volume_replicate(volinfo)) {
+ glusterd_add_peers_to_auth_list(volname);
+
+ if (replica_count && glusterd_is_volume_replicate(volinfo)) {
/* Do not allow add-brick for stopped volumes when replica-count
* is being increased.
*/
- if (conf->op_version >= GD_OP_VERSION_3_7_10 && replica_count &&
- GLUSTERD_STATUS_STOPPED == volinfo->status) {
+ if (GLUSTERD_STATUS_STOPPED == volinfo->status &&
+ conf->op_version >= GD_OP_VERSION_3_7_10) {
ret = -1;
snprintf(msg, sizeof(msg),
" Volume must not be in"
" stopped state when replica-count needs to "
" be increased.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1367,25 +1413,31 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* op-version check for replica 2 to arbiter conversion. If we
* don't have this check, an older peer added as arbiter brick
* will not have the arbiter xlator in its volfile. */
- if ((conf->op_version < GD_OP_VERSION_3_8_0) && (arbiter_count == 1) &&
- (replica_count == 3)) {
- ret = -1;
- snprintf(msg, sizeof(msg),
- "Cluster op-version must "
- "be >= 30800 to add arbiter brick to a "
- "replica 2 volume.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
- msg);
- *op_errstr = gf_strdup(msg);
- goto out;
+ if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) {
+ ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
+ &arbiter_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "No arbiter count present in the dict");
+ } else if (arbiter_count == 1) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Cluster op-version must "
+ "be >= 30800 to add arbiter brick to a "
+ "replica 2 volume.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
}
/* Do not allow increasing replica count for arbiter volumes. */
- if (replica_count && volinfo->arbiter_count) {
+ if (volinfo->arbiter_count) {
ret = -1;
snprintf(msg, sizeof(msg),
"Increasing replica count "
"for arbiter volumes is not supported.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1394,6 +1446,43 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
is_force = dict_get_str_boolean(dict, "force", _gf_false);
+ /* Check the brick order if the volume type is replicate or disperse
+ * and 'force' was not given at the end of the command. Doing this
+ * check only at the originator node is sufficient. (A simplified
+ * model of the check follows this hunk.)
+ */
+
+ if (!is_force && is_origin_glusterd(dict)) {
+ ret = 0;
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ gf_msg_debug(this->name, 0,
+ "Replicate cluster type "
+ "found. Checking brick order.");
+ if (replica_count)
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ replica_count);
+ else
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ volinfo->replica_count);
+ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ gf_msg_debug(this->name, 0,
+ "Disperse cluster type"
+ " found. Checking brick order.");
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname,
+ &bricks, &count,
+ volinfo->disperse_count);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+ "Not adding brick because of "
+ "bad brick order. %s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ }
+
if (volinfo->replica_count < replica_count && !is_force) {
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
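
glusterd_check_brick_order(), invoked in the hunk above, rejects add-brick layouts that would co-locate the bricks of one replica or disperse set. A simplified standalone model of such a check under that assumption; the real function's algorithm and signature differ (it parses the bricks string out of the dict):

#include <stdio.h>
#include <string.h>

/* Simplified model of a brick-order check: within each replica (or
 * disperse) set of sub_count bricks, no two bricks should share a
 * host. This is an assumption about the check's intent, not a copy
 * of glusterd_check_brick_order(). */
static int
check_brick_order(const char *hosts[], int count, int sub_count)
{
    int base, i, j;

    for (base = 0; base + sub_count <= count; base += sub_count)
        for (i = base; i < base + sub_count; i++)
            for (j = i + 1; j < base + sub_count; j++)
                if (strcmp(hosts[i], hosts[j]) == 0)
                    return -1; /* replicas co-located on one host */
    return 0;
}

int
main(void)
{
    const char *bad[] = {"host1", "host1", "host2"};  /* 2 on host1 */
    const char *good[] = {"host1", "host2", "host3"};

    printf("bad order:  %d\n", check_brick_order(bad, 3, 3));  /* -1 */
    printf("good order: %d\n", check_brick_order(good, 3, 3)); /* 0 */
    return 0;
}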
@@ -1410,7 +1499,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (len < 0) {
strcpy(msg, "<error>");
}
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1442,7 +1531,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"Volume name %s rebalance is in "
"progress. Please retry after completion",
volname);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1460,18 +1549,22 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
msg[0] = '\0';
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get count");
- goto out;
+ if (!count) {
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
+ goto out;
+ }
}
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get bricks");
- goto out;
+ if (!bricks) {
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bricks");
+ goto out;
+ }
}
if (bricks) {
@@ -1490,7 +1583,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"brick path %s is "
"too long",
brick);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
msg);
*op_errstr = gf_strdup(msg);
@@ -1501,7 +1594,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true,
NULL);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
"Add-brick: Unable"
" to get brickinfo");
goto out;
@@ -1571,7 +1664,7 @@ out:
GF_FREE(str_ret);
GF_FREE(all_bricks);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -1595,6 +1688,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
};
glusterd_conf_t *priv = THIS->private;
int pid = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
/* Check whether all the nodes of the bricks to be removed are
* up, if not fail the operation */
@@ -1603,6 +1698,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
ret = dict_get_strn(dict, key, keylen, &brick);
if (ret) {
snprintf(msg, sizeof(msg), "Unable to get %s", key);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "key=%s", key, NULL);
*errstr = gf_strdup(msg);
goto out;
}
@@ -1614,6 +1711,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Incorrect brick "
"%s for volume %s",
brick, volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INCORRECT_BRICK,
+ "Brick=%s, Volume=%s", brick, volinfo->volname, NULL);
*errstr = gf_strdup(msg);
goto out;
}
@@ -1626,6 +1725,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"is not decommissioned. "
"Use start or force option",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_DECOM,
+ "Use 'start' or 'force' option, Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1652,6 +1753,10 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"brick %s. Use force option to "
"remove the offline brick",
brick);
+ gf_smsg(
+ this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_STOPPED,
+ "Use 'force' option to remove the offline brick, Brick=%s",
+ brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1662,6 +1767,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Found dead "
"brick %s",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_DEAD,
+ "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1679,6 +1786,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Host node of the "
"brick %s is not in cluster",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_HOST_NOT_FOUND, "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1689,6 +1798,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Host node of the "
"brick %s is down",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_HOST_DOWN,
+ "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1768,6 +1879,7 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
errstr = gf_strdup(
"Deleting all the bricks of the "
"volume is not allowed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DELETE, NULL);
ret = -1;
goto out;
}
@@ -1776,6 +1888,8 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_OP_CMD_NONE:
errstr = gf_strdup("no remove-brick command issued");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NO_REMOVE_CMD,
+ NULL);
goto out;
case GF_OP_CMD_STATUS:
@@ -1900,6 +2014,8 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
errstr = gf_strdup(
"use 'force' option as migration "
"is in progress");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_PROG,
+ "Use 'force' option", NULL);
goto out;
}
@@ -1907,6 +2023,8 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
errstr = gf_strdup(
"use 'force' option as migration "
"has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option", NULL);
goto out;
}
@@ -1917,6 +2035,11 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
"use 'force' option as migration "
"of some files might have been skipped or "
"has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option, some files might have been "
+ "skipped",
+ NULL);
goto out;
}
}
@@ -2111,6 +2234,119 @@ out:
}
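+/* Post-commit hook for add-brick: after the commit succeeds, refresh the
+ * volume's old auth.allow list via glusterd_replace_old_auth_allow_list(). */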
int
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
+
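+/* Post-commit hook for replace-brick: performs the same auth.allow list
+ * refresh as glusterd_post_commit_add_brick(). */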
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
+
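+/* Copy the remove-brick rebalance task-id into rsp_dict so that the
+ * response returned to the CLI carries it. */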
+int
+glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(req_dict);
+
+ ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to find volinfo for volume %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_int32n(rsp_dict, "command", SLEN("command"), &cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command");
+ goto out;
+ }
+
+ /* The remove-brick task id is generated in glusterd_op_stage_remove_brick(),
+ * but rsp_dict is unavailable there, so copy it from req_dict to
+ * rsp_dict here. */
+
+ if (is_origin_glusterd(rsp_dict)) {
+ ret = dict_get_strn(req_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance id for remove-brick");
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_REBALANCE_ID_MISSING,
+ "%s", msg);
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
+ "Failed to set remove-brick-id");
+ goto out;
+ }
+ }
+ }
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id) &&
+ GD_OP_REMOVE_BRICK == volinfo->rebal.op) {
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, rsp_dict,
+ GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set task-id for volume %s", volname);
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+int
glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
{
int ret = -1;
@@ -2424,7 +2660,7 @@ out:
GF_FREE(brick_tmpstr);
if (bricks_dict)
dict_unref(bricks_dict);
-
+ gf_msg_debug(this->name, 0, "returning %d ", ret);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
index 09f0a35dc45..5c01f0c70b6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
@@ -26,12 +26,17 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
xlator_t *this = THIS;
glusterd_svc_t *svc = NULL;
- if (!this)
+ if (!this) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED,
+ NULL);
goto out;
+ }
options = dict_new();
- if (!options)
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
svc = glusterd_conn_get_svc_object(conn);
if (!svc) {
@@ -46,8 +51,11 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
SLEN("transport.socket.ignore-enoent"), 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=transport.socket.ignore-enoent", NULL);
goto out;
+ }
/* @options is free'd by rpc_transport when destroyed */
rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
@@ -61,9 +69,10 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
goto out;
ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
- else
+ } else
ret = 0;
conn->frame_timeout = frame_timeout;
diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
index cf567fa4172..f08bd6cebee 100644
--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
@@ -421,6 +421,35 @@ check_host_list(void)
}
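+/* Helper factored out of ganesha_manage_export(): if NFS-Ganesha is running
+ * on this node, invoke dbus-send.sh to dynamically add or remove the export
+ * for @volname. */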
int
+gd_ganesha_send_dbus(char *volname, char *value)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+ runinit(&runner);
+
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out);
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out);
+
+ ret = 0;
+ if (check_host_list()) {
+ /* Check whether ganesha is running on this node */
+ if (manage_service("status")) {
+ gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0,
+ GD_MSG_GANESHA_NOT_RUNNING,
+ "Export failed, NFS-Ganesha is not running");
+ } else {
+ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
+ value, volname, NULL);
+ ret = runner_run(&runner);
+ }
+ }
+out:
+ return ret;
+}
+
+int
manage_export_config(char *volname, char *value, char **op_errstr)
{
runner_t runner = {
@@ -447,9 +476,6 @@ int
ganesha_manage_export(dict_t *dict, char *value,
gf_boolean_t update_cache_invalidation, char **op_errstr)
{
- runner_t runner = {
- 0,
- };
int ret = -1;
glusterd_volinfo_t *volinfo = NULL;
dict_t *vol_opts = NULL;
@@ -458,7 +484,6 @@ ganesha_manage_export(dict_t *dict, char *value,
glusterd_conf_t *priv = NULL;
gf_boolean_t option = _gf_false;
- runinit(&runner);
this = THIS;
GF_ASSERT(this);
priv = this->private;
@@ -538,26 +563,13 @@ ganesha_manage_export(dict_t *dict, char *value,
goto out;
}
}
-
- if (check_host_list()) {
- /* Check whether ganesha is running on this node */
- if (manage_service("status")) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GANESHA_NOT_RUNNING,
- "Export failed, NFS-Ganesha is not running");
- } else {
- runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
- value, volname, NULL);
- ret = runner_run(&runner);
- if (ret) {
- gf_asprintf(op_errstr,
- "Dynamic export"
- " addition/deletion failed."
- " Please see log file for details");
- goto out;
- }
- }
+ ret = gd_ganesha_send_dbus(volname, value);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details");
+ goto out;
}
-
if (update_cache_invalidation) {
vol_opts = volinfo->dict;
ret = dict_set_dynstr_with_alloc(vol_opts,
@@ -617,8 +629,9 @@ tear_down_cluster(gf_boolean_t run_teardown)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- while (entry) {
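+ /* Iterate over all entries in CONFDIR, skipping irrelevant ones
+ * such as "." and "..". */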
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name);
ret = sys_lstat(path, &st);
if (ret == -1) {
@@ -649,7 +662,6 @@ tear_down_cluster(gf_boolean_t run_teardown)
gf_msg_debug(THIS->name, 0, "%s %s",
ret ? "Failed to remove" : "Removed", entry->d_name);
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
ret = sys_closedir(dir);
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index 76b7684538f..bf062c87060 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -115,13 +115,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -142,13 +147,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
ret = dict_set_dynstr(dict, "host-uuid", host_uuid);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=host-uuid", NULL);
goto out;
+ }
}
ret = glusterd_op_begin_synctask(req, cli_op, dict);
@@ -188,13 +198,18 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -215,6 +230,8 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -267,13 +284,18 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -294,6 +316,8 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -2251,6 +2275,9 @@ glusterd_op_verify_gsync_running(glusterd_volinfo_t *volinfo, char *slave,
"Volume %s needs to be started "
"before " GEOREP " start",
volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEO_REP_START_FAILED,
+ "Volume is not in a started state, Volname=%s",
+ volinfo->volname, NULL);
goto out;
}
@@ -2555,6 +2582,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s",
priv->workdir, filename);
if ((len < 0) || (len >= sizeof(abs_filename))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2567,6 +2595,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Realpath=%s, Reason=%s", priv->workdir, strerror(errno),
+ NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2577,6 +2608,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
"Failed to get "
"realpath of %s: %s",
filename, strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Filename=%s, Reason=%s", filename, strerror(errno), NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2586,6 +2619,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
will succeed for /var/lib/glusterd_bad */
len = snprintf(workdir, sizeof(workdir), "%s/", realpath_workdir);
if ((len < 0) || (len >= sizeof(workdir))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2599,6 +2633,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2613,6 +2649,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
goto out;
}
@@ -2621,9 +2659,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
snprintf(errmsg, sizeof(errmsg),
"Source file"
" is not a regular file.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, "%s",
- errmsg);
ret = -1;
goto out;
}
@@ -2842,8 +2880,11 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
*/
if (strstr(slave_url, "@")) {
slave_url_buf = gf_strdup(slave_url);
- if (!slave_url_buf)
+ if (!slave_url_buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "Slave_url=%s", slave_url, NULL);
goto out;
+ }
slave_user = strtok_r(slave_url_buf, "@", &save_ptr);
slave_ip = strtok_r(NULL, "@", &save_ptr);
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
index b01fd4da24b..a0bfea41f0f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
@@ -310,7 +310,7 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
}
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s",
svc->proc.logdir, svc->proc.logfile);
if ((len < 0) || (len >= PATH_MAX)) {
@@ -318,8 +318,13 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 6e6593e7263..1b21c40596d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -140,6 +140,7 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ctx->req = req;
if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -147,9 +148,11 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len,
&dict);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
- else
+ } else
dict->extra_stdfree = friend_req->vols.vols_val;
ctx->vols = dict;
@@ -386,82 +389,129 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
keylen = snprintf(key, sizeof(key), "volume%d.name", count);
ret = dict_set_strn(volumes, key, keylen, volinfo->volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.type", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.status", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->status);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.brick_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->brick_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.dist_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->dist_leaf_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->stripe_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.replica_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->replica_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->disperse_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->redundancy_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->arbiter_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.transport", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->transport_type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
- if (!volume_id_str)
+ if (!volume_id_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.volume_id", count);
ret = dict_set_dynstrn(volumes, key, keylen, volume_id_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.rebalance", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->rebal.defrag_cmd);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.snap_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->snap_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
@@ -474,23 +524,33 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
len = snprintf(brick, sizeof(brick), "%s:%s", brickinfo->hostname,
brickinfo->path);
if ((len < 0) || (len >= sizeof(brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
buf = gf_strdup(brick);
keylen = snprintf(key, sizeof(key), "volume%d.brick%d", count, i);
ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.brick%d.uuid", count, i);
snprintf(brick_uuid, sizeof(brick_uuid), "%s",
uuid_utoa(brickinfo->uuid));
buf = gf_strdup(brick_uuid);
- if (!buf)
+ if (!buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_uuid=%s", brick_uuid, NULL);
goto out;
+ }
ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
i++;
}
@@ -500,6 +560,7 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s",
ta_brickinfo->hostname, ta_brickinfo->path);
if ((len < 0) || (len >= sizeof(ta_brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -507,16 +568,23 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick",
count);
ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, NULL);
goto out;
+ }
dict = volinfo->dict;
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = 0;
goto out;
}
@@ -812,11 +880,14 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid,
gf_msg_debug(this->name, 0, "Received op from uuid %s", str);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
req_ctx = GF_CALLOC(1, sizeof(*req_ctx), mem_type);
if (!req_ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -824,8 +895,8 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid,
req_ctx->op = op;
ret = dict_unserialize(buf_val, buf_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
@@ -1601,6 +1672,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
if (cli_req.dict.dict_len) {
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -1623,6 +1696,7 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
rsp_dict = dict_new();
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1639,9 +1713,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize "
- "dictionary.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
ret = 0;
@@ -1694,8 +1767,10 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
GF_ASSERT(priv);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
{
@@ -1707,8 +1782,11 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
}
ret = dict_set_int32n(dict, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
@@ -1790,6 +1868,8 @@ __glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
/* Unserialize the dictionary */
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -2158,9 +2238,8 @@ glusterd_fsm_log_send_resp(rpcsvc_request_t *req, int op_ret, char *op_errstr,
ret = dict_allocate_and_serialize(dict, &rsp.fsm_log.fsm_log_val,
&rsp.fsm_log.fsm_log_len);
if (ret < 0) {
- gf_msg("glusterd", GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
return ret;
}
}
@@ -2206,6 +2285,7 @@ __glusterd_handle_fsm_log(rpcsvc_request_t *req)
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2432,8 +2512,8 @@ glusterd_op_stage_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
return ret;
}
@@ -2472,9 +2552,8 @@ glusterd_op_commit_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
}
@@ -2715,12 +2794,18 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
}
ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_get_int32n(dict, "op", SLEN("op"), &op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=op", NULL);
goto out;
+ }
if (GD_FRIEND_UPDATE_DEL == op) {
(void)glusterd_handle_friend_update_delete(dict);
@@ -2979,8 +3064,11 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
if (cli_req.dict.dict_len > 0) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict);
}
@@ -3207,6 +3295,7 @@ __glusterd_handle_umount(rpcsvc_request_t *req)
/* check if it is allowed to umount path */
path = gf_strdup(umnt_req.path);
if (!path) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
rsp.op_errno = ENOMEM;
goto out;
}
@@ -3414,12 +3503,16 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
char *af = NULL;
peerctx = GF_CALLOC(1, sizeof(*peerctx), gf_gld_mt_peerctx_t);
- if (!peerctx)
+ if (!peerctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
options = dict_new();
- if (!options)
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
if (args)
peerctx->args = *args;
@@ -3513,6 +3606,7 @@ glusterd_friend_add(const char *hoststr, int port,
*friend = glusterd_peerinfo_new(state, uuid, hoststr, port);
if (*friend == NULL) {
ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADD_FAIL, NULL);
goto out;
}
@@ -4090,13 +4184,15 @@ glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags)
};
int keylen;
- priv = THIS->private;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
GF_ASSERT(priv);
friends = dict_new();
if (!friends) {
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
- "Out of Memory");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
}
@@ -4122,24 +4218,36 @@ unlock:
keylen = snprintf(key, sizeof(key), "friend%d.uuid", count);
uuid_utoa_r(MY_UUID, my_uuid_str);
ret = dict_set_strn(friends, key, keylen, my_uuid_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.hostname", count);
ret = dict_set_nstrn(friends, key, keylen, "localhost",
SLEN("localhost"));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.connected", count);
ret = dict_set_int32n(friends, key, keylen, 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(friends, &rsp.friends.friends_val,
&rsp.friends.friends_len);
@@ -4311,8 +4419,11 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req)
if (cli_req.dict.dict_len > 0) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
if (ret < 0) {
@@ -4580,6 +4691,7 @@ __glusterd_handle_barrier(rpcsvc_request_t *req)
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -5114,12 +5226,17 @@ glusterd_print_gsync_status_by_vol(FILE *fp, glusterd_volinfo_t *volinfo)
0,
};
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
GF_VALIDATE_OR_GOTO(THIS->name, fp, out);
gsync_rsp_dict = dict_new();
- if (!gsync_rsp_dict)
+ if (!gsync_rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = gethostname(my_hostname, sizeof(my_hostname));
if (ret) {
@@ -5146,7 +5263,7 @@ glusterd_print_snapinfo_by_vol(FILE *fp, glusterd_volinfo_t *volinfo,
glusterd_volinfo_t *tmp_vol = NULL;
glusterd_snap_t *snapinfo = NULL;
int snapcount = 0;
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char snap_status_str[STATUS_STRLEN] = {
@@ -5264,16 +5381,25 @@ glusterd_print_client_details(FILE *fp, dict_t *dict,
ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
brickinfo->path);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-name", NULL);
goto out;
+ }
ret = dict_set_int32n(dict, "cmd", SLEN("cmd"), GF_CLI_STATUS_CLIENTS);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cmd", NULL);
goto out;
+ }
ret = dict_set_strn(dict, "volname", SLEN("volname"), volinfo->volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=volname", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
&brick_req->input.input_len);
@@ -5467,7 +5593,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str);
if (ret) {
- now = time(NULL);
+ now = gf_time();
strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S",
localtime(&now));
gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp);
@@ -5911,14 +6037,27 @@ get_brickinfo_from_brickid(char *brickid, glusterd_brickinfo_t **brickinfo)
uuid_t volid = {0};
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
brickid_dup = gf_strdup(brickid);
- if (!brickid_dup)
+ if (!brickid_dup) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_id=%s", brickid, NULL);
goto out;
+ }
volid_str = brickid_dup;
brick = strchr(brickid_dup, ':');
- if (!volid_str || !brick)
+ if (!volid_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
goto out;
+ }
+
+ if (!brick) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ goto out;
+ }
*brick = '\0';
brick++;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index 7cb70fcb4e2..d96e35503dd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -111,6 +111,8 @@ get_snap_volname_and_volinfo(const char *volpath, char **volname,
volfile_token = strtok_r(NULL, "/", &save_ptr);
*volname = gf_strdup(volfile_token);
if (NULL == *volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volname=%s", volfile_token, NULL);
ret = -1;
goto out;
}
@@ -236,6 +238,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -256,6 +259,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -271,6 +275,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -292,6 +297,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -312,6 +318,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -366,6 +373,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -386,6 +394,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -402,6 +411,8 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
/* Split the volume name */
vol = strtok_r(dup_volname, ".", &save_ptr);
if (!vol) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL,
+ "Volume name=%s", dup_volname, NULL);
ret = -1;
goto out;
}
@@ -446,18 +457,25 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (ret) {
dup_volname = gf_strdup(volid_ptr);
if (!dup_volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volume name=%s", volid_ptr, NULL);
ret = -1;
goto out;
}
/* Split the volume name */
vol = strtok_r(dup_volname, ".", &save_ptr);
if (!vol) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL,
+ "Volume name=%s", dup_volname, NULL);
ret = -1;
goto out;
}
ret = glusterd_volinfo_find(vol, &volinfo);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ NULL);
goto out;
+ }
}
gotvolinfo:
@@ -466,8 +484,10 @@ gotvolinfo:
ret = snprintf(path, path_len, "%s/%s/%s.vol", path_prefix,
volinfo->volname, volid_ptr);
- if (ret == -1)
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = sys_stat(path, &stbuf);
@@ -522,12 +542,14 @@ glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo,
GF_ASSERT(peerinfo);
if (!args->xdata.xdata_len) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = 0;
goto out;
}
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -561,6 +583,8 @@ glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo,
}
*brick_name = gf_strdup(name);
if (*brick_name == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Brick_name=%s", name, NULL);
ret = -1;
goto out;
}
@@ -976,6 +1000,7 @@ __server_getspec(rpcsvc_request_t *req)
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -ENOMEM;
goto fail;
}
@@ -1037,10 +1062,11 @@ __server_getspec(rpcsvc_request_t *req)
}
RCU_READ_UNLOCK;
if (peer_cnt) {
- ret = dict_set_str(dict, GLUSTERD_BRICK_SERVERS, peer_hosts);
- if (ret) {
+ op_ret = dict_set_str(dict, GLUSTERD_BRICK_SERVERS, peer_hosts);
+ if (op_ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to set peer_host in dict");
+ ret = op_ret;
goto fail;
}
}
@@ -1050,9 +1076,8 @@ __server_getspec(rpcsvc_request_t *req)
ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val,
&rsp.xdata.xdata_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto fail;
}
}
@@ -1073,6 +1098,7 @@ __server_getspec(rpcsvc_request_t *req)
}
ret = file_len = stbuf.st_size;
} else {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
op_errno = ENOENT;
goto fail;
}
@@ -1080,6 +1106,7 @@ __server_getspec(rpcsvc_request_t *req)
if (file_len) {
rsp.spec = CALLOC(file_len + 1, sizeof(char));
if (!rsp.spec) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
op_errno = ENOMEM;
goto fail;
@@ -1158,13 +1185,17 @@ __server_event_notify(rpcsvc_request_t *req)
(xdrproc_t)xdr_gf_event_notify_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
if (args.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
return ret;
+ }
ret = dict_unserialize(args.dict.dict_val, args.dict.dict_len, &dict);
if (ret) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
@@ -1357,6 +1388,7 @@ __glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -1370,8 +1402,10 @@ __glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_set_int32(dict, GD_OP_VERSION_KEY, conf->op_version);
if (ret) {
@@ -1457,6 +1491,7 @@ __glusterd_mgmt_hndsk_versions_ack(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -1529,22 +1564,25 @@ __server_get_volume_info(rpcsvc_request_t *req)
char *volume_id_str = NULL;
int32_t flags = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
ret = xdr_to_generic(req->msg[0], &vol_info_req,
(xdrproc_t)xdr_gf_get_volume_info_req);
if (ret < 0) {
/* failed to decode msg */
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
- gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD,
- "Received get volume info req");
+ gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD, NULL);
if (vol_info_req.dict.dict_len) {
/* Unserialize the dictionary */
dict = dict_new();
if (!dict) {
- gf_msg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY,
- "Out of Memory");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1553,9 +1591,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = dict_unserialize(vol_info_req.dict.dict_val,
vol_info_req.dict.dict_len, &dict);
if (ret < 0) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to "
- "unserialize req-buffer to dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1566,8 +1603,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = dict_get_int32(dict, "flags", &flags);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, -ret, GD_MSG_DICT_GET_FAILED,
- "failed to get flags");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=flags", NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1575,13 +1612,15 @@ __server_get_volume_info(rpcsvc_request_t *req)
if (!flags) {
/* Nothing to query about. Just return success */
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, "No flags set");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, NULL);
ret = 0;
goto out;
}
ret = dict_get_str(dict, "volname", &volname);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
op_errno = EINVAL;
ret = -1;
goto out;
@@ -1589,6 +1628,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ "Volname=%s", volname, NULL);
op_errno = EINVAL;
ret = -1;
goto out;
@@ -1597,6 +1638,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
if (flags & (int32_t)GF_GET_VOLUME_UUID) {
volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
if (!volume_id_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1604,8 +1647,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
dict_rsp = dict_new();
if (!dict_rsp) {
- gf_msg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY,
- "Out of Memory");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
GF_FREE(volume_id_str);
ret = -1;
@@ -1613,6 +1656,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
}
ret = dict_set_dynstr(dict_rsp, "volume_id", volume_id_str);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=volume_id", NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1621,6 +1666,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = dict_allocate_and_serialize(dict_rsp, &vol_info_rsp.dict.dict_val,
&vol_info_rsp.dict.dict_len);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1686,6 +1733,8 @@ __server_get_snap_info(rpcsvc_request_t *req)
if (snap_info_req.dict.dict_len) {
dict = dict_new();
if (!dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1716,6 +1765,8 @@ __server_get_snap_info(rpcsvc_request_t *req)
dict_rsp = dict_new();
if (!dict_rsp) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1908,22 +1959,45 @@ gd_validate_peer_op_version(xlator_t *this, glusterd_peerinfo_t *peerinfo,
int32_t peer_min_op_version = 0;
int32_t peer_max_op_version = 0;
- if (!dict || !this || !peerinfo)
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
+
+ if (!this) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED,
+ NULL);
+ goto out;
+ }
+
+ if (!peerinfo) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
conf = this->private;
ret = dict_get_int32(dict, GD_OP_VERSION_KEY, &peer_op_version);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_OP_VERSION_KEY, NULL);
goto out;
+ }
ret = dict_get_int32(dict, GD_MAX_OP_VERSION_KEY, &peer_max_op_version);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_MAX_OP_VERSION_KEY, NULL);
goto out;
+ }
ret = dict_get_int32(dict, GD_MIN_OP_VERSION_KEY, &peer_min_op_version);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_MIN_OP_VERSION_KEY, NULL);
goto out;
+ }
ret = -1;
/* Check if peer can support our op_version */
@@ -2189,14 +2263,20 @@ glusterd_mgmt_handshake(xlator_t *this, glusterd_peerctx_t *peerctx)
int ret = -1;
frame = create_frame(this, this->ctx->pool);
- if (!frame)
+ if (!frame) {
+ gf_smsg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto out;
+ }
frame->local = peerctx;
req_dict = dict_new();
- if (!req_dict)
+ if (!req_dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_set_dynstr(req_dict, GD_PEER_ID_KEY,
gf_strdup(uuid_utoa(MY_UUID)));
@@ -2463,12 +2543,17 @@ glusterd_peer_dump_version(xlator_t *this, struct rpc_clnt *rpc,
int ret = -1;
frame = create_frame(this, this->ctx->pool);
- if (!frame)
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto out;
+ }
frame->local = peerctx;
- if (!peerctx)
+ if (!peerctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
RCU_READ_LOCK;
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
index 511a102d016..61c0f1c946f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
@@ -87,21 +87,24 @@ glusterd_hooks_create_hooks_directory(char *basedir)
glusterd_conf_t *priv = NULL;
int32_t len = 0;
- priv = THIS->private;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
snprintf(path, sizeof(path), "%s/hooks", basedir);
ret = mkdir_p(path, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
- "Unable to create %s", path);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Path=%s", path, NULL);
goto out;
}
GLUSTERD_GET_HOOKS_DIR(version_dir, GLUSTERD_HOOK_VER, priv);
ret = mkdir_p(version_dir, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
- "Unable to create %s", version_dir);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Directory=%s", version_dir, NULL);
goto out;
}
@@ -112,13 +115,14 @@ glusterd_hooks_create_hooks_directory(char *basedir)
len = snprintf(path, sizeof(path), "%s/%s", version_dir, cmd_subdir);
if ((len < 0) || (len >= sizeof(path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
ret = mkdir_p(path, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
- "Unable to create %s", path);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL);
goto out;
}
@@ -126,13 +130,15 @@ glusterd_hooks_create_hooks_directory(char *basedir)
len = snprintf(path, sizeof(path), "%s/%s/%s", version_dir,
cmd_subdir, type_subdir[type]);
if ((len < 0) || (len >= sizeof(path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
ret = -1;
goto out;
}
ret = mkdir_p(path, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno,
- GD_MSG_CREATE_DIR_FAILED, "Unable to create %s", path);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL);
goto out;
}
}
@@ -200,20 +206,31 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
int i = 0;
int count = 0;
int ret = -1;
+ int flag = 0;
char query[1024] = {
0,
};
char *key = NULL;
char *value = NULL;
+ char *inet_family = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
ret = dict_get_int32(dict, "count", &count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
/* This will not happen unless op_ctx
* is corrupted*/
- if (!count)
+ if (!count) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, "count",
+ NULL);
goto out;
+ }
runner_add_arg(runner, "-o");
for (i = 1; ret == 0; i++) {
@@ -228,9 +245,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
continue;
runner_argprintf(runner, "%s=%s", key, value);
+ if ((strncmp(key, "cluster.enable-shared-storage",
+ SLEN("cluster.enable-shared-storage")) == 0 ||
+ strncmp(key, "enable-shared-storage",
+ SLEN("enable-shared-storage")) == 0) &&
+ strncmp(value, "enable", SLEN("enable")) == 0)
+ flag = 1;
}
glusterd_hooks_add_custom_args(dict, runner);
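+ /* When shared storage is being enabled, also pass glusterd's
+ * transport.address-family to the hook script so it can create the
+ * shared volume with a matching address family. */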
+ if (flag == 1) {
+ ret = dict_get_str_sizen(this->options, "transport.address-family",
+ &inet_family);
+ if (!ret) {
+ runner_argprintf(runner, "transport.address-family=%s",
+ inet_family);
+ }
+ }
ret = 0;
out:
@@ -357,27 +388,31 @@ glusterd_hooks_run_hooks(char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
lines = GF_CALLOC(1, N * sizeof(*lines), gf_gld_mt_charptr);
if (!lines) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
ret = -1;
line_count = 0;
- GF_SKIP_IRRELEVANT_ENTRIES(entry, hookdir, scratch);
- while (entry) {
+
+ while ((entry = sys_readdir(hookdir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (line_count == N - 1) {
N *= 2;
lines = GF_REALLOC(lines, N * sizeof(char *));
- if (!lines)
+ if (!lines) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
}
if (glusterd_is_hook_enabled(entry->d_name)) {
lines[line_count] = gf_strdup(entry->d_name);
line_count++;
}
-
- GF_SKIP_IRRELEVANT_ENTRIES(entry, hookdir, scratch);
}
lines[line_count] = NULL;
@@ -461,31 +496,40 @@ glusterd_hooks_stub_init(glusterd_hooks_stub_t **stub, char *scriptdir,
int ret = -1;
glusterd_hooks_stub_t *hooks_stub = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
GF_ASSERT(stub);
if (!stub)
goto out;
hooks_stub = GF_CALLOC(1, sizeof(*hooks_stub), gf_gld_mt_hooks_stub_t);
- if (!hooks_stub)
+ if (!hooks_stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
CDS_INIT_LIST_HEAD(&hooks_stub->all_hooks);
hooks_stub->op = op;
hooks_stub->scriptdir = gf_strdup(scriptdir);
- if (!hooks_stub->scriptdir)
+ if (!hooks_stub->scriptdir) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "scriptdir=%s", scriptdir, NULL);
goto out;
+ }
hooks_stub->op_ctx = dict_copy_with_ref(op_ctx, hooks_stub->op_ctx);
- if (!hooks_stub->op_ctx)
+ if (!hooks_stub->op_ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_COPY_FAIL, NULL);
goto out;
+ }
*stub = hooks_stub;
ret = 0;
out:
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL,
- "Failed to initialize "
- "post hooks stub");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL,
+ NULL);
glusterd_hooks_stub_cleanup(hooks_stub);
}
@@ -547,12 +591,20 @@ glusterd_hooks_priv_init(glusterd_hooks_private_t **new)
int ret = -1;
glusterd_hooks_private_t *hooks_priv = NULL;
- if (!new)
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!new) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
hooks_priv = GF_CALLOC(1, sizeof(*hooks_priv), gf_gld_mt_hooks_priv_t);
- if (!hooks_priv)
+ if (!hooks_priv) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
pthread_mutex_init(&hooks_priv->mutex, NULL);
pthread_cond_init(&hooks_priv->cond, NULL);
diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
index a48923e26e1..34abf35cb00 100644
--- a/xlators/mgmt/glusterd/src/glusterd-log-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
@@ -43,6 +43,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -75,7 +76,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req)
"for volume %s",
volname);
- ret = dict_set_uint64(dict, "rotate-key", (uint64_t)time(NULL));
+ ret = dict_set_uint64(dict, "rotate-key", (uint64_t)gf_time());
if (ret)
goto out;
@@ -138,6 +139,8 @@ glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr)
/* If no brick is specified, do log-rotate for
all the bricks in the volume */
if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
ret = 0;
goto out;
}
@@ -204,8 +207,11 @@ glusterd_op_log_rotate(dict_t *dict)
ret = dict_get_str(dict, "brick", &brick);
/* If no brick is specified, do log-rotate for
all the bricks in the volume */
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
goto cont;
+ }
ret = glusterd_brickinfo_new_from_brick(brick, &tmpbrkinfo, _gf_false,
NULL);
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
index 17052cee263..d7257e1a7b5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -27,6 +27,7 @@ typedef enum gf_gld_mem_types_ {
gf_gld_mt_mop_stage_req_t,
gf_gld_mt_probe_ctx_t,
gf_gld_mt_glusterd_volinfo_t,
+ gf_gld_mt_volinfo_dict_data_t,
gf_gld_mt_glusterd_brickinfo_t,
gf_gld_mt_peer_hostname_t,
gf_gld_mt_defrag_info,
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index 435a43df85d..3a1e600fb03 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -46,7 +46,7 @@ GLFS_MSGID(
GD_MSG_SNAP_STATUS_FAIL, GD_MSG_SNAP_INIT_FAIL, GD_MSG_VOLINFO_SET_FAIL,
GD_MSG_VOLINFO_GET_FAIL, GD_MSG_BRICK_CREATION_FAIL,
GD_MSG_BRICK_GET_INFO_FAIL, GD_MSG_BRICK_NEW_INFO_FAIL, GD_MSG_LVS_FAIL,
- GD_MSG_SETXATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED,
+ GD_MSG_SET_XATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED,
GD_MSG_SNAP_NOT_FOUND, GD_MSG_FS_LABEL_UPDATE_FAIL, GD_MSG_LVM_MOUNT_FAILED,
GD_MSG_DICT_SET_FAILED, GD_MSG_CANONICALIZE_FAIL, GD_MSG_DICT_GET_FAILED,
GD_MSG_SNAP_INFO_FAIL, GD_MSG_SNAP_VOL_CONFIG_FAIL,
@@ -78,7 +78,7 @@ GLFS_MSGID(
GD_MSG_COMMIT_OP_FAIL, GD_MSG_PEER_LIST_CREATE_FAIL, GD_MSG_BRICK_OP_FAIL,
GD_MSG_OPINFO_SET_FAIL, GD_MSG_OP_EVENT_UNLOCK_FAIL,
GD_MSG_MGMTV3_OP_RESP_FAIL, GD_MSG_PEER_NOT_FOUND, GD_MSG_REQ_DECODE_FAIL,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED,
GD_MSG_PRE_VALD_RESP_FAIL, GD_MSG_SVC_GET_FAIL, GD_MSG_VOLFILE_NOT_FOUND,
GD_MSG_OP_EVENT_LOCK_FAIL, GD_MSG_NON_STRIPE_VOL, GD_MSG_SNAPD_OBJ_GET_FAIL,
GD_MSG_QUOTA_DISABLED, GD_MSG_CACHE_MINMAX_SIZE_INVALID,
@@ -116,7 +116,7 @@ GLFS_MSGID(
GD_MSG_PARSE_BRICKINFO_FAIL, GD_MSG_VERS_STORE_FAIL, GD_MSG_HEADER_ADD_FAIL,
GD_MSG_QUOTA_CONF_WRITE_FAIL, GD_MSG_QUOTA_CONF_CORRUPT, GD_MSG_FORK_FAIL,
GD_MSG_CKSUM_COMPUTE_FAIL, GD_MSG_VERS_CKSUM_STORE_FAIL,
- GD_MSG_GETXATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE,
+ GD_MSG_GET_XATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE,
GD_MSG_VOL_STOPPED, GD_MSG_OPCTX_GET_FAIL, GD_MSG_TASKID_GEN_FAIL,
GD_MSG_REBALANCE_ID_MISSING, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, GD_MSG_UUID_GEN_STORE_FAIL,
@@ -302,6 +302,150 @@ GLFS_MSGID(
GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, GD_MSG_ATTACH_INFO,
GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL,
GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL, GD_MSG_CLUSTER_RC_ENABLE,
- GD_MSG_NFS_GANESHA_DISABLED, GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_SNAP_WARN);
+ GD_MSG_NFS_GANESHA_DISABLED, GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_SNAP_WARN,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, GD_MSG_REMOVE_ARBITER_BRICK,
+ GD_MSG_BRICK_NOT_DECOM, GD_MSG_BRICK_STOPPED, GD_MSG_BRICK_DEAD,
+ GD_MSG_BRICK_HOST_NOT_FOUND, GD_MSG_BRICK_HOST_DOWN, GD_MSG_BRICK_DELETE,
+ GD_MSG_BRICK_NO_REMOVE_CMD, GD_MSG_MIGRATION_PROG, GD_MSG_MIGRATION_FAIL,
+ GD_MSG_COPY_FAIL, GD_MSG_REALPATH_GET_FAIL,
+ GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, GD_MSG_STRCHR_FAIL, GD_MSG_SPLIT_FAIL,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, GD_MSG_VOL_SHD_NOT_COMP,
+ GD_MSG_BITROT_NOT_ENABLED, GD_MSG_CREATE_BRICK_DIR_FAILED,
+ GD_MSG_CREATE_GLUSTER_DIR_FAILED, GD_MSG_BRICK_CREATE_MNTPNT,
+ GD_MSG_BRICK_CREATE_ROOT, GD_MSG_SET_XATTR_BRICK_FAIL,
+ GD_MSG_REMOVE_XATTR_FAIL, GD_MSG_XLATOR_NOT_DEFINED,
+ GD_MSG_BRICK_NOT_RUNNING, GD_MSG_INCORRECT_BRICK, GD_MSG_UUID_GET_FAIL,
+ GD_MSG_INVALID_ARGUMENT, GD_MSG_FRAME_CREATE_FAIL,
+ GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, GD_MSG_VOL_STOP_ARGS_GET_FAILED,
+ GD_MSG_LSTAT_FAIL, GD_MSG_VOLUME_NOT_IMPORTED,
+ GD_MSG_ADD_BRICK_MNT_INFO_FAIL, GD_MSG_GET_MNT_ENTRY_INFO_FAIL,
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, GD_MSG_POST_COMMIT_OP_FAIL,
+ GD_MSG_POST_COMMIT_FROM_UUID_REJCT, GD_MSG_POST_COMMIT_REQ_SEND_FAIL);
+
+#define GD_MSG_INVALID_ENTRY_STR "Invalid data entry"
+#define GD_MSG_INVALID_ARGUMENT_STR \
+ "Invalid arguments have been given to function"
+#define GD_MSG_GARBAGE_ARGS_STR "Garbage args received"
+#define GD_MSG_BRICK_SUBVOL_VERIFY_FAIL_STR "Brick's subvol verification failed"
+#define GD_MSG_REMOVE_ARBITER_BRICK_STR "Failed to remove arbiter bricks"
+#define GD_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define GD_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define GD_MSG_BRICK_NOT_FOUND_STR "Brick not found in volume"
+#define GD_MSG_BRICK_NOT_DECOM_STR "Brick is not decommissioned"
+#define GD_MSG_BRICK_STOPPED_STR "Found stopped brick"
+#define GD_MSG_BRICK_DEAD_STR "Found dead brick"
+#define GD_MSG_BRICK_HOST_NOT_FOUND_STR \
+ "Host node of the brick is not a part of cluster"
+#define GD_MSG_BRICK_HOST_DOWN_STR "Host node of the brick is down"
+#define GD_MSG_BRICK_DELETE_STR \
+ "Deleting all the bricks of the volume is not allowed"
+#define GD_MSG_BRICK_NO_REMOVE_CMD_STR "No remove-brick command issued"
+#define GD_MSG_INCORRECT_BRICK_STR "Incorrect brick for volume"
+#define GD_MSG_MIGRATION_PROG_STR "Migration is in progress"
+#define GD_MSG_MIGRATION_FAIL_STR "Migration has failed"
+#define GD_MSG_XLATOR_NOT_DEFINED_STR "Xlator not defined"
+#define GD_MSG_DICT_CREATE_FAIL_STR "Failed to create dictionary"
+#define GD_MSG_COPY_FAIL_STR "Failed to copy"
+#define GD_MSG_UUID_GET_FAIL_STR "Failed to get the uuid of local glusterd"
+#define GD_MSG_GEO_REP_START_FAILED_STR "Georep start failed for volume"
+#define GD_MSG_REALPATH_GET_FAIL_STR "Failed to get realpath"
+#define GD_MSG_FILE_NOT_FOUND_STR "File not found in directory"
+#define GD_MSG_SRC_FILE_ERROR_STR "Error in source file"
+#define GD_MSG_DICT_UNSERIALIZE_FAIL_STR "Failed to unserialize dict"
+#define GD_MSG_VOL_ID_SET_FAIL_STR "Failed to set volume id"
+#define GD_MSG_ARBITER_BRICK_SET_INFO_FAIL_STR \
+ "Failed to add arbiter info to brick"
+#define GD_MSG_NO_MEMORY_STR "Out of memory"
+#define GD_MSG_GLUSTERD_UMOUNT_FAIL_STR "Failed to unmount path"
+#define GD_MSG_PEER_ADD_FAIL_STR "Failed to add new peer"
+#define GD_MSG_BRICK_GET_INFO_FAIL_STR "Failed to get brick info"
+#define GD_MSG_STRCHR_FAIL_STR "Failed to find the character in string"
+#define GD_MSG_SPLIT_FAIL_STR "Failed to split"
+#define GD_MSG_VOLINFO_GET_FAIL_STR "Failed to get volinfo"
+#define GD_MSG_PEER_NOT_FOUND_STR "Failed to find peer info"
+#define GD_MSG_DICT_COPY_FAIL_STR "Failed to copy values from dictionary"
+#define GD_MSG_ALLOC_AND_COPY_UUID_FAIL_STR \
+ "Failed to allocate memory or copy uuid"
+#define GD_MSG_VOL_NOT_FOUND_STR "Volume not found"
+#define GD_MSG_PEER_DISCONNECTED_STR "Peer is disconnected"
+#define GD_MSG_QUOTA_GET_STAT_FAIL_STR "Failed to get quota status"
+#define GD_MSG_SNAP_STATUS_FAIL_STR "Failed to get status of snapd"
+#define GD_MSG_VALIDATE_FAILED_STR "Failed to validate volume"
+#define GD_MSG_VOL_NOT_STARTED_STR "Volume is not started"
+#define GD_MSG_VOL_SHD_NOT_COMP_STR "Volume is not Self-heal compatible"
+#define GD_MSG_SELF_HEALD_DISABLED_STR "Self-heal daemon is disabled"
+#define GD_MSG_NFS_GANESHA_DISABLED_STR "NFS server is disabled"
+#define GD_MSG_QUOTA_DISABLED_STR "Quota is disabled"
+#define GD_MSG_BITROT_NOT_RUNNING_STR "Bitrot is not enabled"
+#define GD_MSG_BITROT_NOT_ENABLED_STR "Volume does not have bitrot enabled"
+#define GD_MSG_SNAPD_NOT_RUNNING_STR "Snapd is not enabled"
+#define GD_MSG_STRDUP_FAILED_STR "Strdup operation failed"
+#define GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL_STR \
+ "Failed to get quorum cluster counts"
+#define GD_MSG_GLUSTER_SERVICE_START_FAIL_STR "Failed to start glusterd service"
+#define GD_MSG_PEER_ADDRESS_GET_FAIL_STR "Failed to get the address of peer"
+#define GD_MSG_INVALID_SLAVE_STR "Volume is not a slave volume"
+#define GD_MSG_BRICK_NOT_RUNNING_STR "One or more bricks are not running"
+#define GD_MSG_BRK_MNTPATH_GET_FAIL_STR "Failed to get brick mount device"
+#define GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED_STR \
+ "Snapshot is supported only for thin provisioned LV."
+#define GD_MSG_SNAP_DEVICE_NAME_GET_FAIL_STR \
+ "Failed to copy snapshot device name"
+#define GD_MSG_SNAP_NOT_FOUND_STR "Snapshot does not exist"
+#define GD_MSG_CREATE_BRICK_DIR_FAILED_STR "Failed to create brick directory"
+#define GD_MSG_LSTAT_FAIL_STR "Lstat operation failed"
+#define GD_MSG_DIR_OP_FAILED_STR \
+ "The provided path is already present. It is not a directory"
+#define GD_MSG_BRICK_CREATION_FAIL_STR \
+ "Brick isn't allowed to be created inside glusterd's working directory."
+#define GD_MSG_BRICK_CREATE_ROOT_STR \
+ "The brick is being created in the root partition. It is recommended " \
+ "that you don't use the system's root partition for storage backend."
+#define GD_MSG_BRICK_CREATE_MNTPNT_STR \
+ "The brick is a mount point. Please create a sub-directory under the " \
+ "mount point and use that as the brick directory."
+#define GD_MSG_CREATE_GLUSTER_DIR_FAILED_STR \
+ "Failed to create glusterfs directory"
+#define GD_MSG_VOLINFO_IMPORT_FAIL_STR "Volume is not yet imported"
+#define GD_MSG_BRICK_SET_INFO_FAIL_STR \
+ "Failed to add brick mount details to dict"
+#define GD_MSG_SET_XATTR_BRICK_FAIL_STR \
+ "Glusterfs is not supported on brick. Setting extended attribute failed"
+#define GD_MSG_SET_XATTR_FAIL_STR "Failed to set extended attribute"
+#define GD_MSG_REMOVE_XATTR_FAIL_STR "Failed to remove extended attribute"
+#define GD_MSG_XLATOR_SET_OPT_FAIL_STR "Failed to set xlator type"
+#define GD_MSG_XLATOR_LINK_FAIL_STR \
+ "Failed to do the link of xlator with children"
+#define GD_MSG_READ_ERROR_STR "Failed to read directory"
+#define GD_MSG_INCOMPATIBLE_VALUE_STR "Incompatible transport type"
+#define GD_MSG_VOL_STOP_ARGS_GET_FAILED_STR "Failed to get volume stop args"
+#define GD_MSG_FRAME_CREATE_FAIL_STR "Failed to create frame"
+#define GD_MSG_VOLUME_NOT_IMPORTED_STR "Volume has not been imported"
+#define GD_MSG_ADD_BRICK_MNT_INFO_FAIL_STR \
+ "Failed to add brick mount details to dict"
+#define GD_MSG_GET_MNT_ENTRY_INFO_FAIL_STR "Failed to get mount entry details"
+#define GD_MSG_BRICKPATH_ROOT_GET_FAIL_STR "Failed to get brick root details"
+#define GD_MSG_VOL_INFO_REQ_RECVD_STR "Received get volume info req"
+#define GD_MSG_NO_FLAG_SET_STR "No flags set"
+#define GD_MSG_CREATE_DIR_FAILED_STR "Failed to create directory"
+#define GD_MSG_POST_HOOK_STUB_INIT_FAIL_STR \
+ "Failed to initialize post hooks stub"
+#define GD_MSG_FILE_OP_FAILED_STR "File operation failed"
+#define GD_MSG_INODE_SIZE_GET_FAIL_STR "Failed to get inode size"
+#define GD_MSG_CMD_EXEC_FAIL_STR "Command execution failed"
+#define GD_MSG_XLATOR_CREATE_FAIL_STR "Failed to create xlator"
+#define GD_MSG_CLRCLK_VOL_REQ_RCVD_STR "Received clear-locks request for volume"
+#define GD_MSG_BRK_PORT_NUM_GET_FAIL_STR \
+ "Couldn't get port number of local bricks"
+#define GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL_STR \
+ "Creating mount directory for clear-locks failed"
+#define GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL_STR \
+ "Failed to mount clear-locks maintenance client"
+#define GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL_STR \
+ "Failed to unmount clear-locks mount point"
+#define GD_MSG_CLRCLK_SND_CMD_FAIL_STR "Failed to send command for clear-locks"
+#define GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL_STR \
+ "Failed to allocate memory or get serialized length of dict"
+#define GD_MSG_GET_XATTR_FAIL_STR "Failed to get extended attribute"
#endif /* !_GLUSTERD_MESSAGES_H_ */
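
Each new *_STR define above pairs a GLFS message ID with its fixed human-readable text, so call sites stop embedding free-form sentences and pass only the ID plus key=value context. A typical call under this convention, modelled on the call sites elsewhere in this series:

    /* Structured logging: the message text comes from the _STR define
     * registered for the ID; only machine-parsable details are passed. */
    ret = dict_get_str_sizen(dict, "volname", &volname);
    if (ret) {
        gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
                "Key=volname", NULL);
        goto out;
    }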
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
index ef8b4c38571..1069688a89d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
@@ -165,6 +165,7 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req)
ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
if (!ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -174,6 +175,7 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req)
ctx->dict = dict_new();
if (!ctx->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -181,8 +183,8 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req)
ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len,
&ctx->dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
@@ -264,8 +266,8 @@ glusterd_mgmt_v3_pre_validate_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -315,20 +317,21 @@ glusterd_handle_pre_validate_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -391,8 +394,8 @@ glusterd_mgmt_v3_brick_op_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -441,20 +444,21 @@ glusterd_handle_brick_op_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -518,8 +522,8 @@ glusterd_mgmt_v3_commit_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -569,20 +573,21 @@ glusterd_handle_commit_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -621,6 +626,136 @@ out:
}
static int
+glusterd_mgmt_v3_post_commit_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ uint32_t op_errno, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ rsp.op_errno = op_errno;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to post commit, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_post_commit_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode post commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_post_commit_fn(op_req.op, dict, &op_errstr, &op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "post commit failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_post_commit_send_resp(req, op_req.op, ret, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL,
+ "Failed to send post commit "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op,
int32_t status, char *op_errstr,
dict_t *rsp_dict)
@@ -646,8 +781,8 @@ glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -696,20 +831,21 @@ glusterd_handle_post_validate_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -867,6 +1003,7 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req)
ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
if (!ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -876,6 +1013,7 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req)
ctx->dict = dict_new();
if (!ctx->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -883,8 +1021,8 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req)
ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len,
&ctx->dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
@@ -955,6 +1093,12 @@ glusterd_handle_commit(rpcsvc_request_t *req)
}
static int
+glusterd_handle_post_commit(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_post_commit_fn);
+}
+
+static int
glusterd_handle_post_validate(rpcsvc_request_t *req)
{
return glusterd_big_locked_handler(req, glusterd_handle_post_validate_fn);
@@ -978,6 +1122,9 @@ static rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
GLUSTERD_MGMT_V3_BRICK_OP, DRC_NA, 0},
[GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", glusterd_handle_commit, NULL,
GLUSTERD_MGMT_V3_COMMIT, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_POST_COMMIT] = {"POST_COMMIT",
+ glusterd_handle_post_commit, NULL,
+ GLUSTERD_MGMT_V3_POST_COMMIT, DRC_NA, 0},
[GLUSTERD_MGMT_V3_POST_VALIDATE] = {"POST_VAL",
glusterd_handle_post_validate, NULL,
GLUSTERD_MGMT_V3_POST_VALIDATE, DRC_NA,
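
Like every entry in gd_svc_mgmt_v3_actors, the new POST_COMMIT actor routes through glusterd_big_locked_handler, which serializes mgmt v3 handlers under glusterd's big lock. A hedged sketch of that wrapper (field and type names follow what this diff shows and are otherwise assumptions):

    /* Sketch: run the real handler under glusterd's big lock so mgmt v3
     * RPC handlers never execute concurrently with one another. */
    int
    glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn)
    {
        glusterd_conf_t *priv = THIS->private;
        int ret = -1;

        synclock_lock(&priv->big_lock);
        ret = actor_fn(req);
        synclock_unlock(&priv->big_lock);

        return ret;
    }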
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
index bf9b5a870a0..bca7221062b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -86,6 +86,11 @@ gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno,
peer_str, err_string);
break;
}
+ case GLUSTERD_MGMT_V3_POST_COMMIT: {
+ snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
case GLUSTERD_MGMT_V3_POST_VALIDATE: {
snprintf(op_err, sizeof(op_err),
"Post Validation failed on %s. %s", peer_str,
@@ -187,6 +192,16 @@ gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
goto out;
}
break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_stage_remove_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Remove brick prevalidation failed.");
+ goto out;
+ }
+ break;
+
case GD_OP_RESET_BRICK:
ret = glusterd_reset_brick_prevalidate(dict, op_errstr, rsp_dict);
if (ret) {
@@ -337,6 +352,15 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
}
break;
}
+ case GD_OP_REMOVE_BRICK: {
+ ret = glusterd_op_remove_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Remove-brick commit failed.");
+ goto out;
+ }
+ break;
+ }
case GD_OP_RESET_BRICK: {
ret = glusterd_op_reset_brick(dict, rsp_dict);
if (ret) {
@@ -386,6 +410,47 @@ out:
}
int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+
+ switch (op) {
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_post_commit_add_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Add-brick post commit failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_post_commit_replace_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Replace-brick post commit failed.");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
char **op_errstr, dict_t *rsp_dict)
{
@@ -582,15 +647,21 @@ gd_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_LOCK,
@@ -759,6 +830,7 @@ glusterd_pre_validate_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp)
goto out;
}
case GD_OP_STOP_VOLUME:
+ case GD_OP_REMOVE_BRICK:
case GD_OP_PROFILE_VOLUME:
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_REBALANCE:
@@ -897,15 +969,21 @@ gd_mgmt_v3_pre_validate_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(
peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
@@ -948,7 +1026,7 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
}
if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME ||
- op == GD_OP_REBALANCE) {
+ op == GD_OP_REBALANCE || op == GD_OP_REMOVE_BRICK) {
ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
@@ -1076,6 +1154,7 @@ glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict,
case GD_OP_START_VOLUME:
case GD_OP_STOP_VOLUME:
case GD_OP_ADD_BRICK:
+ case GD_OP_REMOVE_BRICK:
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_REPLACE_BRICK:
case GD_OP_RESET_BRICK:
@@ -1258,15 +1337,21 @@ gd_mgmt_v3_brick_op_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_BRICK_OP,
@@ -1515,15 +1600,21 @@ gd_mgmt_v3_commit_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_COMMIT,
@@ -1559,12 +1650,25 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
GF_ASSERT(op_errstr);
GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
- if (op == GD_OP_REBALANCE || op == GD_OP_DEFRAG_BRICK_VOLUME) {
- ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to set rebalance id in dict.");
- }
+ switch (op) {
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+
+ ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id in dict.");
+ }
+ break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_set_rebalance_id_for_remove_brick(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id for remove-brick in dict.");
+ }
+ break;
+ default:
+ break;
}
rsp_dict = dict_new();
if (!rsp_dict) {
@@ -1662,6 +1766,274 @@ out:
}
int32_t
+gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ free(rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_POST_COMMIT, *peerid, rsp.uuid);
+ GF_FREE(peerid);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ /* If req->rpc_status is -1, STACK_DESTROY will be called by the
+ * caller function instead.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_post_commit_cbk_fn);
+}
+
+int
+gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(
+ peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_POST_COMMIT, gd_mgmt_v3_post_commit_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(req_dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Post commit on local node */
+ ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Post commit failed "
+ "on localhost. Please "
+ "check log file for details.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+
+ /* Sending post commit req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+ if (!peerinfo->connected)
+ continue;
+
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID,
+ peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+ *op_errno = args.op_errno;
+
+ gf_msg_debug(this->name, 0,
+ "Sent post commit req for %s to %d "
+ "peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ glusterd_op_modify_op_ctx(op, op_ctx);
+ return ret;
+}
+
+int32_t
gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -1751,16 +2123,22 @@ gd_mgmt_v3_post_validate_req(glusterd_op_t op, int32_t op_ret, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
req.op_ret = op_ret;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(
peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
@@ -1967,15 +2345,21 @@ gd_mgmt_v3_unlock(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_UNLOCK,
@@ -2338,6 +2722,15 @@ glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
+ /* POST COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr,
+ &op_errno, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit Op Failed");
+ goto out;
+ }
+
/* POST-COMMIT VALIDATE PHASE */
/* As of now, post_validate does not try to clean up failed
commands, so 0 is sent (op_ret as 0).
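
With this hunk, a synchronous mgmt v3 transaction runs its phases in a fixed order, and post commit slots in between commit and post validate. In outline (helper names are inferred from the surrounding hunks and the GLUSTERD_MGMT_V3_* procedure list, so treat them as assumptions; error paths and unlock-on-failure elided):

    /* Sketch of glusterd_mgmt_v3_initiate_all_phases after this change. */
    ret = glusterd_mgmt_v3_initiate_lockdown(...);  /* LOCK          */
    ret = glusterd_mgmt_v3_pre_validate(...);       /* PRE_VALIDATE  */
    ret = glusterd_mgmt_v3_brick_op(...);           /* BRICK_OP      */
    ret = glusterd_mgmt_v3_commit(...);             /* COMMIT        */
    ret = glusterd_mgmt_v3_post_commit(...);        /* POST_COMMIT   */
    ret = glusterd_mgmt_v3_post_validate(...);      /* POST_VALIDATE */
    ret = glusterd_mgmt_v3_release_peer_locks(...); /* UNLOCK        */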
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
index 71f793d0397..27dd1849519 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
@@ -28,6 +28,10 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
uint32_t *op_errno, dict_t *rsp_dict);
int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict);
+
+int32_t
gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
char **op_errstr, dict_t *rsp_dict);
@@ -84,4 +88,10 @@ glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr,
dict_t *rsp_dict);
int
glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict);
+
+int
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr);
+
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr);
#endif /* _GLUSTERD_MGMT_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
index 9c4b2fb18cc..645d845ee76 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
@@ -81,6 +81,7 @@ parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc)
mspec->patterns = GF_CALLOC(mspec->len, sizeof(*mspec->patterns),
gf_gld_mt_mount_pattern);
if (!mspec->patterns) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -261,8 +262,11 @@ make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user,
int ret = 0;
vols = gf_strdup((char *)volnames);
- if (!vols)
+ if (!vols) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volume name=%s", volnames, NULL);
goto out;
+ }
for (vc = 1, p = vols; *p; p++) {
if (*p == ',')
@@ -270,8 +274,10 @@ make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user,
}
siz = strlen(volnames) + vc * SLEN("volfile-id=");
meetspec = GF_CALLOC(1, siz + 1, gf_gld_mt_georep_meet_spec);
- if (!meetspec)
+ if (!meetspec) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
for (p = vols;;) {
vol = strtok_r(p, ",", &savetok);
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 14915b3fc17..c537fc33a85 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -106,6 +106,7 @@ glusterd_txn_opinfo_dict_init()
priv->glusterd_txn_opinfo = dict_new();
if (!priv->glusterd_txn_opinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -178,8 +179,10 @@ glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id)
GF_ASSERT(dict);
*txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
- if (!*txn_id)
+ if (!*txn_id) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
if (priv->op_version < GD_OP_VERSION_3_6_0)
gf_uuid_copy(**txn_id, priv->global_txn_id);
@@ -541,8 +544,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
case GD_OP_STOP_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_TERMINATE;
brick_req->name = brickinfo->path;
glusterd_set_brick_status(brickinfo, GF_BRICK_STOPPING);
@@ -551,8 +557,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_XLATOR_INFO;
brick_req->name = brickinfo->path;
@@ -561,45 +570,69 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
case GD_OP_HEAL_VOLUME: {
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_XLATOR_OP;
brick_req->name = "";
ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"),
(int32_t *)&heal_op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=heal-op", NULL);
goto out;
+ }
ret = dict_set_int32n(dict, "xl-op", SLEN("xl-op"), heal_op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=xl-op", NULL);
goto out;
+ }
} break;
case GD_OP_STATUS_VOLUME: {
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_STATUS;
brick_req->name = "";
ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
brickinfo->path);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-name", NULL);
goto out;
+ }
} break;
case GD_OP_REBALANCE:
case GD_OP_DEFRAG_BRICK_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
+ }
ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_VOLINFO_GET_FAIL, "Volume=%s", volname, NULL);
goto out;
+ }
snprintf(name, sizeof(name), "%s-dht", volname);
brick_req->name = gf_strdup(name);
@@ -608,8 +641,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
case GD_OP_BARRIER:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_BARRIER;
brick_req->name = brickinfo->path;
break;
@@ -623,8 +659,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
brick_req->dict.dict_val = NULL;
ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
&brick_req->input.input_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
*req = brick_req;
ret = 0;
@@ -646,13 +685,19 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
GF_ASSERT(op < GD_OP_MAX);
GF_ASSERT(op > GD_OP_NONE);
GF_ASSERT(req);
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
switch (op) {
case GD_OP_PROFILE_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_NODE_PROFILE;
brick_req->name = "";
@@ -662,8 +707,11 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
case GD_OP_STATUS_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_NODE_STATUS;
brick_req->name = "";
@@ -674,14 +722,20 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
case GD_OP_SCRUB_ONDEMAND:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_NODE_BITROT;
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
+ }
brick_req->name = gf_strdup(volname);
break;
@@ -694,8 +748,11 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
&brick_req->input.input_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
*req = brick_req;
ret = 0;
@@ -703,7 +760,7 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
out:
if (ret && brick_req)
GF_FREE(brick_req);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -719,12 +776,14 @@ glusterd_validate_quorum_options(xlator_t *this, char *fullkey, char *value,
goto out;
key = strchr(fullkey, '.');
if (key == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
key++;
opt = xlator_volume_option_get(this, key);
if (!opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL);
ret = -1;
goto out;
}
@@ -988,8 +1047,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
if (check_op_version) {
ret = dict_get_uint32(dict, "new-op-version", &new_op_version);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get new_op_version");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=new-op-version", NULL);
goto out;
}
@@ -1043,8 +1102,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
}
@@ -1069,14 +1128,19 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
}
val_dict = dict_new();
- if (!val_dict)
+ if (!val_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
for (count = 1; ret != 1; count++) {
keystr_len = sprintf(keystr, "key%d", count);
ret = dict_get_strn(dict, keystr, keystr_len, &key);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", keystr, NULL);
break;
+ }
keystr_len = sprintf(keystr, "value%d", count);
ret = dict_get_strn(dict, keystr, keystr_len, &value);
@@ -1603,12 +1667,17 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr)
0,
};
glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
if (ret) {
snprintf(msg, sizeof(msg),
"hostname couldn't be "
"retrieved from msg");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
@@ -1623,6 +1692,8 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr)
"Volume %s "
"does not exist",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_FOUND,
+ "Volume=%s", volname, NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
@@ -1635,6 +1706,8 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr)
RCU_READ_UNLOCK;
ret = -1;
snprintf(msg, sizeof(msg), "%s, is not a friend", hostname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND,
+ "Peer_name=%s", hostname, NULL);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1645,6 +1718,8 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr)
"%s, is not connected at "
"the moment",
hostname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_DISCONNECTED,
+ "Peer_name=%s", hostname, NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
@@ -1685,8 +1760,11 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
GF_ASSERT(priv);
ret = dict_get_uint32(dict, "cmd", &cmd);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=cmd", NULL);
goto out;
+ }
if (cmd & GF_CLI_STATUS_ALL)
goto out;
@@ -1697,6 +1775,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"The cluster is operating at "
"version 1. Getting the status of quotad is not "
"allowed in this state.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_GET_STAT_FAIL,
+ msg, NULL);
ret = -1;
goto out;
}
@@ -1708,6 +1788,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"version less than %d. Getting the "
"status of snapd is not allowed in this state.",
GD_OP_VERSION_3_6_0);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAP_STATUS_FAIL, msg,
+ NULL);
ret = -1;
goto out;
}
@@ -1722,17 +1804,23 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ "Volume=%s", volname, NULL);
ret = -1;
goto out;
}
ret = glusterd_validate_volume_id(dict, volinfo);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VALIDATE_FAILED, NULL);
goto out;
+ }
ret = glusterd_is_volume_started(volinfo);
if (!ret) {
snprintf(msg, sizeof(msg), "Volume %s is not started", volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_STARTED,
+ "Volume=%s", volname, NULL);
ret = -1;
goto out;
}
@@ -1746,12 +1834,16 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
ret = -1;
snprintf(msg, sizeof(msg), "Volume %s is not Self-heal compatible",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_SHD_NOT_COMP,
+ "Volume=%s", volname, NULL);
goto out;
}
if (!shd_enabled) {
ret = -1;
snprintf(msg, sizeof(msg),
"Self-heal Daemon is disabled for volume %s", volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SELF_HEALD_DISABLED,
+ "Volume=%s", volname, NULL);
goto out;
}
#ifdef BUILD_GNFS
@@ -1762,6 +1854,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
ret = -1;
snprintf(msg, sizeof(msg), "NFS server is disabled for volume %s",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_NFS_GANESHA_DISABLED, "Volume=%s", volname, NULL);
goto out;
}
#endif
@@ -1772,6 +1866,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"Volume %s does not have "
"quota enabled",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_DISABLED,
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
@@ -1781,6 +1877,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"Volume %s does not have "
"bitrot enabled",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED,
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) {
@@ -1791,6 +1889,10 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"bitrot enabled. Scrubber will be enabled "
"automatically if bitrot is enabled",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED,
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
@@ -1800,12 +1902,17 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"Volume %s does not have "
"uss enabled",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAPD_NOT_RUNNING,
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
goto out;
+ }
ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
_gf_false);
@@ -1814,6 +1921,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"No brick %s in"
" volume %s",
brick, volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_FOUND,
+ "Brick=%s, Volume=%s", brick, volname, NULL);
ret = -1;
goto out;
}
@@ -2100,8 +2209,10 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict)
ret = -1;
dup_opt = dict_new();
- if (!dup_opt)
+ if (!dup_opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
if (!all) {
dict_copy(conf->opts, dup_opt);
dict_del(dup_opt, key);
@@ -2112,8 +2223,11 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict)
ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
+ }
ret = glusterd_store_options(this, dup_opt);
if (ret)
@@ -2124,9 +2238,11 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict)
ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
- else
+ } else
next_version = NULL;
if (!all) {
@@ -2410,8 +2526,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
conf = this->private;
ret = dict_get_strn(dict, "key1", SLEN("key1"), &key);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=key1", NULL);
goto out;
+ }
ret = dict_get_strn(dict, "value1", SLEN("value1"), &value);
if (ret) {
@@ -2530,12 +2649,17 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
}
ret = -1;
dup_opt = dict_new();
- if (!dup_opt)
+ if (!dup_opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
dict_copy(conf->opts, dup_opt);
ret = dict_set_str(dup_opt, key, value);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = glusterd_get_next_global_opt_version_str(conf->opts, &next_version);
if (ret)
@@ -2543,8 +2667,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
+ }
ret = glusterd_store_options(this, dup_opt);
if (ret)
@@ -2555,9 +2682,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
- else
+ } else
next_version = NULL;
dup_value = gf_strdup(value);
@@ -2565,9 +2694,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
goto out;
ret = dict_set_dynstr(conf->opts, key, dup_value);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
- else
+ } else
dup_value = NULL; /* Protect the allocation from GF_FREE */
out:
@@ -3002,6 +3133,8 @@ glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
snprintf(msg, sizeof(msg),
"hostname couldn't be "
"retrieved from msg");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
@@ -3026,6 +3159,7 @@ glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (!rsp_dict) {
// this should happen only on source
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = 0;
goto out;
}
@@ -3582,27 +3716,30 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
ret = dict_set_int32n(rsp_dict, "type", SLEN("type"), volinfo->type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=type", NULL);
goto out;
+ }
ret = dict_set_int32n(rsp_dict, "brick-index-max", SLEN("brick-index-max"),
brick_index);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Error setting brick-index-max to dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-index-max", NULL);
goto out;
}
ret = dict_set_int32n(rsp_dict, "other-count", SLEN("other-count"),
other_count);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Error setting other-count to dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=other-count", NULL);
goto out;
}
ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), node_count);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Error setting node count to dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
}
@@ -4079,8 +4216,10 @@ glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr)
this = THIS;
GF_ASSERT(this);
- if (!dict || !volname)
+ if (!dict || !volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
@@ -5838,13 +5977,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
static void
glusterd_wait_for_blockers(glusterd_conf_t *priv)
{
- uint64_t blockers = GF_ATOMIC_GET(priv->blockers);
-
- while (blockers) {
- synclock_unlock(&priv->big_lock);
- sleep(1);
- blockers = GF_ATOMIC_GET(priv->blockers);
- synclock_lock(&priv->big_lock);
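+    /* Wait on the condition variable instead of polling once a second:
+     * synccond_wait() releases big_lock while asleep and re-acquires it
+     * when synccond_broadcast(&priv->cond_blockers) wakes the task. */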
+ while (GF_ATOMIC_GET(priv->blockers)) {
+ synccond_wait(&priv->cond_blockers, &priv->big_lock);
}
}
@@ -6479,6 +6613,10 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo = NULL;
int hxl_children = 0;
uuid_t candidate = {0};
+ int brick_index = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int delta = 0;
+ uuid_t candidate_max = {0};
if ((*index) == 0)
(*index)++;
@@ -6490,13 +6628,40 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo,
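+    /* First pass: find candidate_max, the largest brick UUID that is either
+     * local or owned by a connected peer; it serves below as the fallback
+     * healer when a replica set's chosen peer is down. */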
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
+ if (gf_uuid_compare(brickinfo->uuid, candidate_max) > 0) {
+ if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) {
+ gf_uuid_copy(candidate_max, brickinfo->uuid);
+ } else {
+ peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL);
+ if (peerinfo && peerinfo->connected) {
+ gf_uuid_copy(candidate_max, brickinfo->uuid);
+ }
+ }
+ }
+ }
+
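+    /* Second pass: walk the bricks one replica set (hxl_children) at a
+     * time and pick a healer for each set - this node if it owns a brick,
+     * else a connected peer, else fall back to the candidate_max owner. */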
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
if (gf_uuid_is_null(brickinfo->uuid))
(void)glusterd_resolve_brick(brickinfo);
- if (gf_uuid_compare(brickinfo->uuid, candidate) > 0)
- gf_uuid_copy(candidate, brickinfo->uuid);
+ delta %= hxl_children;
+ if ((*index + delta) == (brick_index + hxl_children)) {
+ if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) {
+ gf_uuid_copy(candidate, brickinfo->uuid);
+ } else {
+ peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL);
+ if (peerinfo && peerinfo->connected) {
+ gf_uuid_copy(candidate, brickinfo->uuid);
+ } else if (peerinfo &&
+ (!gf_uuid_compare(candidate_max, MY_UUID))) {
+ _add_hxlator_to_dict(dict, volinfo,
+ ((*index) - 1) / hxl_children,
+ (*hxlator_count));
+ (*hxlator_count)++;
+ }
+ }
- if ((*index) % hxl_children == 0) {
if (!gf_uuid_compare(MY_UUID, candidate)) {
_add_hxlator_to_dict(dict, volinfo,
((*index) - 1) / hxl_children,
@@ -6504,6 +6669,8 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo,
(*hxlator_count)++;
}
gf_uuid_clear(candidate);
+ brick_index += hxl_children;
+ delta++;
}
(*index)++;
diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
index 82acf5bf03c..18d355cb186 100644
--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
@@ -367,8 +367,10 @@ glusterd_peerinfo_new(glusterd_friend_sm_state_t state, uuid_t *uuid,
GF_ASSERT(conf);
new_peer = GF_CALLOC(1, sizeof(*new_peer), gf_gld_mt_peerinfo_t);
- if (!new_peer)
+ if (!new_peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
CDS_INIT_LIST_HEAD(&new_peer->uuid_list);
@@ -564,12 +566,16 @@ glusterd_peer_hostname_new(const char *hostname,
GF_ASSERT(hostname);
GF_ASSERT(name);
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
peer_hostname = GF_CALLOC(1, sizeof(*peer_hostname),
gf_gld_mt_peer_hostname_t);
- if (!peer_hostname)
+ if (!peer_hostname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
peer_hostname->hostname = gf_strdup(hostname);
CDS_INIT_LIST_HEAD(&peer_hostname->hostname_list);
@@ -900,8 +906,11 @@ gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
{
snprintf(key, sizeof(key), "%s.hostname%d", prefix, count);
ret = dict_set_dynstr_with_alloc(dict, key, addr->hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
count++;
}
@@ -923,41 +932,61 @@ gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends,
int keylen;
char *peer_uuid_str = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
GF_ASSERT(peerinfo);
GF_ASSERT(friends);
peer_uuid_str = gd_peer_uuid_str(peerinfo);
keylen = snprintf(key, sizeof(key), "friend%d.uuid", count);
ret = dict_set_strn(friends, key, keylen, peer_uuid_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.hostname", count);
ret = dict_set_strn(friends, key, keylen, peerinfo->hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.port", count);
ret = dict_set_int32n(friends, key, keylen, peerinfo->port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.stateId", count);
ret = dict_set_int32n(friends, key, keylen, peerinfo->state.state);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=%s in dict", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.state", count);
ret = dict_set_strn(
friends, key, keylen,
glusterd_friend_sm_state_name_get(peerinfo->state.state));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.connected", count);
ret = dict_set_int32n(friends, key, keylen, (int32_t)peerinfo->connected);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "friend%d", count);
ret = gd_add_peer_hostnames_to_dict(peerinfo, friends, key);
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index ec5bd1137f1..16ac628ab82 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -433,17 +433,20 @@ __gluster_pmap_portbybrick(rpcsvc_request_t *req)
char *brick = NULL;
int port = 0;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = xdr_to_generic(req->msg[0], &args,
(xdrproc_t)xdr_pmap_port_by_brick_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
brick = args.brick;
- port = pmap_registry_search(THIS, brick, GF_PMAP_PORT_BRICKSERVER,
+ port = pmap_registry_search(this, brick, GF_PMAP_PORT_BRICKSERVER,
_gf_false);
if (!port)
@@ -475,11 +478,14 @@ __gluster_pmap_brickbyport(rpcsvc_request_t *req)
0,
};
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = xdr_to_generic(req->msg[0], &args,
(xdrproc_t)xdr_pmap_brick_by_port_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
@@ -513,10 +519,13 @@ __gluster_pmap_signin(rpcsvc_request_t *req)
};
int ret = -1;
glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
@@ -570,6 +579,7 @@ __gluster_pmap_signout(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
rsp.op_ret = pmap_registry_remove(THIS, args.port, args.brick,
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
index d96adcae89e..a05c90d7b10 100644
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@@ -114,7 +114,7 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags)
goto out;
synclock_unlock(&conf->big_lock);
- sleep(1);
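+    /* synctask_sleep() suspends only this synctask; a plain sleep(1) would
+     * have blocked the whole syncenv worker thread. */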
+ synctask_sleep(1);
synclock_lock(&conf->big_lock);
if (gf_is_service_running(proc->pidfile, &pid)) {
ret = kill(pid, SIGKILL);
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
index cb2d9c7c384..8370c174ce3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quota.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -478,8 +478,9 @@ glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv,
if (dir == NULL)
return;
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- while (entry) {
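+    /* Iterate the directory directly and skip "." / ".." style entries via
+     * gf_irrelevant_entry(), replacing the GF_SKIP_IRRELEVANT_ENTRIES macro. */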
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
len = snprintf(pidfile, sizeof(pidfile), "%s/%s", pid_dir,
entry->d_name);
if ((len >= 0) && (len < sizeof(pidfile))) {
@@ -487,8 +488,6 @@ glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv,
_gf_true);
sys_unlink(pidfile);
}
-
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
sys_closedir(dir);
}
@@ -1900,10 +1899,9 @@ glusterd_get_gfid_from_brick(dict_t *dict, glusterd_volinfo_t *volinfo,
}
ret = sys_lgetxattr(backend_path, GFID_XATTR_KEY, gfid, 16);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_SETXATTR_FAIL,
- "Failed to get "
- "extended attribute %s for directory %s. ",
- GFID_XATTR_KEY, backend_path);
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_XATTR_FAIL,
+ "Attribute=%s, Directory=%s", GFID_XATTR_KEY, backend_path,
+ NULL);
ret = 0;
continue;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
index fc0aaddcbe3..f26d832a06d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
@@ -127,8 +127,10 @@ glusterd_quotadsvc_start(glusterd_svc_t *svc, int flags)
char *options[] = {svc->name, "--process-name", NULL};
cmdline = dict_new();
- if (!cmdline)
+ if (!cmdline) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
for (i = 0; options[i]; i++) {
ret = snprintf(key, sizeof(key), "arg%d", i);
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 4ce20a9e592..458bf168ede 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -219,6 +219,9 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
char valgrind_logfile[PATH_MAX] = {
0,
};
+ char msg[1024] = {
+ 0,
+ };
char *volfileserver = NULL;
char *localtime_logging = NULL;
@@ -270,12 +273,17 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
"rebalance");
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
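+    /* cmd_args.vgtool replaces the old boolean valgrind flag: memcheck runs
+     * the leak checker as before, any other tool selection runs drd. */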
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log",
priv->logdir, volinfo->volname);
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -316,6 +324,10 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
runner_add_arg(&runner, "--localtime-logging");
}
+ snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s",
+ volinfo->volname);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
ret = runner_run_nowait(&runner);
if (ret) {
gf_msg_debug("glusterd", 0, "rebalance command failed");
@@ -390,8 +402,10 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
goto out;
options = dict_new();
- if (!options)
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
@@ -497,6 +511,7 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 3d13ef95ffd..43c2f4373e0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -239,6 +239,8 @@ glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr,
msg[0] = '\0';
}
+ glusterd_add_peers_to_auth_list(volname);
+
ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr,
&dst_brickinfo, &host, dict,
&dup_dstbrick);
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index a8e35f32a15..88662e3bbae 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -183,10 +183,8 @@ glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret,
ret = dict_allocate_and_serialize(ctx, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to "
- "serialize buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
else
free_ptr = rsp.dict.dict_val;
}
@@ -1464,6 +1462,7 @@ glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void *data)
dict_t *dict = NULL;
if (!frame || !this || !data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = -1;
goto out;
}
@@ -1473,15 +1472,24 @@ glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
goto out;
+ }
ret = dict_get_int32n(dict, "port", SLEN("port"), &port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_DEBUG, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=port", NULL);
port = GF_DEFAULT_BASE_PORT;
+ }
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, MY_UUID);
req.hostname = gf_strdup(hostname);
@@ -1510,6 +1518,7 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
dict_t *peer_data = NULL;
if (!frame || !this || !data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = -1;
goto out;
}
@@ -1540,6 +1549,8 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
peer_data = dict_new();
if (!peer_data) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
errno = ENOMEM;
goto out;
}
@@ -1585,8 +1596,11 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
if (!req.vols.vols_len) {
ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
&req.vols.vols_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
}
ret = glusterd_submit_request(
@@ -1760,8 +1774,11 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -1771,9 +1788,8 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
&req.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict "
- "to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -1797,6 +1813,7 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -1836,8 +1853,11 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -1847,9 +1867,8 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
&req.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict "
- "to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -1873,6 +1892,7 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -1954,8 +1974,11 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -1965,9 +1988,8 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict "
- "to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
/* Sending valid transaction ID to peers */
@@ -1989,6 +2011,7 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -2030,8 +2053,11 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -2041,9 +2067,8 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict to "
- "request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
/* Sending valid transaction ID to peers */
@@ -2065,6 +2090,7 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
index eab9078eb8e..c49a0eefba5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
@@ -117,8 +117,10 @@ glusterd_scrubsvc_start(glusterd_svc_t *svc, int flags)
dict_t *cmdict = NULL;
cmdict = dict_new();
- if (!cmdict)
+ if (!cmdict) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto error_return;
+ }
ret = dict_set_str(cmdict, "cmdarg0", "--global-timer-wheel");
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
index f3781879d99..b0b8a2e4018 100644
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -89,12 +89,15 @@ glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict,
ret = dict_get_str(dict, "volname", &volname);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
ret = 0;
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL);
ret = 0;
goto out;
}
@@ -252,8 +255,11 @@ glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo)
int ret = 0;
ret = dict_get_str(volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &quorum_type);
- if (ret)
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GLUSTERD_QUORUM_TYPE_KEY, NULL);
goto out;
+ }
if (strcmp(quorum_type, GLUSTERD_SERVER_QUORUM) == 0)
res = _gf_true;
@@ -287,8 +293,11 @@ does_gd_meet_server_quorum(xlator_t *this)
ret = glusterd_get_quorum_cluster_counts(this, &active_count,
&quorum_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, NULL);
goto out;
+ }
if (!does_quorum_meet(active_count, quorum_count)) {
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
index e106398e697..1c56384a14b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -155,6 +155,8 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
int ret = -1;
dict_t *mod_dict = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX);
if (!glusterd_is_shd_compatible_volume(volinfo)) {
@@ -166,28 +168,42 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
goto out;
}
mod_dict = dict_new();
- if (!mod_dict)
+ if (!mod_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.background-self-heal-count", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.data-self-heal", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.metadata-self-heal", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.entry-self-heal", NULL);
goto out;
+ }
ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
"Failed to create volfile");
goto out;
}
@@ -195,7 +211,7 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
out:
if (mod_dict)
dict_unref(mod_dict);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -270,9 +286,7 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
}
while (conf->restart_shd) {
- synclock_unlock(&conf->big_lock);
- sleep(2);
- synclock_lock(&conf->big_lock);
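+        /* Sleep until the competing shd restart finishes; the out: path
+         * below broadcasts cond_restart_shd once restart_shd is cleared. */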
+ synccond_wait(&conf->cond_restart_shd, &conf->big_lock);
}
conf->restart_shd = _gf_true;
shd_restart = _gf_true;
@@ -328,8 +342,10 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
}
}
out:
- if (shd_restart)
+ if (shd_restart) {
conf->restart_shd = _gf_false;
+ synccond_broadcast(&conf->cond_restart_shd);
+ }
if (volinfo)
glusterd_volinfo_unref(volinfo);
if (ret)
@@ -346,6 +362,8 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
char glusterd_uuid_option[PATH_MAX] = {0};
char client_pid[32] = {0};
dict_t *cmdline = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
cmdline = dict_new();
if (!cmdline)
@@ -362,31 +380,49 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
goto out;
ret = dict_set_str(cmdline, "arg", client_pid);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg", NULL);
goto out;
+ }
/* Pass cmdline arguments as key-value pair. The key is merely
* a carrier and is not used. Since dictionary follows LIFO the value
* should be put in reverse order*/
ret = dict_set_str(cmdline, "arg4", svc->name);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg4", NULL);
goto out;
+ }
ret = dict_set_str(cmdline, "arg3", GD_SHD_PROCESS_NAME);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg3", NULL);
goto out;
+ }
ret = dict_set_str(cmdline, "arg2", glusterd_uuid_option);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg2", NULL);
goto out;
+ }
ret = dict_set_str(cmdline, "arg1", "--xlator-option");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg1", NULL);
goto out;
+ }
ret = glusterd_svc_start(svc, flags, cmdline);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL, NULL);
goto out;
+ }
ret = glusterd_conn_connect(&(svc->conn));
out:
@@ -539,28 +575,45 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
goto out;
}
mod_dict = dict_new();
- if (!mod_dict)
+ if (!mod_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.background-self-heal-count", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.data-self-heal", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.metadata-self-heal", NULL);
goto out;
+ }
ret = dict_set_int32(mod_dict, "graph-check", 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=graph-check", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.entry-self-heal", NULL);
goto out;
+ }
ret = glusterd_volume_svc_check_volfile_identical(
"glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index 7b67592e27c..bf2d81b644a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -146,22 +146,33 @@ glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid)
ctx.op = GD_FRIEND_UPDATE_DEL;
friends = dict_new();
- if (!friends)
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "op");
ret = dict_set_int32n(friends, key, keylen, ctx.op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "hostname");
ret = dict_set_strn(friends, key, keylen, hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
@@ -370,30 +381,45 @@ glusterd_ac_friend_probe(glusterd_friend_sm_event_t *event, void *ctx)
peerinfo = glusterd_peerinfo_find(NULL, probe_ctx->hostname);
if (peerinfo == NULL) {
// We should not reach this state ideally
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
ret = -1;
goto unlock;
}
- if (!peerinfo->peer)
+ if (!peerinfo->peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL,
+ NULL);
goto unlock;
+ }
proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY];
if (proc->fn) {
frame = create_frame(this, this->ctx->pool);
if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto unlock;
}
frame->local = ctx;
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto unlock;
+ }
ret = dict_set_strn(dict, "hostname", SLEN("hostname"),
probe_ctx->hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=hostname", NULL);
goto unlock;
+ }
ret = dict_set_int32n(dict, "port", SLEN("port"), probe_ctx->port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=port", NULL);
goto unlock;
+ }
/* The peerinfo reference being set here is going to be used
* only within this critical section, in glusterd_rpc_probe
@@ -482,12 +508,17 @@ glusterd_ac_send_friend_remove_req(glusterd_friend_sm_event_t *event,
goto unlock;
}
- if (!peerinfo->peer)
+ if (!peerinfo->peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL,
+ NULL);
goto unlock;
+ }
proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE];
if (proc->fn) {
frame = create_frame(this, this->ctx->pool);
if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto unlock;
}
frame->local = data;
@@ -556,13 +587,18 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx)
goto out;
}
- if (!friends)
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto unlock;
+ }
ev_ctx.op = GD_FRIEND_UPDATE_ADD;
ret = dict_set_int32n(friends, key, keylen, ev_ctx.op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto unlock;
+ }
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -578,8 +614,11 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx)
}
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto unlock;
+ }
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -665,13 +704,18 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx)
goto unlock;
}
- if (!friends)
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ev_ctx.op = GD_FRIEND_UPDATE_ADD;
ret = dict_set_int32n(friends, key, keylen, ev_ctx.op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto unlock;
+ }
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -687,8 +731,11 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx)
}
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto unlock;
+ }
ret = dict_set_static_ptr(friends, "peerinfo", cur_peerinfo);
if (ret) {
@@ -1062,6 +1109,7 @@ glusterd_friend_sm_transition_state(uuid_t peerid, char *peername,
RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(peerid, peername);
if (!peerinfo) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
index 3042789916c..d75f249b29e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
@@ -87,8 +87,10 @@ glusterd_snapdsvc_init(void *data)
svc = &(volinfo->snapd.svc);
ret = snprintf(svc->name, sizeof(svc->name), "%s", snapd_svc_name);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
notify = glusterd_snapdsvc_rpc_notify;
@@ -115,6 +117,7 @@ glusterd_snapdsvc_init(void *data)
glusterd_svc_build_snapd_logfile(logfile, logdir, sizeof(logfile));
len = snprintf(volfileid, sizeof(volfileid), "snapd/%s", volinfo->volname);
if ((len < 0) || (len >= sizeof(volfileid))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -301,16 +304,22 @@ glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags)
}
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log",
svc->proc.logdir);
if ((len < 0) || (len >= PATH_MAX)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 43735d33fee..995268b796d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -282,12 +282,10 @@ glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict,
new_volinfo->volume_id,
sizeof(new_volinfo->volume_id), XATTR_REPLACE);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SETXATTR_FAIL,
- "Failed to "
- "set extended attribute %s on %s. "
- "Reason: %s, snap: %s",
- GF_XATTR_VOL_ID_KEY, new_brickinfo->path,
- strerror(errno), new_volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s, Path=%s, Reason=%s, Snap=%s",
+ GF_XATTR_VOL_ID_KEY, new_brickinfo->path,
+ strerror(errno), new_volinfo->volname, NULL);
goto out;
}
}
@@ -1961,9 +1959,7 @@ glusterd_update_snaps_synctask(void *opaque)
synclock_lock(&conf->big_lock);
while (conf->restart_bricks) {
- synclock_unlock(&conf->big_lock);
- sleep(2);
- synclock_lock(&conf->big_lock);
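+        /* Block until the concurrent brick-restart pass completes; woken by
+         * the cond_restart_bricks broadcast at the end of this function. */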
+ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
}
conf->restart_bricks = _gf_true;
@@ -2041,8 +2037,9 @@ glusterd_update_snaps_synctask(void *opaque)
"Failed to remove snap %s", snap->snapname);
goto out;
}
- if (dict)
- dict_unref(dict);
+
+ dict_unref(dict);
+ dict = NULL;
}
snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
ret = dict_get_int32(peer_data, buf, &val);
@@ -2070,6 +2067,7 @@ out:
if (dict)
dict_unref(dict);
conf->restart_bricks = _gf_false;
+ synccond_broadcast(&conf->cond_restart_bricks);
return ret;
}
@@ -2149,18 +2147,27 @@ glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
snprintf(base_key, sizeof(base_key), "brick%d", count);
snprintf(key, sizeof(key), "%s.hostname", base_key);
ret = dict_set_str(dict, key, "Snapshot Daemon");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.path", base_key);
ret = dict_set_dynstr(dict, key, gf_strdup(uuid_utoa(MY_UUID)));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.port", base_key);
ret = dict_set_int32(dict, key, volinfo->snapd.port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
glusterd_svc_build_snapd_pidfile(volinfo, pidfile, sizeof(pidfile));
@@ -2170,8 +2177,11 @@ glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
snprintf(key, sizeof(key), "%s.pid", base_key);
ret = dict_set_int32(dict, key, pid);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.status", base_key);
ret = dict_set_int32(dict, key, brick_online);
@@ -2672,8 +2682,10 @@ glusterd_missed_snapinfo_new(glusterd_missed_snap_info **missed_snapinfo)
new_missed_snapinfo = GF_CALLOC(1, sizeof(*new_missed_snapinfo),
gf_gld_mt_missed_snapinfo_t);
- if (!new_missed_snapinfo)
+ if (!new_missed_snapinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
CDS_INIT_LIST_HEAD(&new_missed_snapinfo->missed_snaps);
CDS_INIT_LIST_HEAD(&new_missed_snapinfo->snap_ops);
@@ -2701,8 +2713,10 @@ glusterd_missed_snap_op_new(glusterd_snap_op_t **snap_op)
new_snap_op = GF_CALLOC(1, sizeof(*new_snap_op),
gf_gld_mt_missed_snapinfo_t);
- if (!new_snap_op)
+ if (!new_snap_op) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
new_snap_op->brick_num = -1;
new_snap_op->op = -1;
@@ -3594,13 +3608,17 @@ glusterd_copy_folder(const char *source, const char *destination)
continue;
ret = snprintf(src_path, sizeof(src_path), "%s/%s", source,
entry->d_name);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", destination,
entry->d_name);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = glusterd_copy_file(src_path, dest_path);
if (ret) {
@@ -3756,8 +3774,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
GLUSTERD_GET_VOLUME_DIR(dest_dir, dest_vol, priv);
ret = snprintf(src_path, sizeof(src_path), "%s/quota.conf", src_dir);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
/* quota.conf is not present if quota is not enabled, Hence ignoring
* the absence of this file
@@ -3770,8 +3790,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
}
ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.conf", dest_dir);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = glusterd_copy_file(src_path, dest_path);
if (ret) {
@@ -3795,8 +3817,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
}
ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.cksum", dest_dir);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = glusterd_copy_file(src_path, dest_path);
if (ret) {
@@ -4066,8 +4090,10 @@ glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir,
snap->snapname);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = sys_lstat(src_path, &stbuf);
if (ret) {
@@ -4082,8 +4108,10 @@ glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
GANESHA_EXPORT_DIRECTORY, src_vol->volname);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = glusterd_copy_file(src_path, dest_path);
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 4703a072294..aeaa8d15214 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -514,6 +514,7 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol)
ret = snprintf(georep_session_dir, sizeof(georep_session_dir), "%s/%s/%s",
priv->workdir, GEOREP, session);
if (ret < 0) { /* Negative value is an error */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -521,6 +522,7 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol)
priv->workdir, GLUSTERD_VOL_SNAP_DIR_PREFIX,
snap_vol->snapshot->snapname, GEOREP, session);
if (ret < 0) { /* Negative value is an error */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -568,12 +570,14 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol)
ret = snprintf(src_path, sizeof(src_path), "%s/%s", georep_session_dir,
files[i]->d_name);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", snap_session_dir,
files[i]->d_name);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -632,12 +636,14 @@ glusterd_snapshot_backup_vol(glusterd_volinfo_t *volinfo)
"%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir,
volinfo->volname);
if ((len < 0) || (len >= sizeof(delete_path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH,
priv->workdir);
- if ((len < 0) || (len >= sizeof(delete_path))) {
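+    /* The old check compared against the wrong buffer's size; trashdir is
+     * the buffer being written here. */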
+ if ((len < 0) || (len >= sizeof(trashdir))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -730,6 +736,7 @@ glusterd_copy_geo_rep_files(glusterd_volinfo_t *origin_vol,
* is slave volume.
*/
if (!origin_vol->gsync_slaves) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_SLAVE, NULL);
ret = 0;
goto out;
}
@@ -1418,6 +1425,8 @@ glusterd_handle_snapshot_config(rpcsvc_request_t *req, glusterd_op_t op,
&config_command);
if (ret) {
snprintf(err_str, len, "Failed to get config-command type");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=config-command", NULL);
goto out;
}
@@ -1976,6 +1985,13 @@ glusterd_snap_create_clone_common_prevalidate(
"command or use [force] option in "
"snapshot create to override this "
"behavior.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_NOT_RUNNING,
+ "Please run volume status command to see brick "
+ "status.Please start the stopped brick and then issue "
+ "snapshot create command or use 'force' option in "
+ "snapshot create to override this behavior.",
+ NULL);
} else {
snprintf(err_str, PATH_MAX,
"One or more bricks are not running. "
@@ -1984,6 +2000,12 @@ glusterd_snap_create_clone_common_prevalidate(
"Please start the stopped brick "
"and then issue snapshot clone "
"command ");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_NOT_RUNNING,
+ "Please run snapshot status command to see brick "
+ "status. Please start the stopped brick and then issue "
+ "snapshot clone command.",
+ NULL);
}
*op_errno = EG_BRCKDWN;
ret = -1;
@@ -1999,6 +2021,10 @@ glusterd_snap_create_clone_common_prevalidate(
if (len < 0) {
strcpy(err_str, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRK_MNTPATH_GET_FAIL,
+ "Brick_hostname=%s, Brick_path=%s", brickinfo->hostname,
+ brickinfo->path, NULL);
ret = -1;
goto out;
}
@@ -2010,6 +2036,11 @@ glusterd_snap_create_clone_common_prevalidate(
"all bricks of %s are thinly "
"provisioned LV.",
volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED,
+ "Ensure that all bricks of volume are thinly "
+ "provisioned LV, Volume=%s",
+ volinfo->volname, NULL);
ret = -1;
goto out;
}
@@ -2022,6 +2053,9 @@ glusterd_snap_create_clone_common_prevalidate(
"cannot copy the snapshot device "
"name (volname: %s, snapname: %s)",
volinfo->volname, snapname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, "Volname=%s, Snapname=%s",
+ volinfo->volname, snapname, NULL);
*loglevel = GF_LOG_WARNING;
ret = -1;
goto out;
@@ -2188,6 +2222,16 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
goto out;
}
+ if (!glusterd_is_volume_started(snap_vol)) {
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot %s is "
+ "not activated",
+ snap->snapname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_VOLSTP;
+ goto out;
+ }
+
ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
@@ -3211,7 +3255,7 @@ glusterd_snapshot_get_snap_detail(dict_t *dict, glusterd_snap_t *snap,
int volcount = 0;
char key[32] = ""; /* keyprefix is quite small, up to 16 bytes */
int keylen;
- char timestr[64] = "";
+ char timestr[GF_TIMESTR_SIZE] = "";
char *value = NULL;
glusterd_volinfo_t *snap_vol = NULL;
glusterd_volinfo_t *tmp_vol = NULL;
@@ -3886,7 +3930,8 @@ glusterd_handle_snapshot_create(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
- ret = dict_set_int64(dict, "snap-time", (int64_t)time(&snap_time));
+ snap_time = gf_time();
+ ret = dict_set_int64(dict, "snap-time", (int64_t)snap_time);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Unable to set snap-time");
@@ -4451,6 +4496,7 @@ glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict,
snap_uuid, snap_vol->volname, brick_number, brickinfo->path,
op, GD_MISSED_SNAP_PENDING);
if ((len < 0) || (len >= sizeof(missed_snap_entry))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -4458,6 +4504,8 @@ glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict,
ret = dict_get_int32n(rsp_dict, "missed_snap_count",
SLEN("missed_snap_count"), &missed_snap_count);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=missed_snap_count", NULL);
/* Initialize the missed_snap_count for the first time */
missed_snap_count = 0;
}
@@ -4647,7 +4695,7 @@ glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo,
ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY,
snap_volinfo->volume_id, 16, XATTR_REPLACE);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL,
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL,
"Failed to set "
"extended attribute %s on %s. Reason: "
"%s, snap: %s",
@@ -5275,6 +5323,48 @@ glusterd_do_snap_vol(glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
dict_deln(snap_vol->dict, "features.barrier", SLEN("features.barrier"));
gd_update_volume_op_versions(snap_vol);
+    /*
+     * Create the export file from the node where "ganesha.enable on"
+     * was executed.
+     */
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ goto out;
+ }
+ }
+
+        ret = dict_set_dynstr_with_alloc(snap_vol->dict,
+                                         "features.cache-invalidation", "on");
+        if (ret)
+            goto out;
+        ret = gd_ganesha_send_dbus(clonename, "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ goto out;
+ }
+ }
+ if (!glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+        /* This happens when a snapshot was created while Ganesha was
+         * enabled globally and Ganesha was later disabled on the cluster.
+         * In that case the volume-level option is still set in the dict,
+         * so disable it; keeping the option makes no sense.
+ */
+
+ ret = dict_set_dynstr(snap_vol->dict, "ganesha.enable", "off");
+ if (ret)
+ goto out;
+ }
+
ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
@@ -5346,8 +5436,31 @@ out:
for (i = 0; unsupported_opt[i].key; i++)
GF_FREE(unsupported_opt[i].value);
- if (snap_vol)
+ if (snap_vol) {
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ }
+ }
+
+ ret = gd_ganesha_send_dbus(clonename, "off");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ }
+ }
+
glusterd_snap_volume_remove(rsp_dict, snap_vol, _gf_true, _gf_true);
+ }
snap_vol = NULL;
}
@@ -5399,6 +5512,8 @@ glusterd_snapshot_activate_deactivate_prevalidate(dict_t *dict,
"Snapshot (%s) does not "
"exist.",
snapname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Snapname=%s", snapname, NULL);
*op_errno = EG_NOSNAP;
ret = -1;
goto out;
@@ -7204,11 +7319,15 @@ glusterd_get_brick_lvm_details(dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup(token);
if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "token=%s", token, NULL);
ret = -1;
goto end;
}
ret = snprintf(key, sizeof(key), "%s.data", key_prefix);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
goto end;
}
@@ -7223,11 +7342,15 @@ glusterd_get_brick_lvm_details(dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup(token);
if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "token=%s", token, NULL);
ret = -1;
goto end;
}
ret = snprintf(key, sizeof(key), "%s.lvsize", key_prefix);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
goto end;
}
@@ -7287,6 +7410,7 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
keylen = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, index);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7294,11 +7418,14 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
ret = snprintf(brick_path, sizeof(brick_path), "%s:%s", brickinfo->hostname,
brickinfo->path);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
value = gf_strdup(brick_path);
if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_path=%s", brick_path, NULL);
ret = -1;
goto out;
}
@@ -7374,6 +7501,8 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
index);
if (keylen < 0) {
ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
goto out;
}
@@ -7459,6 +7588,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
{
keylen = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7482,6 +7612,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
}
keylen = snprintf(brickkey, sizeof(brickkey), "%s.brickcount", key);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -7496,6 +7627,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
keylen = snprintf(key, sizeof(key), "%s.volcount", keyprefix);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7535,6 +7667,7 @@ glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict,
*/
keylen = snprintf(key, sizeof(key), "%s.snapname", keyprefix);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7556,6 +7689,7 @@ glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict,
keylen = snprintf(key, sizeof(key), "%s.uuid", keyprefix);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7639,6 +7773,7 @@ glusterd_get_snap_status_of_volume(char **op_errstr, dict_t *rsp_dict,
{
ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -7690,6 +7825,7 @@ glusterd_get_all_snapshot_status(dict_t *dict, char **op_errstr,
{
ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -8733,6 +8869,7 @@ glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo)
"%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir,
volinfo->volname);
if ((len < 0) || (len >= sizeof(trash_path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -8793,12 +8930,10 @@ glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo)
snap_vol->volume_id,
sizeof(snap_vol->volume_id), XATTR_REPLACE);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SETXATTR_FAIL,
- "Failed to set extended "
- "attribute %s on %s. "
- "Reason: %s, snap: %s",
- GF_XATTR_VOL_ID_KEY, brickinfo->path,
- strerror(errno), snap_vol->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s, Path=%s, Reason=%s, Snap=%s",
+ GF_XATTR_VOL_ID_KEY, brickinfo->path,
+ strerror(errno), snap_vol->volname, NULL);
goto out;
}
}
@@ -9178,6 +9313,7 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 458df8dbd1d..d94dceb10b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -74,7 +74,7 @@ glusterd_replace_slash_with_hyphen(char *str)
while (ptr) {
*ptr = '-';
- ptr = strchr(str, '/');
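+        /* Resume the scan at the slash just replaced; restarting from the
+         * head of the string made the loop quadratic on long paths. */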
+ ptr = strchr(ptr, '/');
}
}
@@ -660,85 +660,72 @@ out:
}
static int
-_storeslaves(dict_t *this, char *key, data_t *value, void *data)
-{
- int32_t ret = 0;
- gf_store_handle_t *shandle = NULL;
- xlator_t *xl = NULL;
-
- xl = THIS;
- GF_ASSERT(xl);
-
- shandle = (gf_store_handle_t *)data;
-
- GF_ASSERT(shandle);
- GF_ASSERT(shandle->fd > 0);
- GF_ASSERT(shandle->path);
- GF_ASSERT(key);
- GF_ASSERT(value);
- GF_ASSERT(value->data);
-
- gf_msg_debug(xl->name, 0, "Storing in volinfo:key= %s, val=%s", key,
- value->data);
-
- ret = gf_store_save_value(shandle->fd, key, (char *)value->data);
- if (ret) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store"
- " handle for path: %s",
- shandle->path);
- return -1;
- }
- return 0;
-}
-
-int
-_storeopts(dict_t *this, char *key, data_t *value, void *data)
+_storeopts(dict_t *dict_value, char *key, data_t *value, void *data)
{
int32_t ret = 0;
int32_t exists = 0;
+ int32_t option_len = 0;
gf_store_handle_t *shandle = NULL;
- xlator_t *xl = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ xlator_t *this = NULL;
- xl = THIS;
- GF_ASSERT(xl);
+ this = THIS;
+ GF_ASSERT(this);
- shandle = (gf_store_handle_t *)data;
+ dict_data = (glusterd_volinfo_data_store_t *)data;
+ shandle = dict_data->shandle;
GF_ASSERT(shandle);
GF_ASSERT(shandle->fd > 0);
- GF_ASSERT(shandle->path);
GF_ASSERT(key);
GF_ASSERT(value);
GF_ASSERT(value->data);
- if (is_key_glusterd_hooks_friendly(key)) {
- exists = 1;
+ if (dict_data->key_check == 1) {
+ if (is_key_glusterd_hooks_friendly(key)) {
+ exists = 1;
- } else {
- exists = glusterd_check_option_exists(key, NULL);
+ } else {
+ exists = glusterd_check_option_exists(key, NULL);
+ }
}
-
- if (1 == exists) {
- gf_msg_debug(xl->name, 0,
- "Storing in volinfo:key= %s, "
+ if (exists == 1 || dict_data->key_check == 0) {
+ gf_msg_debug(this->name, 0,
+ "Storing in buffer for volinfo:key= %s, "
"val=%s",
key, value->data);
-
} else {
- gf_msg_debug(xl->name, 0, "Discarding:key= %s, val=%s", key,
+ gf_msg_debug(this->name, 0, "Discarding:key= %s, val=%s", key,
value->data);
return 0;
}
- ret = gf_store_save_value(shandle->fd, key, (char *)value->data);
- if (ret) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store"
- " handle for path: %s",
- shandle->path);
+    /*
+     * option_len is the size of the serialized "key=value\n" record:
+     * strlen(key) for the key, plus one byte each for '=' and '\n'.
+     * Since value->len already counts the trailing NUL of the data,
+     * only 1 more byte needs to be added here.
+     */
+ option_len = strlen(key) + value->len + 1;
+
+ if ((VOLINFO_BUFFER_SIZE - dict_data->buffer_len - 1) < option_len) {
+ ret = gf_store_save_items(shandle->fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ return -1;
+ }
+ dict_data->buffer_len = 0;
+ dict_data->buffer[0] = '\0';
+ }
+ ret = snprintf(dict_data->buffer + dict_data->buffer_len, option_len + 1,
+ "%s=%s\n", key, value->data);
+ if (ret < 0 || ret > option_len + 1) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COPY_FAIL, NULL);
return -1;
}
+
+ dict_data->buffer_len += ret;
+
return 0;
}
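The buffering scheme above is self-contained enough to sketch on its own. Below is a minimal compile-and-run approximation, with a plain FILE * standing in for the gf_store handle and save_items() standing in for gf_store_save_items(); both names are stand-ins, not the gluster API:

    #include <stdio.h>
    #include <string.h>

    #define VOLINFO_BUFFER_SIZE 4093

    /* Stand-in for gf_store_save_items(): write the whole buffer. */
    static int
    save_items(FILE *fp, const char *buf)
    {
        return fputs(buf, fp) < 0 ? -1 : 0;
    }

    /* Append "key=value\n" to buf, flushing first when it will not fit. */
    static int
    buffered_store(FILE *fp, char *buf, int *buf_len, const char *key,
                   const char *value)
    {
        /* +2 for '=' and '\n'; snprintf appends the NUL itself. */
        int option_len = strlen(key) + strlen(value) + 2;
        int ret;

        if (option_len + 1 > VOLINFO_BUFFER_SIZE)
            return -1; /* a record larger than the buffer is not handled */
        if ((VOLINFO_BUFFER_SIZE - *buf_len - 1) < option_len) {
            if (save_items(fp, buf) != 0)
                return -1;
            *buf_len = 0;
            buf[0] = '\0';
        }
        ret = snprintf(buf + *buf_len, option_len + 1, "%s=%s\n", key, value);
        if (ret < 0 || ret > option_len)
            return -1;
        *buf_len += ret;
        return 0;
    }

    int
    main(void)
    {
        char buf[VOLINFO_BUFFER_SIZE] = "";
        int len = 0;

        buffered_store(stdout, buf, &len, "type", "2");
        buffered_store(stdout, buf, &len, "status", "1");
        if (len > 0)
            save_items(stdout, buf); /* flush the tail, as the callers do */
        return 0;
    }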
@@ -1013,7 +1000,7 @@ glusterd_store_create_snap_dir(glusterd_snap_t *snap)
return ret;
}
-int32_t
+static int32_t
glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo)
{
int32_t ret = -1;
@@ -1021,19 +1008,47 @@ glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo)
GF_ASSERT(fd > 0);
GF_ASSERT(volinfo);
GF_ASSERT(volinfo->shandle);
+ xlator_t *this = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
shandle = volinfo->shandle;
+
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+
ret = glusterd_volume_exclude_options_write(fd, volinfo);
- if (ret)
+ if (ret) {
goto out;
+ }
+
+ dict_data->shandle = shandle;
+ dict_data->key_check = 1;
shandle->fd = fd;
- dict_foreach(volinfo->dict, _storeopts, shandle);
+ dict_foreach(volinfo->dict, _storeopts, (void *)dict_data);
+
+ dict_data->key_check = 0;
+ dict_foreach(volinfo->gsync_slaves, _storeopts, (void *)dict_data);
+
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
+ }
+ }
- dict_foreach(volinfo->gsync_slaves, _storeslaves, shandle);
shandle->fd = 0;
out:
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -1274,14 +1289,6 @@ out:
return ret;
}
-static int
-_gd_store_rebalance_dict(dict_t *dict, char *key, data_t *value, void *data)
-{
- int fd = *(int *)data;
-
- return gf_store_save_value(fd, key, value->data);
-}
-
int32_t
glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
{
@@ -1289,6 +1296,12 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
char buf[PATH_MAX];
char uuid[UUID_SIZE + 1];
uint total_len = 0;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ gf_store_handle_t shandle;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
GF_ASSERT(fd > 0);
GF_ASSERT(volinfo);
@@ -1328,14 +1341,33 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
}
ret = gf_store_save_items(fd, buf);
- if (ret)
+ if (ret) {
goto out;
+ }
if (volinfo->rebal.dict) {
- dict_foreach(volinfo->rebal.dict, _gd_store_rebalance_dict, &fd);
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = &shandle;
+ shandle.fd = fd;
+ dict_foreach(volinfo->rebal.dict, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ NULL);
+ goto out;
+ }
+ }
}
out:
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -1781,8 +1813,9 @@ glusterd_store_delete_snap(glusterd_snap_t *snap)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
len = snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
if ((len < 0) || (len >= PATH_MAX)) {
goto stat_failed;
@@ -1812,7 +1845,6 @@ glusterd_store_delete_snap(glusterd_snap_t *snap)
ret ? "Failed to remove" : "Removed", entry->d_name);
stat_failed:
memset(path, 0, sizeof(path));
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
ret = sys_closedir(dir);
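The same readdir rework recurs throughout this file: the GF_SKIP_IRRELEVANT_ENTRIES macro at the top and bottom of the loop becomes one plain while loop with a skip predicate. A minimal sketch of the new shape, with gf_irrelevant_entry() approximated by a "." / ".." check (the real helper may skip more than that):

    #include <dirent.h>
    #include <stdio.h>
    #include <string.h>

    static int
    irrelevant_entry(const struct dirent *e)
    {
        return !strcmp(e->d_name, ".") || !strcmp(e->d_name, "..");
    }

    int
    main(void)
    {
        DIR *dir = opendir(".");
        struct dirent *entry;

        if (!dir)
            return 1;
        while ((entry = readdir(dir))) {
            if (irrelevant_entry(entry))
                continue; /* skip, then keep iterating */
            printf("%s\n", entry->d_name);
        }
        closedir(dir);
        return 0;
    }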
@@ -2309,7 +2341,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
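gf_store_iter_destroy() is now called with &iter everywhere below. A small sketch of why destroy-by-reference helps: the destroy function can NULL the caller's pointer, so a repeated destroy becomes a harmless no-op instead of a double free (the types here are stand-ins, not the gluster ones):

    #include <stdlib.h>

    struct store_iter {
        int unused;
    };

    static void
    iter_destroy(struct store_iter **pp)
    {
        if (!pp || !*pp)
            return; /* already destroyed: nothing to do */
        free(*pp);
        *pp = NULL; /* the caller's pointer is cleared too */
    }

    int
    main(void)
    {
        struct store_iter *iter = calloc(1, sizeof(*iter));

        iter_destroy(&iter);
        iter_destroy(&iter); /* safe: iter is NULL now */
        return 0;
    }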
@@ -2642,6 +2674,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
brick_count++;
}
+ if (gf_store_iter_destroy(&tmpiter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ goto out;
+ }
+
ret = gf_store_iter_new(volinfo->shandle, &tmpiter);
if (ret)
@@ -2816,13 +2855,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(tmpiter)) {
+ if (gf_store_iter_destroy(&tmpiter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
}
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2955,7 +2994,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3231,7 +3270,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3336,20 +3375,6 @@ glusterd_store_set_options_path(glusterd_conf_t *conf, char *path, size_t len)
snprintf(path, len, "%s/options", conf->workdir);
}
-int
-_store_global_opts(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_store_handle_t *shandle = data;
-
- if (gf_store_save_value(shandle->fd, key, (char *)value->data)) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store handle for key : %s, value %s", key,
- (char *)value->data);
- }
-
- return 0;
-}
-
int32_t
glusterd_store_options(xlator_t *this, dict_t *opts)
{
@@ -3358,13 +3383,15 @@ glusterd_store_options(xlator_t *this, dict_t *opts)
char path[PATH_MAX] = {0};
int fd = -1;
int32_t ret = -1;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
conf = this->private;
glusterd_store_set_options_path(conf, path, sizeof(path));
ret = gf_store_handle_new(path, &shandle);
- if (ret)
+ if (ret) {
goto out;
+ }
fd = gf_store_mkstemp(shandle);
if (fd <= 0) {
@@ -3372,15 +3399,30 @@ glusterd_store_options(xlator_t *this, dict_t *opts)
goto out;
}
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = shandle;
shandle->fd = fd;
- dict_foreach(opts, _store_global_opts, shandle);
- shandle->fd = 0;
+ dict_foreach(opts, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
+ }
+ }
+
ret = gf_store_rename_tmppath(shandle);
- if (ret)
- goto out;
out:
- if ((ret < 0) && (fd > 0))
+    if (shandle)
+        shandle->fd = 0;
+ GF_FREE(dict_data);
+ if ((ret < 0) && (fd > 0)) {
gf_store_unlink_tmppath(shandle);
+ }
gf_store_handle_destroy(shandle);
return ret;
}
@@ -3426,7 +3468,7 @@ glusterd_store_retrieve_options(xlator_t *this)
goto out;
ret = 0;
out:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
gf_store_handle_destroy(shandle);
return ret;
}
@@ -3478,28 +3520,28 @@ glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
-
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (snap && ((!strcmp(entry->d_name, "geo-replication")) ||
(!strcmp(entry->d_name, "info"))))
- goto next;
+ continue;
len = snprintf(entry_path, PATH_MAX, "%s/%s", path, entry->d_name);
- if ((len < 0) || (len >= PATH_MAX)) {
- goto next;
- }
+ if ((len < 0) || (len >= PATH_MAX))
+ continue;
+
ret = sys_lstat(entry_path, &st);
if (ret == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
"Failed to stat entry %s : %s", path, strerror(errno));
- goto next;
+ continue;
}
if (!S_ISDIR(st.st_mode)) {
gf_msg_debug(this->name, 0, "%s is not a valid volume",
entry->d_name);
- goto next;
+ continue;
}
volinfo = glusterd_store_retrieve_volume(entry->d_name, snap);
@@ -3522,8 +3564,6 @@ glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap)
glusterd_store_create_nodestate_sh_on_absence(volinfo);
glusterd_store_perform_node_state_store(volinfo);
}
- next:
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
ret = 0;
@@ -3878,7 +3918,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -4073,9 +4113,9 @@ glusterd_store_retrieve_snaps(xlator_t *this)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
-
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (strcmp(entry->d_name, GLUSTERD_MISSED_SNAPS_LIST_FILE)) {
ret = glusterd_store_retrieve_snap(entry->d_name);
if (ret) {
@@ -4084,7 +4124,6 @@ glusterd_store_retrieve_snaps(xlator_t *this)
goto out;
}
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
/* Retrieve missed_snaps_list */
@@ -4399,7 +4438,7 @@ glusterd_store_create_peer_shandle(glusterd_peerinfo_t *peerinfo)
static int32_t
glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo)
{
- char buf[128];
+ char buf[PATH_MAX];
uint total_len = 0;
int32_t ret = 0;
int32_t i = 1;
@@ -4408,7 +4447,7 @@ glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo)
ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n%s=%d\n",
GLUSTERD_STORE_KEY_PEER_UUID, uuid_utoa(peerinfo->uuid),
GLUSTERD_STORE_KEY_PEER_STATE, peerinfo->state.state);
- if (ret < 0 || ret >= sizeof(buf)) {
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
ret = -1;
goto out;
}
@@ -4419,7 +4458,7 @@ glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo)
ret = snprintf(buf + total_len, sizeof(buf) - total_len,
GLUSTERD_STORE_KEY_PEER_HOSTNAME "%d=%s\n", i,
hostname->hostname);
- if (ret < 0 || ret >= sizeof(buf)) {
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
ret = -1;
goto out;
}
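Both checks above now compare the snprintf() return value against the space remaining in the buffer rather than against the full buffer size, which is the correct truncation test when appending at an offset. A short runnable illustration of the idiom:

    #include <stdio.h>

    int
    main(void)
    {
        char buf[64];
        size_t total_len = 0;
        const char *hosts[] = { "host-a", "host-b", "host-c" };
        size_t i;

        for (i = 0; i < 3; i++) {
            int ret = snprintf(buf + total_len, sizeof(buf) - total_len,
                               "hostname%zu=%s\n", i + 1, hosts[i]);
            if (ret < 0 || (size_t)ret >= sizeof(buf) - total_len)
                return 1; /* truncated: this record did not fit */
            total_len += ret;
        }
        fputs(buf, stdout);
        return 0;
    }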
@@ -4531,11 +4570,9 @@ glusterd_store_retrieve_peers(xlator_t *this)
goto out;
}
- for (;;) {
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- if (!entry) {
- break;
- }
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (gf_uuid_parse(entry->d_name, tmp_uuid) != 0) {
gf_log(this->name, GF_LOG_WARNING, "skipping non-peer file %s",
entry->d_name);
@@ -4623,7 +4660,7 @@ glusterd_store_retrieve_peers(xlator_t *this)
is_ok = _gf_true;
next:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
if (!is_ok) {
gf_log(this->name, GF_LOG_WARNING,
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 04070549678..83f4df0783e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -29,7 +29,7 @@ typedef enum glusterd_store_ver_ac_ {
} glusterd_volinfo_ver_ac_t;
#define UUID_SIZE 36
-
+#define VOLINFO_BUFFER_SIZE 4093
#define GLUSTERD_STORE_UUID_KEY "UUID"
#define GLUSTERD_STORE_KEY_VOL_TYPE "type"
@@ -112,6 +112,19 @@ typedef enum glusterd_store_ver_ac_ {
#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha"
+/*
+ * This structure buffers the volinfo key-value writes so that many
+ * small records can be flushed to the store file in a single write,
+ * instead of one write per key-value pair.
+ */
+struct glusterd_volinfo_data_store_ {
+    gf_store_handle_t *shandle; /* contains fd and path of the file */
+    int16_t buffer_len;
+    char key_check; /* flag: validate the key before buffering it */
+    char buffer[VOLINFO_BUFFER_SIZE];
+};
+typedef struct glusterd_volinfo_data_store_ glusterd_volinfo_data_store_t;
+
int32_t
glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
glusterd_volinfo_ver_ac_t ac);
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
index 18990fe365b..ca845903c4f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
@@ -239,8 +239,10 @@ glusterd_svc_check_topology_identical(char *svc_name,
int tmpclean = 0;
int tmpfd = -1;
- if ((!identical) || (!this) || (!this->private))
+ if ((!identical) || (!this) || (!this->private)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
@@ -358,8 +360,10 @@ glusterd_volume_svc_check_topology_identical(
int tmpclean = 0;
int tmpfd = -1;
- if ((!identical) || (!this) || (!this->private))
+ if ((!identical) || (!this) || (!this->private)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
@@ -634,7 +638,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_ATOMIC_DEC(conf->blockers);
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
STACK_DESTROY(frame->root);
out:
@@ -722,7 +728,9 @@ out:
if (volinfo)
glusterd_volinfo_unref(volinfo);
- GF_ATOMIC_DEC(conf->blockers);
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
STACK_DESTROY(frame->root);
return 0;
}
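The decrement-and-broadcast pattern here (and at the other blocker sites in this patch) lets a waiter sleep until the blocker count drops to zero instead of polling. A pthread analogue of the idea, offered as a sketch rather than the gluster synctask API:

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond_blockers = PTHREAD_COND_INITIALIZER;
    static int blockers;

    /* The last blocker to finish wakes everyone waiting for zero. */
    static void
    blocker_done(void)
    {
        pthread_mutex_lock(&lock);
        if (--blockers == 0)
            pthread_cond_broadcast(&cond_blockers);
        pthread_mutex_unlock(&lock);
    }

    static void
    wait_for_blockers(void)
    {
        pthread_mutex_lock(&lock);
        while (blockers > 0)
            pthread_cond_wait(&cond_blockers, &lock);
        pthread_mutex_unlock(&lock);
    }

    int
    main(void)
    {
        blockers = 1;
        blocker_done();     /* count hits zero, waiters are woken */
        wait_for_blockers(); /* returns immediately: count is zero */
        return 0;
    }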
@@ -785,12 +793,16 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
frame = create_frame(this, this->ctx->pool);
if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto *errlbl;
}
if (op == GLUSTERD_SVC_ATTACH) {
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
ret = -ENOMEM;
goto *errlbl;
}
@@ -808,6 +820,7 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
file_len = stbuf.st_size;
volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char);
if (!volfile_content) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -ENOMEM;
goto *errlbl;
}
@@ -834,10 +847,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val,
&brick_req.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict "
- "to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto *errlbl;
}
}
@@ -969,7 +980,7 @@ glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags)
* TBD: see if there's a better way
*/
synclock_unlock(&conf->big_lock);
- sleep(1);
+ synctask_sleep(1);
synclock_lock(&conf->big_lock);
}
ret = -1;
@@ -1023,7 +1034,7 @@ glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig)
* TBD: see if there's a better way
*/
synclock_unlock(&conf->big_lock);
- sleep(1);
+ synctask_sleep(1);
synclock_lock(&conf->big_lock);
}
ret = -1;
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
index 99119d69e45..18b3fb13630 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
@@ -162,6 +162,9 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
char *localtime_logging = NULL;
char *log_level = NULL;
char daemon_log_level[30] = {0};
+ char msg[1024] = {
+ 0,
+ };
int32_t len = 0;
this = THIS;
@@ -187,7 +190,7 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
svc->proc.logdir, svc->name);
if ((len < 0) || (len >= PATH_MAX)) {
@@ -195,9 +198,13 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
goto unlock;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes",
- NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -226,8 +233,8 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
if (cmdline)
dict_foreach(cmdline, svc_add_args, (void *)&runner);
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
- "Starting %s service", svc->name);
+ snprintf(msg, sizeof(msg), "Starting %s service", svc->name);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
if (flags == PROC_START_NO_WAIT) {
ret = runner_run_nowait(&runner);
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index b7039f83885..b73d37ad08e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -406,8 +406,11 @@ gd_syncop_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
gf_uuid_copy(req.txn_id, txn_id);
@@ -507,8 +510,11 @@ gd_syncop_mgmt_v3_unlock(dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
gf_uuid_copy(req.txn_id, txn_id);
@@ -842,16 +848,21 @@ gd_syncop_mgmt_stage_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args,
uuid_t *peerid = NULL;
req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_stage_req_t);
- if (!req)
+ if (!req) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
gf_uuid_copy(req->uuid, my_uuid);
req->op = op;
ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val,
&req->buf.buf_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
if (ret)
@@ -903,6 +914,8 @@ _gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
if (rsp.output.output_len) {
args->dict = dict_new();
if (!args->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
ret = -1;
args->op_errno = ENOMEM;
goto out;
@@ -910,8 +923,11 @@ _gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.output.output_val, rsp.output.output_len,
&args->dict);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_UNSERIALIZE_FAIL, NULL);
goto out;
+ }
}
args->op_ret = rsp.op_ret;
@@ -1152,16 +1168,21 @@ gd_syncop_mgmt_commit_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args,
uuid_t *peerid = NULL;
req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_commit_req_t);
- if (!req)
+ if (!req) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
gf_uuid_copy(req->uuid, my_uuid);
req->op = op;
ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val,
&req->buf.buf_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
if (ret)
@@ -1278,8 +1299,10 @@ gd_stage_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
GF_ASSERT(conf);
rsp_dict = dict_new();
- if (!rsp_dict)
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
if ((op == GD_OP_CREATE_VOLUME) || (op == GD_OP_ADD_BRICK) ||
(op == GD_OP_START_VOLUME))
@@ -1408,6 +1431,7 @@ gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
rsp_dict = dict_new();
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1464,8 +1488,11 @@ commit_done:
if (op == GD_OP_STATUS_VOLUME) {
ret = dict_get_uint32(req_dict, "cmd", &cmd);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=cmd", NULL);
goto out;
+ }
if (origin_glusterd) {
if ((cmd & GF_CLI_STATUS_ALL)) {
@@ -1691,10 +1718,12 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
rpc_clnt_t *rpc = NULL;
dict_t *rsp_dict = NULL;
int32_t cmd = GF_OP_CMD_NONE;
+ glusterd_volinfo_t *volinfo = NULL;
this = THIS;
rsp_dict = dict_new();
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1722,18 +1751,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
{
rpc = glusterd_pending_node_get_rpc(pending_node);
+        /* In the rebalance case, if the rpc object is NULL we try to
+         * create it. If the rebalance daemon is down, the create fails
+         * with -1; otherwise the rpc object is created and referenced.
+         */
if (!rpc) {
- if (pending_node->type == GD_NODE_REBALANCE) {
- ret = 0;
- glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
+ if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
+ volinfo = pending_node->node;
+ ret = glusterd_rebalance_rpc_create(volinfo);
+ if (ret) {
+ ret = 0;
+ glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
+ goto out;
+ } else {
+ rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
+ }
+ } else {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+ "Brick Op failed "
+ "due to rpc failure.");
goto out;
}
-
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
- "Brick Op failed "
- "due to rpc failure.");
- goto out;
}
ret = gd_syncop_mgmt_brick_op(rpc, pending_node, op, req_dict, op_ctx,
@@ -1759,7 +1798,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
pending_node = NULL;
ret = 0;
out:
- if (pending_node)
+ if (pending_node && pending_node->node)
glusterd_pending_node_put_rpc(pending_node);
if (rsp_dict)
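A compile-and-run sketch of that control flow, using hypothetical stand-ins for glusterd_rebalance_rpc_create(), glusterd_defrag_rpc_get() and glusterd_defrag_volume_node_rsp():

    #include <stdio.h>

    static int daemon_up = 1; /* toggles the simulated rebalance daemon */

    /* Stand-in for glusterd_rebalance_rpc_create(). */
    static int
    rebalance_rpc_create(void)
    {
        return daemon_up ? 0 : -1;
    }

    /* Stand-in for glusterd_defrag_rpc_get(): a referenced rpc object. */
    static const char *
    defrag_rpc_get(void)
    {
        return "rpc-object";
    }

    static const char *
    get_rebalance_rpc(void)
    {
        if (rebalance_rpc_create() != 0) {
            /* Daemon is down: fall back to an empty node response and
             * treat the op as handled instead of failing the brick op. */
            printf("sending empty defrag node rsp\n");
            return NULL;
        }
        return defrag_rpc_get();
    }

    int
    main(void)
    {
        printf("rpc=%s\n", get_rebalance_rpc());
        daemon_up = 0;
        if (get_rebalance_rpc() == NULL)
            printf("handled without rpc\n");
        return 0;
    }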
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h
index ce4a940c7a0..a265f2135c6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h
@@ -32,7 +32,7 @@
ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \
cbk, (xdrproc_t)xdrproc); \
if (!ret) \
- synctask_yield(stb->task); \
+ synctask_yield(stb->task, NULL); \
else \
gf_asprintf(&stb->errstr, \
"%s failed. Check log file" \
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index c947e6c926b..90ef2cf4c9c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -79,6 +79,14 @@
#include <sys/sockio.h>
#endif
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <libprocstat.h>
+#include <libutil.h>
+#endif
+
#define NFS_PROGRAM 100003
#define NFSV3_VERSION 3
@@ -443,6 +451,8 @@ glusterd_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
if (!iobref) {
iobref = iobref_new();
if (!iobref) {
+ gf_smsg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
}
@@ -645,6 +655,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
new_volinfo->dict = dict_new();
if (!new_volinfo->dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
GF_FREE(new_volinfo);
goto out;
@@ -652,6 +663,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
new_volinfo->gsync_slaves = dict_new();
if (!new_volinfo->gsync_slaves) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
dict_unref(new_volinfo->dict);
GF_FREE(new_volinfo);
goto out;
@@ -659,6 +671,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
new_volinfo->gsync_active_slaves = dict_new();
if (!new_volinfo->gsync_active_slaves) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
dict_unref(new_volinfo->dict);
dict_unref(new_volinfo->gsync_slaves);
GF_FREE(new_volinfo);
@@ -675,7 +688,9 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc);
glusterd_shdsvc_build(&new_volinfo->shd.svc);
+ pthread_mutex_init(&new_volinfo->store_volinfo_lock, NULL);
pthread_mutex_init(&new_volinfo->reflock, NULL);
+
*volinfo = glusterd_volinfo_ref(new_volinfo);
ret = 0;
@@ -956,7 +971,10 @@ glusterd_volinfo_delete(glusterd_volinfo_t *volinfo)
glusterd_auth_cleanup(volinfo);
glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ pthread_mutex_destroy(&volinfo->store_volinfo_lock);
pthread_mutex_destroy(&volinfo->reflock);
+ LOCK_DESTROY(&volinfo->lock);
+
GF_FREE(volinfo);
ret = 0;
out:
@@ -1107,7 +1125,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir)
}
brick_dir = &brickpath[strlen(mnt_pt)];
- brick_dir++;
+ if (brick_dir[0] == '/')
+ brick_dir++;
snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir);
}
@@ -1352,6 +1371,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
"Reason : %s ",
brickinfo->hostname, brickinfo->path,
strerror(errno));
+ gf_smsg(
+ "glusterd", GF_LOG_ERROR, errno, GD_MSG_CREATE_BRICK_DIR_FAILED,
+ "Brick_hostname=%s, Brick_path=%s, Reason=%s",
+ brickinfo->hostname, brickinfo->path, strerror(errno), NULL);
goto out;
}
} else {
@@ -1364,6 +1387,9 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
"lstat failed on %s. "
"Reason : %s",
brickinfo->path, strerror(errno));
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL,
+ "Failed on Brick_path=%s, Reason=%s", brickinfo->path,
+ strerror(errno), NULL);
goto out;
}
@@ -1372,6 +1398,8 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
"The provided path %s "
"which is already present, is not a directory",
brickinfo->path);
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Brick_path=%s", brickinfo->path, NULL);
ret = -1;
goto out;
}
@@ -1388,6 +1416,8 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
"lstat failed on /. "
"Reason : %s",
strerror(errno));
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL,
+ "Failed on /, Reason=%s", strerror(errno), NULL);
goto out;
}
@@ -1397,6 +1427,9 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
"lstat failed on %s. "
"Reason : %s",
parentdir, strerror(errno));
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL,
+ "Failed on parentdir=%s, Reason=%s", parentdir, strerror(errno),
+ NULL);
goto out;
}
if (strncmp(volname, GLUSTER_SHARED_STORAGE,
@@ -1407,6 +1440,8 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
len = snprintf(msg, sizeof(msg),
"Brick isn't allowed to be "
"created inside glusterd's working directory.");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATION_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -1422,6 +1457,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
"command if you want to override this "
"behavior.",
brickinfo->hostname, brickinfo->path);
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATE_MNTPNT,
+ "Use 'force' at the end of the command if you want to "
+ "override this behavior, Brick_hostname=%s, Brick_path=%s",
+ brickinfo->hostname, brickinfo->path, NULL);
ret = -1;
goto out;
} else if (parent_st.st_dev == root_st.st_dev) {
@@ -1435,6 +1474,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
"command if you want to override this "
"behavior.",
brickinfo->hostname, brickinfo->path);
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATE_ROOT,
+ "Use 'force' at the end of the command if you want to "
+ "override this behavior, Brick_hostname=%s, Brick_path=%s",
+ brickinfo->hostname, brickinfo->path, NULL);
/* If --wignore-partition flag is used, ignore warnings
* related to bricks being on root partition when 'force'
@@ -1466,6 +1509,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
".glusterfs directory for brick %s:%s. "
"Reason : %s ",
brickinfo->hostname, brickinfo->path, strerror(errno));
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_CREATE_GLUSTER_DIR_FAILED,
+ "Brick_hostname=%s, Brick_path=%s, Reason=%s",
+ brickinfo->hostname, brickinfo->path, strerror(errno), NULL);
goto out;
}
@@ -1608,8 +1655,10 @@ glusterd_volinfo_find_by_volume_id(uuid_t volume_id,
glusterd_volinfo_t *voliter = NULL;
glusterd_conf_t *priv = NULL;
- if (!volume_id)
+ if (!volume_id) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
return -1;
+ }
this = THIS;
priv = this->private;
@@ -1881,8 +1930,11 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo,
* connections is too long for unix domain socket connections.
*/
options = dict_new();
- if (!options)
+ if (!options) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = rpc_transport_unix_options_build(options, socketpath, 600);
if (ret)
@@ -2025,8 +2077,8 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo,
retry:
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
- /* Run bricks with valgrind */
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
+ /* Run bricks with valgrind. */
if (volinfo->logdir) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-%s.log",
volinfo->logdir, volinfo->volname, exp_path);
@@ -2040,8 +2092,13 @@ retry:
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -2154,7 +2211,7 @@ retry:
if (is_brick_mx_enabled())
runner_add_arg(&runner, "--brick-mux");
- runner_log(&runner, "", 0, "Starting GlusterFS");
+ runner_log(&runner, "", GF_LOG_DEBUG, "Starting GlusterFS");
brickinfo->port = port;
brickinfo->rdma_port = rdma_port;
@@ -2163,7 +2220,10 @@ retry:
if (wait) {
synclock_unlock(&priv->big_lock);
+ errno = 0;
ret = runner_run(&runner);
+ if (errno != 0)
+ ret = errno;
synclock_lock(&priv->big_lock);
if (ret == EADDRINUSE) {
@@ -2745,6 +2805,15 @@ glusterd_volume_compute_cksum(glusterd_volinfo_t *volinfo, char *cksum_path,
ret = -1;
goto out;
}
+ } else if (priv->op_version < GD_OP_VERSION_7_0) {
+ ret = get_checksum_for_path(filepath, &cksum, priv->op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL,
+ "unable to get "
+ "checksum for path: %s",
+ filepath);
+ goto out;
+ }
}
ret = get_checksum_for_file(fd, &cksum, priv->op_version);
@@ -2864,13 +2933,19 @@ glusterd_add_bricks_hname_path_to_dict(dict_t *dict,
{
ret = snprintf(key, sizeof(key), "%d-hostname", index);
ret = dict_set_strn(dict, key, ret, brickinfo->hostname);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = snprintf(key, sizeof(key), "%d-path", index);
ret = dict_set_strn(dict, key, ret, brickinfo->path);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
index++;
}
@@ -2998,11 +3073,16 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
ret = gd_add_vol_snap_details_to_dict(dict, pfx, volinfo);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "vol snap details", NULL);
goto out;
+ }
volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
if (!volume_id_str) {
+        gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+                "volume id=%s", uuid_utoa(volinfo->volume_id), NULL);
ret = -1;
goto out;
}
@@ -3035,6 +3115,8 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
rebalance_id_str = gf_strdup(uuid_utoa(volinfo->rebal.rebalance_id));
if (!rebalance_id_str) {
+        gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+                "rebalance_id=%s", uuid_utoa(volinfo->rebal.rebalance_id),
+                NULL);
ret = -1;
goto out;
}
@@ -3186,6 +3268,9 @@ out:
GF_FREE(rebalance_id_str);
GF_FREE(rb_id_str);
+ if (key[0] != '\0' && ret != 0)
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
gf_msg_debug(this->name, 0, "Returning with %d", ret);
return ret;
}
@@ -3244,29 +3329,44 @@ glusterd_vol_add_quota_conf_to_dict(glusterd_volinfo_t *volinfo, dict_t *load,
snprintf(key, sizeof(key) - 1, "%s.gfid%d", key_prefix, gfid_idx);
ret = dict_set_dynstr_with_alloc(load, key, uuid_utoa(buf));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
snprintf(key, sizeof(key) - 1, "%s.gfid-type%d", key_prefix, gfid_idx);
ret = dict_set_int8(load, key, type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
ret = snprintf(key, sizeof(key), "%s.gfid-count", key_prefix);
ret = dict_set_int32n(load, key, ret, gfid_idx);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix);
ret = dict_set_uint32(load, key, volinfo->quota_conf_cksum);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.quota-version", key_prefix);
ret = dict_set_uint32(load, key, volinfo->quota_conf_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = 0;
out:
@@ -3599,8 +3699,11 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
snprintf(key_prefix, sizeof(key_prefix), "volume%d", count);
keylen = snprintf(key, sizeof(key), "%s.name", key_prefix);
ret = dict_get_strn(peer_data, key, keylen, &volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
@@ -3617,8 +3720,11 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
keylen = snprintf(key, sizeof(key), "%s.version", key_prefix);
ret = dict_get_int32n(peer_data, key, keylen, &version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
if (version > volinfo->version) {
// Mismatch detected
@@ -3626,6 +3732,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
"Version of volume %s differ. local version = %d, "
"remote version = %d on peer %s",
volinfo->volname, volinfo->version, version, hostname);
+ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1);
*status = GLUSTERD_VOL_COMP_UPDATE_REQ;
goto out;
} else if (version < volinfo->version) {
@@ -3637,8 +3744,11 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
//
snprintf(key, sizeof(key), "%s.ckusm", key_prefix);
ret = dict_get_uint32(peer_data, key, &cksum);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
if (cksum != volinfo->cksum) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_VERS_MISMATCH,
@@ -4164,8 +4274,11 @@ glusterd_import_quota_conf(dict_t *peer_data, int vol_idx,
keylen = snprintf(key, sizeof(key), "%s.gfid-count", key_prefix);
ret = dict_get_int32n(peer_data, key, keylen, &gfid_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = glusterd_quota_conf_write_header(fd);
if (ret)
@@ -4175,8 +4288,11 @@ glusterd_import_quota_conf(dict_t *peer_data, int vol_idx,
keylen = snprintf(key, sizeof(key) - 1, "%s.gfid%d", key_prefix,
gfid_idx);
ret = dict_get_strn(peer_data, key, keylen, &gfid_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
snprintf(key, sizeof(key) - 1, "%s.gfid-type%d", key_prefix, gfid_idx);
ret = dict_get_int8(peer_data, key, &gfid_type);
@@ -4237,18 +4353,23 @@ gd_import_friend_volume_rebal_dict(dict_t *dict, int count,
GF_ASSERT(dict);
GF_ASSERT(volinfo);
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
snprintf(key_prefix, sizeof(key_prefix), "volume%d", count);
ret = snprintf(key, sizeof(key), "%s.rebal-dict-count", key_prefix);
ret = dict_get_int32n(dict, key, ret, &dict_count);
if (ret) {
/* Older peers will not have this dict */
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
ret = 0;
goto out;
}
volinfo->rebal.dict = dict_new();
if (!volinfo->rebal.dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -4258,7 +4379,7 @@ gd_import_friend_volume_rebal_dict(dict_t *dict, int count,
out:
if (ret && volinfo->rebal.dict)
dict_unref(volinfo->rebal.dict);
- gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
return ret;
}
@@ -4746,7 +4867,7 @@ glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo,
* brick multiplexing enabled, then stop the brick process
*/
if (ret || (new_brickinfo->snap_status == -1) ||
- is_brick_mx_enabled()) {
+ GF_ATOMIC_GET(old_volinfo->volpeerupdate)) {
/*TODO: may need to switch to 'atomic' flavour of
* brick_stop, once we make peer rpc program also
* synctask enabled*/
@@ -4915,8 +5036,15 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
ret = snprintf(key, sizeof(key), "volume%d.update", count);
ret = dict_get_int32n(peer_data, key, ret, &update);
- if (ret || !update) {
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ if (!update) {
/* if update is 0 that means the volume is not imported */
+ gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOLUME_NOT_IMPORTED, NULL);
goto out;
}
@@ -5020,18 +5148,25 @@ glusterd_import_friend_volumes_synctask(void *opaque)
goto out;
peer_data = dict_new();
- if (!peer_data)
+ if (!peer_data) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
errno = ENOMEM;
goto out;
}
ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
synclock_lock(&conf->big_lock);
@@ -5040,22 +5175,22 @@ glusterd_import_friend_volumes_synctask(void *opaque)
* restarted (refer glusterd_restart_bricks ())
*/
while (conf->restart_bricks) {
- synclock_unlock(&conf->big_lock);
- sleep(2);
- synclock_lock(&conf->big_lock);
+ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
}
conf->restart_bricks = _gf_true;
while (i <= count) {
ret = glusterd_import_friend_volume(peer_data, i);
if (ret) {
- conf->restart_bricks = _gf_false;
- goto out;
+ break;
}
i++;
}
- glusterd_svcs_manager(NULL);
+ if (i > count) {
+ glusterd_svcs_manager(NULL);
+ }
conf->restart_bricks = _gf_false;
+ synccond_broadcast(&conf->cond_restart_bricks);
out:
if (peer_data)
dict_unref(peer_data);
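The sleep(2) polling loop becomes a real condition wait here and in glusterd_restart_bricks() further down. A pthread analogue of the gate, as a sketch (glusterd actually uses its synctask-aware synccond primitives, not raw pthreads):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond_restart_bricks = PTHREAD_COND_INITIALIZER;
    static bool restart_bricks = false;

    /* Block until no one else is restarting bricks, then claim the gate. */
    static void
    enter_restart_section(void)
    {
        pthread_mutex_lock(&big_lock);
        while (restart_bricks)
            pthread_cond_wait(&cond_restart_bricks, &big_lock);
        restart_bricks = true;
        pthread_mutex_unlock(&big_lock);
    }

    /* Release the gate and wake every waiter. */
    static void
    leave_restart_section(void)
    {
        pthread_mutex_lock(&big_lock);
        restart_bricks = false;
        pthread_cond_broadcast(&cond_restart_bricks);
        pthread_mutex_unlock(&big_lock);
    }

    int
    main(void)
    {
        enter_restart_section();
        leave_restart_section();
        return 0;
    }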
@@ -5079,8 +5214,11 @@ glusterd_import_friend_volumes(dict_t *peer_data)
GF_ASSERT(peer_data);
ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
while (i <= count) {
ret = glusterd_import_friend_volume(peer_data, i);
@@ -5099,11 +5237,16 @@ glusterd_get_global_server_quorum_ratio(dict_t *opts, double *quorum)
{
int ret = -1;
char *quorum_str = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = dict_get_strn(opts, GLUSTERD_QUORUM_RATIO_KEY,
SLEN(GLUSTERD_QUORUM_RATIO_KEY), &quorum_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GLUSTERD_QUORUM_RATIO_KEY, NULL);
goto out;
+ }
ret = gf_string2percent(quorum_str, quorum);
if (ret)
@@ -5118,11 +5261,16 @@ glusterd_get_global_opt_version(dict_t *opts, uint32_t *version)
{
int ret = -1;
char *version_str = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = dict_get_strn(opts, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), &version_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
+ }
ret = gf_string2uint(version_str, version);
if (ret)
@@ -5171,13 +5319,17 @@ glusterd_import_global_opts(dict_t *friend_data)
SLEN("global-opt-count"), &count);
if (ret) {
// old version peer
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=global-opt-count", NULL);
ret = 0;
goto out;
}
import_options = dict_new();
- if (!import_options)
+ if (!import_options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = import_prdict_dict(friend_data, import_options, "key", "val", count,
"global");
if (ret) {
@@ -5248,8 +5400,11 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
}
ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
while (i <= count) {
ret = glusterd_compare_friend_volume(peer_data, i, status, hostname);
@@ -5517,13 +5672,19 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count,
else if (!strcmp(server, priv->scrub_svc.name))
ret = dict_set_nstrn(dict, key, keylen, "Scrubber Daemon",
SLEN("Scrubber Daemon"));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "brick%d.path", count);
ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(uuid_utoa(MY_UUID)));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
#ifdef BUILD_GNFS
/* Port is available only for the NFS server.
@@ -5534,26 +5695,38 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count,
if (dict_getn(vol_opts, "nfs.port", SLEN("nfs.port"))) {
ret = dict_get_int32n(vol_opts, "nfs.port", SLEN("nfs.port"),
&port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=nfs.port", NULL);
goto out;
+ }
} else
port = GF_NFS3_PORT;
}
#endif
keylen = snprintf(key, sizeof(key), "brick%d.port", count);
ret = dict_set_int32n(dict, key, keylen, port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "brick%d.pid", count);
ret = dict_set_int32n(dict, key, keylen, pid);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "brick%d.status", count);
ret = dict_set_int32n(dict, key, keylen, running);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
out:
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
@@ -5735,7 +5908,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
call_frame_t *frame = v_frame;
glusterd_conf_t *conf = frame->this->private;
- GF_ATOMIC_DEC(conf->blockers);
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
STACK_DESTROY(frame->root);
return 0;
@@ -5837,7 +6012,9 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count,
}
}
out:
- GF_ATOMIC_DEC(conf->blockers);
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
STACK_DESTROY(frame->root);
return 0;
}
@@ -5893,12 +6070,15 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
iobref = iobref_new();
if (!iobref) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto *errlbl;
}
errlbl = &&free_iobref;
frame = create_frame(this, this->ctx->pool);
if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto *errlbl;
}
@@ -5929,7 +6109,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
GF_ATOMIC_INC(conf->blockers);
ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
iobref, frame, NULL, 0, NULL, 0, NULL);
- return ret;
free_iobref:
iobref_unref(iobref);
@@ -5938,7 +6117,7 @@ maybe_free_iobuf:
iobuf_unref(iobuf);
}
err:
- return -1;
+ return ret;
}
extern size_t
@@ -6022,7 +6201,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
* TBD: see if there's a better way
*/
synclock_unlock(&conf->big_lock);
- sleep(1);
+ synctask_sleep(1);
synclock_lock(&conf->big_lock);
}
@@ -6162,7 +6341,7 @@ find_compat_brick_in_vol(glusterd_conf_t *conf,
"brick %s is still"
" starting, waiting for 2 seconds ",
other_brick->path);
- sleep(2);
+ synctask_sleep(2);
synclock_lock(&conf->big_lock);
retries--;
}
@@ -6266,7 +6445,6 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo,
int
glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
{
- char fname[128] = "";
char buf[1024] = "";
char cmdline[2048] = "";
xlator_t *this = NULL;
@@ -6281,6 +6459,22 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
this = THIS;
GF_ASSERT(this);
+#ifdef __FreeBSD__
+ blen = sizeof(buf);
+ int mib[4];
+
+ mib[0] = CTL_KERN;
+ mib[1] = KERN_PROC;
+ mib[2] = KERN_PROC_ARGS;
+ mib[3] = pid;
+
+ if (sys_sysctl(mib, 4, buf, &blen, NULL, blen) != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "brick process %d is not running",
+ pid);
+ return ret;
+ }
+#else
+ char fname[128] = "";
snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
if (sys_access(fname, R_OK) != 0) {
@@ -6297,6 +6491,7 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
strerror(errno), fname);
return ret;
}
+#endif
/* convert cmdline to single string */
for (i = 0, j = 0; i < blen; i++) {
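For reference, a minimal FreeBSD-only sketch of reading a process's argument vector through the kern.proc.args sysctl; the arguments come back NUL-separated, which is why the code above rejoins them into one string. Note that sysctl(3) documents newlen as 0 when newp is NULL:

    /* FreeBSD only. */
    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
        char buf[4096];
        size_t blen = sizeof(buf);
        int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_ARGS, (int)getpid() };

        if (sysctl(mib, 4, buf, &blen, NULL, 0) != 0) {
            perror("sysctl");
            return 1;
        }
        fwrite(buf, 1, blen, stdout); /* NUL-separated argv strings */
        putchar('\n');
        return 0;
    }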
@@ -6345,6 +6540,43 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
char *
search_brick_path_from_proc(pid_t brick_pid, char *brickpath)
{
+ char *brick_path = NULL;
+#ifdef __FreeBSD__
+    struct filestat *fst = NULL;
+    struct procstat *ps = NULL;
+    struct kinfo_proc *kp = NULL;
+    struct filestat_list *head = NULL;
+
+ ps = procstat_open_sysctl();
+ if (ps == NULL)
+ goto out;
+
+ kp = kinfo_getproc(brick_pid);
+ if (kp == NULL)
+ goto out;
+
+ head = procstat_getfiles(ps, (void *)kp, 0);
+ if (head == NULL)
+ goto out;
+
+ STAILQ_FOREACH(fst, head, next)
+ {
+ if (fst->fs_fd < 0)
+ continue;
+
+ if (!strcmp(fst->fs_path, brickpath)) {
+ brick_path = gf_strdup(fst->fs_path);
+ break;
+ }
+ }
+
+out:
+    if (head != NULL)
+        procstat_freefiles(ps, head);
+    if (kp != NULL)
+        free(kp);
+    if (ps != NULL)
+        procstat_close(ps);
+#else
struct dirent *dp = NULL;
DIR *dirp = NULL;
size_t len = 0;
@@ -6355,7 +6587,6 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath)
0,
},
};
- char *brick_path = NULL;
if (!brickpath)
goto out;
@@ -6391,7 +6622,9 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath)
}
}
out:
- sys_closedir(dirp);
+ if (dirp)
+ sys_closedir(dirp);
+#endif
return brick_path;
}
@@ -6419,8 +6652,10 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo,
GF_ASSERT(this);
conf = this->private;
- if ((!brickinfo) || (!volinfo))
+ if ((!brickinfo) || (!volinfo)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
if (gf_uuid_is_null(brickinfo->uuid)) {
ret = glusterd_resolve_brick(brickinfo);
@@ -6445,7 +6680,8 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo,
* three different triggers for an attempt to start the brick process
* due to the quorum handling code in glusterd_friend_sm.
*/
- if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered) {
+ if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered ||
+ GF_ATOMIC_GET(volinfo->volpeerupdate)) {
gf_msg_debug(this->name, 0,
"brick %s is already in starting "
"phase",
@@ -6651,9 +6887,7 @@ glusterd_restart_bricks(void *opaque)
* glusterd_compare_friend_data ())
*/
while (conf->restart_bricks) {
- synclock_unlock(&conf->big_lock);
- sleep(2);
- synclock_lock(&conf->big_lock);
+ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
}
conf->restart_bricks = _gf_true;
@@ -6767,9 +7001,12 @@ glusterd_restart_bricks(void *opaque)
ret = 0;
out:
- GF_ATOMIC_DEC(conf->blockers);
conf->restart_done = _gf_true;
conf->restart_bricks = _gf_false;
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
+ synccond_broadcast(&conf->cond_restart_bricks);
return_block:
return ret;
@@ -7107,22 +7344,26 @@ glusterd_get_brick_root(char *path, char **mount_point)
char *mnt_pt = NULL;
struct stat brickstat = {0};
struct stat buf = {0};
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!path)
+ if (!path) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto err;
+ }
mnt_pt = gf_strdup(path);
- if (!mnt_pt)
+ if (!mnt_pt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto err;
+ }
if (sys_stat(mnt_pt, &brickstat))
goto err;
while ((ptr = strrchr(mnt_pt, '/')) && ptr != mnt_pt) {
*ptr = '\0';
if (sys_stat(mnt_pt, &buf)) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
- "error in "
- "stat: %s",
- strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Error in stat=%s", strerror(errno), NULL);
goto err;
}
@@ -7134,10 +7375,8 @@ glusterd_get_brick_root(char *path, char **mount_point)
if (ptr == mnt_pt) {
if (sys_stat("/", &buf)) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
- "error in "
- "stat: %s",
- strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Error in stat=%s", strerror(errno), NULL);
goto err;
}
if (brickstat.st_dev == buf.st_dev)
@@ -7202,11 +7441,16 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count)
};
struct fs_info *fs = NULL;
static dict_t *cached_fs = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = snprintf(key, sizeof(key), "brick%d.device", count);
ret = dict_get_strn(dict, key, ret, &device);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
if (cached_fs) {
if (dict_get_str(cached_fs, device, &cur_word) == 0) {
@@ -7218,8 +7462,11 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count)
ret = snprintf(key, sizeof(key), "brick%d.fs_name", count);
ret = dict_get_strn(dict, key, ret, &fs_name);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
runinit(&runner);
runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
@@ -7228,11 +7475,9 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count)
if (strcmp(fs_name, fs->fs_type_name) == 0) {
if (!fs->fs_tool_name) {
/* dynamic inodes */
- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_INODE_SIZE_GET_FAIL,
- "the "
- "brick on %s (%s) uses dynamic inode "
- "sizes",
- device, fs_name);
+ gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_INODE_SIZE_GET_FAIL,
+ "The brick on device uses dynamic inode sizes",
+ "Device=%s (%s)", device, fs_name, NULL);
cur_word = "N/A";
goto cached;
}
@@ -7246,19 +7491,17 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count)
runner_add_arg(&runner, fs->fs_tool_arg);
runner_add_arg(&runner, device);
} else {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL,
- "could not find %s to get"
- "inode size for %s (%s): %s package missing?",
- fs->fs_tool_name, device, fs_name, fs->fs_tool_pkg);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL,
+ "Could not find tool to get inode size for device", "Tool=%s",
+ fs->fs_tool_name, "Device=%s (%s)", device, fs_name,
+ "Missing package=%s ?", fs->fs_tool_pkg, NULL);
goto out;
}
ret = runner_start(&runner);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL,
- "failed to execute "
- "\"%s\"",
- fs->fs_tool_name);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL,
+ "Failed to execute \"%s\"", fs->fs_tool_name, NULL);
/*
* Runner_start might return an error after the child has
* been forked, e.g. if the program isn't there. In that
@@ -7286,21 +7529,22 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count)
ret = runner_end(&runner);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL,
- "%s exited with non-zero exit status", fs->fs_tool_name);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL,
+ "Tool exited with non-zero exit status", "Tool=%s",
+ fs->fs_tool_name, NULL);
goto out;
}
if (!cur_word) {
ret = -1;
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL,
- "Unable to retrieve inode size using %s", fs->fs_tool_name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL,
+ "Using Tool=%s", fs->fs_tool_name, NULL);
goto out;
}
if (dict_set_dynstr_with_alloc(cached_fs, device, cur_word)) {
/* not fatal if not entered into the cache */
- gf_msg_debug(THIS->name, 0, "failed to cache fs inode size for %s",
+ gf_msg_debug(this->name, 0, "failed to cache fs inode size for %s",
device);
}
@@ -7311,8 +7555,7 @@ cached:
out:
if (ret)
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL,
- "failed to get inode size");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, NULL);
return ret;
}
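+/* Illustration of the flow above (hypothetical device/brick values): for an
+ * xfs brick whose "brick%d.device" resolves to /dev/sdb1, the runner execs
+ * the filesystem's inquiry tool (e.g. xfs_info /dev/sdb1), scrapes the inode
+ * size from its stdout, and caches it in cached_fs keyed by device, so later
+ * bricks on the same device skip the exec entirely. */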
@@ -7368,16 +7611,23 @@ glusterd_add_brick_mount_details(glusterd_brickinfo_t *brickinfo, dict_t *dict,
struct mntent save_entry = {0};
char *mnt_pt = NULL;
struct mntent *entry = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
snprintf(base_key, sizeof(base_key), "brick%d", count);
ret = glusterd_get_brick_root(brickinfo->path, &mnt_pt);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICKPATH_ROOT_GET_FAIL,
+ NULL);
goto out;
+ }
entry = glusterd_get_mnt_entry_info(mnt_pt, buff, sizeof(buff),
&save_entry);
if (!entry) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GET_MNT_ENTRY_INFO_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -7386,15 +7636,21 @@ glusterd_add_brick_mount_details(glusterd_brickinfo_t *brickinfo, dict_t *dict,
snprintf(key, sizeof(key), "%s.device", base_key);
ret = dict_set_dynstr_with_alloc(dict, key, entry->mnt_fsname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
/* fs type */
snprintf(key, sizeof(key), "%s.fs_name", base_key);
ret = dict_set_dynstr_with_alloc(dict, key, entry->mnt_type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
/* mount options */
snprintf(key, sizeof(key), "%s.mnt_options", base_key);
@@ -7488,43 +7744,61 @@ glusterd_add_brick_detail_to_dict(glusterd_volinfo_t *volinfo,
block_size = brickstat.f_bsize;
snprintf(key, sizeof(key), "%s.block_size", base_key);
ret = dict_set_uint64(dict, key, block_size);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
/* free space in brick */
memfree = brickstat.f_bfree * brickstat.f_bsize;
snprintf(key, sizeof(key), "%s.free", base_key);
ret = dict_set_uint64(dict, key, memfree);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
/* total space of brick */
memtotal = brickstat.f_blocks * brickstat.f_bsize;
snprintf(key, sizeof(key), "%s.total", base_key);
ret = dict_set_uint64(dict, key, memtotal);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
/* inodes: total and free counts only for ext2/3/4 and xfs */
inodes_total = brickstat.f_files;
if (inodes_total) {
snprintf(key, sizeof(key), "%s.total_inodes", base_key);
ret = dict_set_uint64(dict, key, inodes_total);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
inodes_free = brickstat.f_ffree;
if (inodes_free) {
snprintf(key, sizeof(key), "%s.free_inodes", base_key);
ret = dict_set_uint64(dict, key, inodes_free);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
ret = glusterd_add_brick_mount_details(brickinfo, dict, count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_ADD_BRICK_MNT_INFO_FAIL,
+ NULL);
goto out;
+ }
ret = glusterd_add_inode_size_to_dict(dict, count);
out:
@@ -7630,8 +7904,11 @@ glusterd_add_brick_to_dict(glusterd_volinfo_t *volinfo,
ret = dict_set_int32n(dict, key, keylen, brick_online);
out:
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
gf_msg_debug(this->name, 0, "Returning %d", ret);
+ }
return ret;
}
@@ -7712,8 +7989,10 @@ glusterd_brick_stop(glusterd_volinfo_t *volinfo,
conf = this->private;
GF_ASSERT(conf);
- if ((!brickinfo) || (!volinfo))
+ if ((!brickinfo) || (!volinfo)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
if (gf_uuid_is_null(brickinfo->uuid)) {
ret = glusterd_resolve_brick(brickinfo);
@@ -7856,8 +8135,10 @@ glusterd_rb_check_bricks(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src,
rb = &volinfo->rep_brick;
- if (!rb->src_brick || !rb->dst_brick)
+ if (!rb->src_brick || !rb->dst_brick) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
return -1;
+ }
if (strcmp(rb->src_brick->hostname, src->hostname) ||
strcmp(rb->src_brick->path, src->path)) {
@@ -8003,6 +8284,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid,
char msg[2048] = "";
gf_boolean_t in_use = _gf_false;
int flags = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
/* Check for xattr support in backend fs */
ret = sys_lsetxattr(path, "trusted.glusterfs.test", "working", 8, 0);
@@ -8013,6 +8296,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid,
" extended attributes failed, reason:"
" %s.",
host, path, strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_BRICK_FAIL,
+ "Host=%s, Path=%s", host, path, NULL);
goto out;
} else {
@@ -8022,6 +8307,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid,
"Removing test extended"
" attribute failed, reason: %s",
strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_REMOVE_XATTR_FAIL,
+ NULL);
goto out;
}
}
@@ -8044,6 +8331,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid,
"Failed to set extended "
"attributes %s, reason: %s",
GF_XATTR_VOL_ID_KEY, strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL,
+ "Attriutes=%s", GF_XATTR_VOL_ID_KEY, NULL);
goto out;
}
@@ -8063,7 +8352,7 @@ glusterd_sm_tr_log_transition_add_to_dict(dict_t *dict,
int ret = -1;
char key[64] = "";
int keylen;
- char timestr[64] = "";
+ char timestr[GF_TIMESTR_SIZE] = "";
char *str = NULL;
GF_ASSERT(dict);
@@ -8095,6 +8384,9 @@ glusterd_sm_tr_log_transition_add_to_dict(dict_t *dict,
goto out;
out:
+ if (key[0] != '\0' && ret != 0)
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
gf_msg_debug("glusterd", 0, "returning %d", ret);
return ret;
}
@@ -8203,7 +8495,8 @@ glusterd_sm_tr_log_transition_add(glusterd_sm_tr_log_t *log, int old_state,
transitions[next].old_state = old_state;
transitions[next].new_state = new_state;
transitions[next].event = event;
- time(&transitions[next].time);
+ transitions[next].time = gf_time();
+
log->current = next;
if (log->count < log->size)
log->count++;
@@ -8319,8 +8612,10 @@ glusterd_get_local_brickpaths(glusterd_volinfo_t *volinfo, char **pathlist)
int i = 0;
glusterd_brickinfo_t *brickinfo = NULL;
- if ((!volinfo) || (!pathlist))
+ if ((!volinfo) || (!pathlist)) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
path_tokens = GF_CALLOC(sizeof(char *), volinfo->brick_count,
gf_gld_mt_charptr);
@@ -8774,6 +9069,8 @@ glusterd_nfs_statedump(char *options, int option_cnt, char **op_errstr)
snprintf(msg, sizeof(msg),
"for nfs statedump, options should"
" be after the key nfs");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY,
+ "Options misplaced", NULL);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -8841,6 +9138,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt,
dup_options = gf_strdup(options);
if (!dup_options) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "options=%s", options, NULL);
goto out;
}
option = strtok_r(dup_options, " ", &tmpptr);
@@ -8848,6 +9147,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt,
snprintf(msg, sizeof(msg),
"for gluster client statedump, options "
"should be after the key 'client'");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY,
+ "Options misplaced", NULL);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -8855,6 +9156,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt,
target_ip = strtok_r(NULL, " ", &tmpptr);
if (target_ip == NULL) {
snprintf(msg, sizeof(msg), "ip address not specified");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, msg,
+ NULL);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -8863,6 +9166,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt,
pid = strtok_r(NULL, " ", &tmpptr);
if (pid == NULL) {
snprintf(msg, sizeof(msg), "pid not specified");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, msg,
+ NULL);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -8903,6 +9208,8 @@ glusterd_quotad_statedump(char *options, int option_cnt, char **op_errstr)
snprintf(msg, sizeof(msg),
"for quotad statedump, options "
"should be after the key 'quotad'");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY,
+ "Options misplaced", NULL);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -9765,6 +10072,8 @@ glusterd_append_gsync_status(dict_t *dst, dict_t *src)
ret = dict_get_strn(src, "gsync-status", SLEN("gsync-status"), &stop_msg);
if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=gsync-status", NULL);
ret = 0;
goto out;
}
@@ -10019,8 +10328,11 @@ glusterd_sync_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict)
int ret = 0;
GF_ASSERT(rsp_dict);
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
}
@@ -10068,6 +10380,8 @@ glusterd_profile_volume_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict)
ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &brick_count);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
ret = 0; // no bricks in the rsp
goto out;
}
@@ -10347,6 +10661,8 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict)
glusterd_volinfo_t *volinfo = NULL;
GF_ASSERT(rsp_dict);
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
if (aggr) {
ctx_dict = aggr;
@@ -10356,8 +10672,11 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict)
}
ret = dict_get_int32n(ctx_dict, "cmd", SLEN("cmd"), &cmd);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=cmd",
+ NULL);
goto out;
+ }
if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd(ctx_dict)) {
ret = dict_get_int32n(rsp_dict, "vol_count", SLEN("vol_count"),
@@ -10365,18 +10684,27 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict)
if (ret == 0) {
ret = dict_set_int32n(ctx_dict, "vol_count", SLEN("vol_count"),
vol_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=vol_count", NULL);
goto out;
+ }
for (i = 0; i < vol_count; i++) {
keylen = snprintf(key, sizeof(key), "vol%d", i);
ret = dict_get_strn(rsp_dict, key, keylen, &volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = dict_set_strn(ctx_dict, key, keylen, volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
} else {
/* Ignore the error as still the aggregation applies in
@@ -10390,6 +10718,8 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict)
ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &rsp_node_count);
if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, "Key=count",
+ NULL);
ret = 0; // no bricks in the rsp
goto out;
}
@@ -10397,8 +10727,8 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict)
ret = dict_get_int32n(rsp_dict, "other-count", SLEN("other-count"),
&rsp_other_count);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get other count from rsp_dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=other-count", NULL);
goto out;
}
@@ -10408,18 +10738,27 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict)
if (!dict_getn(ctx_dict, "brick-index-max", SLEN("brick-index-max"))) {
ret = dict_get_int32n(rsp_dict, "brick-index-max",
SLEN("brick-index-max"), &brick_index_max);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=brick-index-max", NULL);
goto out;
+ }
ret = dict_set_int32n(ctx_dict, "brick-index-max",
SLEN("brick-index-max"), brick_index_max);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-index-max", NULL);
goto out;
+ }
} else {
ret = dict_get_int32n(ctx_dict, "brick-index-max",
SLEN("brick-index-max"), &brick_index_max);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=brick-index-max", NULL);
goto out;
+ }
}
rsp_ctx.count = node_count;
@@ -10432,45 +10771,45 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict)
ret = dict_set_int32n(ctx_dict, "count", SLEN("count"),
node_count + rsp_node_count);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to update node count");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
}
ret = dict_set_int32n(ctx_dict, "other-count", SLEN("other-count"),
(other_count + rsp_other_count));
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to update other-count");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=other-count", NULL);
goto out;
}
ret = dict_get_strn(ctx_dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to get volname");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
- "Failed to get volinfo for volume: %s", volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Volume=%s", volname, NULL);
goto out;
}
ret = dict_set_int32n(ctx_dict, "hot_brick_count", SLEN("hot_brick_count"),
hot_brick_count);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
- "Failed to update hot_brick_count");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=hot_brick_count", NULL);
goto out;
}
ret = dict_set_int32n(ctx_dict, "type", SLEN("type"), type);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
- "Failed to update type");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=type", NULL);
goto out;
}
@@ -12912,7 +13251,7 @@ glusterd_get_value_for_vme_entry(struct volopt_map_entry *vme, char **def_val)
ret = xlator_option_info_list(&vol_opt_handle, key, &local_def_val, &descr);
if (ret) {
/*Swallow Error if option not found*/
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_KEY_FAILED,
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GET_KEY_FAILED,
"Failed to get option for %s "
"key",
key);
@@ -13069,7 +13408,9 @@ glusterd_get_global_options_for_all_vols(rpcsvc_request_t *req, dict_t *ctx,
gf_asprintf(&def_val, "%d", priv->op_version);
need_free = _gf_true;
} else {
- def_val = valid_all_vol_opts[i].dflt_val;
+ gf_asprintf(&def_val, "%s (DEFAULT)",
+ valid_all_vol_opts[i].dflt_val);
+ need_free = _gf_true;
}
}
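+ /* Illustrative effect (hypothetical output): `gluster volume get all
+ * cluster.server-quorum-ratio` now reports the value as "51 (DEFAULT)"
+ * when it was never explicitly set, while explicitly set values are
+ * reported bare. */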
@@ -13155,6 +13496,7 @@ glusterd_get_default_val_for_volopt(dict_t *ctx, gf_boolean_t all_opts,
int count = 0;
xlator_t *this = NULL;
char *def_val = NULL;
+ char *def_val_str = NULL;
char dict_key[50] = "";
int keylen;
gf_boolean_t key_found = _gf_false;
@@ -13215,7 +13557,13 @@ glusterd_get_default_val_for_volopt(dict_t *ctx, gf_boolean_t all_opts,
goto out;
}
sprintf(dict_key, "value%d", count);
- ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val);
+ if (get_value_vme) { // the value was never changed - DEFAULT is used
+ gf_asprintf(&def_val_str, "%s (DEFAULT)", def_val);
+ ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val_str);
+ GF_FREE(def_val_str);
+ def_val_str = NULL;
+ } else
+ ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Failed to "
@@ -13740,23 +14088,24 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo,
char vpath[PATH_MAX] = "";
char *volfileserver = NULL;
- priv = THIS->private;
- GF_VALIDATE_OR_GOTO(THIS->name, priv, out);
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
dirty[2] = hton32(1);
ret = sys_lsetxattr(brickinfo->path, GF_AFR_DIRTY, dirty, sizeof(dirty), 0);
if (ret == -1) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL,
- "Failed to set extended"
- " attribute %s : %s.",
- GF_AFR_DIRTY, strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s", GF_AFR_DIRTY, "Reason=%s", strerror(errno),
+ NULL);
goto out;
}
if (mkdtemp(tmpmount) == NULL) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
- "failed to create a temporary mount directory.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ NULL);
ret = -1;
goto out;
}
@@ -13767,7 +14116,7 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo,
switch (op) {
case GD_OP_REPLACE_BRICK:
- if (dict_get_strn(THIS->options, "transport.socket.bind-address",
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
SLEN("transport.socket.bind-address"),
&volfileserver) != 0)
volfileserver = "localhost";
@@ -13810,7 +14159,7 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo,
ret = runner_run(&runner);
if (ret) {
- gf_log(THIS->name, GF_LOG_ERROR,
+ gf_log(this->name, GF_LOG_ERROR,
"mount command"
" failed.");
goto lock;
@@ -13820,19 +14169,18 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo,
(op == GD_OP_REPLACE_BRICK) ? GF_AFR_REPLACE_BRICK : GF_AFR_ADD_BRICK,
brickinfo->brick_id, sizeof(brickinfo->brick_id), 0);
if (ret == -1)
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL,
- "Failed to set extended"
- " attribute %s : %s",
- (op == GD_OP_REPLACE_BRICK) ? GF_AFR_REPLACE_BRICK
- : GF_AFR_ADD_BRICK,
- strerror(errno));
- gf_umount_lazy(THIS->name, tmpmount, 1);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s, Reason=%s",
+ (op == GD_OP_REPLACE_BRICK) ? GF_AFR_REPLACE_BRICK
+ : GF_AFR_ADD_BRICK,
+ strerror(errno), NULL);
+ gf_umount_lazy(this->name, tmpmount, 1);
lock:
synclock_lock(&priv->big_lock);
out:
if (pid)
GF_FREE(pid);
- gf_msg_debug("glusterd", 0, "Returning with ret");
+ gf_msg_debug(this->name, 0, "Returning with ret");
return ret;
}
@@ -14041,6 +14389,8 @@ glusterd_brick_op_prerequisites(dict_t *dict, char **op, glusterd_op_t *gd_op,
"brick: %s does not exist in "
"volume: %s",
*src_brick, *volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ "Brick=%s, Volume=%s", *src_brick, *volname, NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
@@ -14265,8 +14615,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
keylen = snprintf(key, sizeof(key), "brick%d.hostname", count);
ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon",
SLEN("Self-heal Daemon"));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "brick%d.path", count);
uuid_str = gf_strdup(uuid_utoa(MY_UUID));
@@ -14275,8 +14628,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
ret = dict_set_dynstrn(dict, key, keylen, uuid_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
uuid_str = NULL;
/* shd doesn't have a port. but the cli needs a port key with
@@ -14285,8 +14641,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
keylen = snprintf(key, sizeof(key), "brick%d.port", count);
ret = dict_set_int32n(dict, key, keylen, 0);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
pidfile = volinfo->shd.svc.proc.pidfile;
@@ -14297,8 +14656,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
pid = -1;
keylen = snprintf(key, sizeof(key), "brick%d.pid", count);
ret = dict_set_int32n(dict, key, keylen, pid);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "brick%d.status", count);
ret = dict_set_int32n(dict, key, keylen, brick_online);
@@ -14313,3 +14675,372 @@ out:
return ret;
}
+
+static gf_ai_compare_t
+glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next)
+{
+ int ret = -1;
+ struct addrinfo *tmp1 = NULL;
+ struct addrinfo *tmp2 = NULL;
+ char firstip[NI_MAXHOST] = {0,};
+ char nextip[NI_MAXHOST] = {
+ 0,
+ };
+
+ for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
+ ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST,
+ NULL, 0, NI_NUMERICHOST);
+ if (ret)
+ return GF_AI_COMPARE_ERROR;
+ for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
+ ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip,
+ NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+ if (ret)
+ return GF_AI_COMPARE_ERROR;
+ if (!strcmp(firstip, nextip)) {
+ return GF_AI_COMPARE_MATCH;
+ }
+ }
+ }
+ return GF_AI_COMPARE_NO_MATCH;
+}
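+/* A minimal usage sketch (ai() is a hypothetical helper wrapping
+ * getaddrinfo()): if "host-a" resolves to 192.168.1.10 and "host-b" to
+ * 192.168.1.20, then
+ *   glusterd_compare_addrinfo(ai("host-a"), ai("host-a")) == GF_AI_COMPARE_MATCH
+ *   glusterd_compare_addrinfo(ai("host-a"), ai("host-b")) == GF_AI_COMPARE_NO_MATCH
+ * One shared numeric address anywhere in either list is enough to match,
+ * so hosts with multiple interfaces still compare equal. */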
+
+/* Check for non-optimal brick order for Replicate/Disperse volumes:
+ * checks whether bricks belonging to a replicate or disperse
+ * volume are present on the same server.
+ */
+int32_t
+glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
+ char **volname, char **brick_list,
+ int32_t *brick_count, int32_t sub_count)
+{
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ xlator_t *this = NULL;
+ addrinfo_list_t *ai_list = NULL;
+ addrinfo_list_t *ai_list_tmp1 = NULL;
+ addrinfo_list_t *ai_list_tmp2 = NULL;
+ char *brick = NULL;
+ char *brick_list_dup = NULL;
+ char *brick_list_ptr = NULL;
+ char *tmpptr = NULL;
+ struct addrinfo *ai_info = NULL;
+ char brick_addr[128] = {
+ 0,
+ };
+ int addrlen = 0;
+
+ const char failed_string[2048] =
+ "Failed to perform brick order "
+ "check. Use 'force' at the end of the command"
+ " if you want to override this behavior. ";
+ const char found_string[2048] =
+ "Multiple bricks of a %s "
+ "volume are present on the same server. This "
+ "setup is not optimal. Bricks should be on "
+ "different nodes to have best fault tolerant "
+ "configuration. Use 'force' at the end of the "
+ "command if you want to override this "
+ "behavior. ";
+
+ this = THIS;
+
+ GF_ASSERT(this);
+
+ ai_list = MALLOC(sizeof(addrinfo_list_t));
+ if (ai_list == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "failed to allocate memory");
+ return -1;
+ }
+ ai_list->info = NULL;
+ CDS_INIT_LIST_HEAD(&ai_list->list);
+
+ if (!(*volname)) {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &(*volname));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ }
+
+ if (!(*brick_list)) {
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &(*brick_list));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Bricks check : Could not "
+ "retrieve bricks list");
+ goto out;
+ }
+ }
+
+ if (!(*brick_count)) {
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &(*brick_count));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Bricks check : Could not "
+ "retrieve brick count");
+ goto out;
+ }
+ }
+
+ brick_list_dup = brick_list_ptr = gf_strdup(*brick_list);
+ /* Resolve hostnames and get addrinfo */
+ while (i < *brick_count) {
+ ++i;
+ brick = strtok_r(brick_list_dup, " \n", &tmpptr);
+ brick_list_dup = tmpptr;
+ if (brick == NULL)
+ goto check_failed;
+ tmpptr = strrchr(brick, ':');
+ if (tmpptr == NULL)
+ goto check_failed;
+ addrlen = strlen(brick) - strlen(tmpptr);
+ if (addrlen >= sizeof(brick_addr))
+ goto check_failed;
+ strncpy(brick_addr, brick, addrlen);
+ brick_addr[addrlen] = '\0';
+ ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info);
+ if (ret != 0) {
+ ret = 0;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
+ "unable to resolve host name for addr %s", brick_addr);
+ goto out;
+ }
+ ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t));
+ if (ai_list_tmp1 == NULL) {
+ ret = 0;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "failed to allocate "
+ "memory");
+ freeaddrinfo(ai_info);
+ goto out;
+ }
+ ai_list_tmp1->info = ai_info;
+ cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list);
+ ai_list_tmp1 = NULL;
+ }
+
+ i = 0;
+ ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list);
+
+ if (*brick_count < sub_count) {
+ sub_count = *brick_count;
+ }
+
+ /* Check for bad brick order */
+ while (i < *brick_count) {
+ ++i;
+ ai_info = ai_list_tmp1->info;
+ ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t,
+ list);
+ if (0 == i % sub_count) {
+ j = 0;
+ continue;
+ }
+ ai_list_tmp2 = ai_list_tmp1;
+ k = j;
+ while (k < sub_count - 1) {
+ ++k;
+ ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info);
+ if (GF_AI_COMPARE_ERROR == ret)
+ goto check_failed;
+ if (GF_AI_COMPARE_MATCH == ret)
+ goto found_bad_brick_order;
+ ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next,
+ addrinfo_list_t, list);
+ }
+ ++j;
+ }
+ gf_msg_debug(this->name, 0, "Brick order okay");
+ ret = 0;
+ goto out;
+
+check_failed:
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL,
+ "Failed bad brick order check");
+ snprintf(err_str, sizeof(failed_string), failed_string);
+ ret = -1;
+ goto out;
+
+found_bad_brick_order:
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER,
+ "Bad brick order found");
+ if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ snprintf(err_str, sizeof(found_string), found_string, "disperse");
+ } else {
+ snprintf(err_str, sizeof(found_string), found_string, "replicate");
+ }
+
+ ret = -1;
+out:
+ ai_list_tmp2 = NULL;
+ GF_FREE(brick_list_ptr);
+ cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list)
+ {
+ if (ai_list_tmp1->info)
+ freeaddrinfo(ai_list_tmp1->info);
+ free(ai_list_tmp2);
+ ai_list_tmp2 = ai_list_tmp1;
+ }
+ free(ai_list);
+ free(ai_list_tmp2);
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
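+/* Sketch of the window logic above, assuming replica 3 and six bricks
+ * b0..b5 (hypothetical layout): b0 is compared with b1 and b2, b1 with
+ * b2, then i % sub_count == 0 resets j and the second subvolume
+ * (b3, b4, b5) is checked the same way. Any address match inside one
+ * window jumps to found_bad_brick_order. */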
+
+static gf_boolean_t
+search_peer_in_auth_list(char *peer_hostname, char *auth_allow_list)
+{
+ if (strstr(auth_allow_list, peer_hostname)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
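+/* Note: strstr() gives substring, not token, semantics: with an
+ * auth.allow list of "node10" the peer "node1" is also treated as
+ * present. Exact matching would need to split the list on ',' and
+ * strcmp() each token. */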
+
+/* glusterd_add_peers_to_auth_list() adds peers to the auth.allow list
+ * if the auth.allow list is not empty. This is called for add-brick and
+ * replica-brick operations to avoid failing the temporary mount. New
+ * volfiles are generated, and clients are notified about the new volfiles.
+ */
+void
+glusterd_add_peers_to_auth_list(char *volname)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int32_t len = 0;
+ char *auth_allow_list = NULL;
+ char *new_auth_allow_list = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_VALIDATE_OR_GOTO(this->name, volname, out);
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Unable to find volume: %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(volinfo->dict, "auth.allow", &auth_allow_list);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "auth allow list is not set");
+ goto out;
+ }
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ len += strlen(peerinfo->hostname);
+ }
+ len += strlen(auth_allow_list) + 1;
+
+ new_auth_allow_list = GF_CALLOC(1, len, gf_common_mt_char);
+ if (!new_auth_allow_list)
+ goto out;
+
+ new_auth_allow_list = strncat(new_auth_allow_list, auth_allow_list,
+ strlen(auth_allow_list));
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ ret = search_peer_in_auth_list(peerinfo->hostname, new_auth_allow_list);
+ if (!ret) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "peer %s not found in auth.allow list", peerinfo->hostname);
+ new_auth_allow_list = strcat(new_auth_allow_list, ",");
+ new_auth_allow_list = strncat(new_auth_allow_list,
+ peerinfo->hostname,
+ strlen(peerinfo->hostname));
+ }
+ }
+ if (strcmp(new_auth_allow_list, auth_allow_list) != 0) {
+ /* If new_auth_allow_list differs from auth_allow_list, we need to
+ * update volinfo->dict with new_auth_allow_list: we delete
+ * auth_allow_list and replace it with new_auth_allow_list. To be able
+ * to revert the change in post-commit, we keep a copy of
+ * auth_allow_list as old_auth_allow_list in volinfo->dict.
+ */
+ dict_del_sizen(volinfo->dict, "auth.allow");
+ ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"),
+ new_auth_allow_list);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Unable to set new auth.allow list");
+ goto out;
+ }
+ ret = dict_set_strn(volinfo->dict, "old.auth.allow",
+ SLEN("old.auth.allow"), auth_allow_list);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Unable to set old auth.allow list");
+ goto out;
+ }
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "failed to create volfiles");
+ goto out;
+ }
+ }
+out:
+ GF_FREE(new_auth_allow_list);
+ return;
+}
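+/* Illustration (hypothetical peers): with auth.allow set to "10.0.0.1"
+ * and peers host2 and host3 not yet in the list, volinfo->dict ends up
+ * with auth.allow = "10.0.0.1,host2,host3" and old.auth.allow =
+ * "10.0.0.1", and fresh volfiles are generated for the clients. */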
+
+int
+glusterd_replace_old_auth_allow_list(char *volname)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char *old_auth_allow_list = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, volname, out);
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Unable to find volume: %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(volinfo->dict, "old.auth.allow",
+ &old_auth_allow_list);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "old auth allow list is not set, no need to replace the list");
+ ret = 0;
+ goto out;
+ }
+
+ dict_del_sizen(volinfo->dict, "auth.allow");
+ ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"),
+ old_auth_allow_list);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Unable to replace auth.allow list");
+ goto out;
+ }
+
+ dict_del_sizen(volinfo->dict, "old.auth.allow");
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "failed to create volfiles");
+ goto out;
+ }
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "failed to store volinfo");
+ goto out;
+ }
+out:
+ return ret;
+}
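+/* Post-commit counterpart of the above (same hypothetical values): it
+ * swaps auth.allow back to the saved old.auth.allow ("10.0.0.1"), drops
+ * the old.auth.allow key, regenerates volfiles, and bumps the stored
+ * volinfo version. */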
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index b58f158fd14..bf6ac295e26 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -857,4 +857,9 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath);
int32_t
glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
int32_t count);
+int32_t
+glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
+ char **volname, char **bricks, int32_t *brick_count,
+ int32_t sub_count);
+
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index a085a0ff8d6..8d6fb5e0fac 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -90,6 +90,8 @@ xlator_instantiate_va(const char *type, const char *format, va_list arg)
xlator_t *xl = NULL;
char *volname = NULL;
int ret = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = gf_vasprintf(&volname, format, arg);
if (ret < 0) {
@@ -99,14 +101,21 @@ xlator_instantiate_va(const char *type, const char *format, va_list arg)
}
xl = GF_CALLOC(1, sizeof(*xl), gf_common_mt_xlator_t);
- if (!xl)
+ if (!xl) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto error;
+ }
ret = xlator_set_type_virtual(xl, type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_SET_OPT_FAIL,
+ NULL);
goto error;
+ }
xl->options = dict_new();
- if (!xl->options)
+ if (!xl->options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto error;
+ }
xl->name = volname;
CDS_INIT_LIST_HEAD(&xl->volume_options);
@@ -115,8 +124,8 @@ xlator_instantiate_va(const char *type, const char *format, va_list arg)
return xl;
error:
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XLATOR_CREATE_FAIL,
- "creating xlator of type %s failed", type);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_CREATE_FAIL, "Type=%s",
+ type, NULL);
GF_FREE(volname);
if (xl)
xlator_destroy(xl);
@@ -865,6 +874,8 @@ _xl_link_children(xlator_t *parent, xlator_t *children, size_t child_count)
xlator_t *trav = NULL;
size_t seek = 0;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
if (child_count == 0)
goto out;
@@ -873,9 +884,12 @@ _xl_link_children(xlator_t *parent, xlator_t *children, size_t child_count)
;
for (; child_count--; trav = trav->prev) {
ret = volgen_xlator_link(parent, trav);
- gf_msg_debug(THIS->name, 0, "%s:%s", parent->name, trav->name);
- if (ret)
+ gf_msg_debug(this->name, 0, "%s:%s", parent->name, trav->name);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_LINK_FAIL,
+ NULL);
goto out;
+ }
}
ret = 0;
out:
@@ -933,8 +947,10 @@ volgen_apply_filters(char *orig_volfile)
entry = sys_readdir(filterdir, scratch);
- if (!entry || errno != 0)
+ if (!entry || errno != 0) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_READ_ERROR, NULL);
break;
+ }
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
continue;
@@ -1472,14 +1488,22 @@ volgen_graph_set_xl_options(volgen_graph_t *graph, dict_t *dict)
}; /* for posix* -> *posix* */
char *loglevel = NULL;
xlator_t *trav = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = dict_get_str_sizen(dict, "xlator", &xlator);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=xlator", NULL);
goto out;
+ }
ret = dict_get_str_sizen(dict, "loglevel", &loglevel);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=loglevel", NULL);
goto out;
+ }
snprintf(xlator_match, 1024, "*%s", xlator);
@@ -1579,14 +1603,22 @@ gfproxy_server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
char *password = NULL;
/*int rclusters = 0;*/
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
/* We are a trusted client */
ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED);
- if (ret != 0)
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto out;
+ }
ret = dict_set_int32_sizen(set_dict, "gfproxy-server", 1);
- if (ret != 0)
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=gfproxy-server", NULL);
goto out;
+ }
/* Build the client section of the graph first */
build_client_graph(graph, volinfo, set_dict);
@@ -1647,11 +1679,13 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
- if (!graph || !volinfo || !set_dict || !brickinfo)
+ this = THIS;
+
+ if (!graph || !volinfo || !set_dict || !brickinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
- this = THIS;
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO("glusterd", priv, out);
@@ -1716,9 +1750,13 @@ brick_graph_add_selinux(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo)
+ if (!graph || !volinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/selinux", volinfo->volname);
if (!xl)
@@ -1785,8 +1823,10 @@ brick_graph_add_bitrot_stub(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
char *value = NULL;
xlator_t *this = THIS;
- if (!graph || !volinfo || !set_dict || !brickinfo)
+ if (!graph || !volinfo || !set_dict || !brickinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/bitrot-stub", volinfo->volname);
if (!xl)
@@ -1821,9 +1861,13 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
};
int ret = -1;
int32_t len = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict || !brickinfo)
+ if (!graph || !volinfo || !set_dict || !brickinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/changelog", volinfo->volname);
if (!xl)
@@ -1836,6 +1880,7 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
len = snprintf(changelog_basepath, sizeof(changelog_basepath), "%s/%s",
brickinfo->path, ".glusterfs/changelogs");
if ((len < 0) || (len >= sizeof(changelog_basepath))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1865,14 +1910,31 @@ brick_graph_add_acl(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
- xl = volgen_graph_add(graph, "features/access-control", volinfo->volname);
- if (!xl)
+ ret = dict_get_str_boolean(set_dict, "features.acl", 1);
+ if (!ret) {
+ /* Skip creating this volume (xlator) if the option is disabled;
+ * it is 'true' by default. */
goto out;
+ } else if (ret < 0) {
+ /* let's not treat this as an error, since the option is not
+ critical and is implemented only as a debugging aid */
+ gf_log(this->name, GF_LOG_INFO,
+ "failed to get 'features.acl' flag from dict");
+ }
+ xl = volgen_graph_add(graph, "features/access-control", volinfo->volname);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
ret = 0;
out:
return ret;
@@ -1884,9 +1946,13 @@ brick_graph_add_locks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/locks", volinfo->volname);
if (!xl)
@@ -1903,9 +1969,13 @@ brick_graph_add_iot(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "performance/io-threads", volinfo->volname);
if (!xl)
@@ -1921,9 +1991,12 @@ brick_graph_add_barrier(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
- if (!graph || !volinfo)
+ if (!graph || !volinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/barrier", volinfo->volname);
if (!xl)
@@ -1940,9 +2013,13 @@ brick_graph_add_sdfs(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo)
+ if (!graph || !volinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
if (!dict_get_str_boolean(set_dict, "features.sdfs", 0)) {
/* update only if option is enabled */
@@ -1970,9 +2047,13 @@ brick_graph_add_namespace(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
ret = dict_get_str_boolean(set_dict, "features.tag-namespaces", 0);
if (ret == -1)
@@ -2025,9 +2106,13 @@ brick_graph_add_index(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
char index_basepath[PATH_MAX] = {0};
int ret = -1;
int32_t len = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !brickinfo || !set_dict)
+ if (!graph || !volinfo || !brickinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/index", volinfo->volname);
if (!xl)
@@ -2036,6 +2121,7 @@ brick_graph_add_index(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
len = snprintf(index_basepath, sizeof(index_basepath), "%s/%s",
brickinfo->path, ".glusterfs/indices");
if ((len < 0) || (len >= sizeof(index_basepath))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -2082,9 +2168,13 @@ brick_graph_add_marker(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
char buf[32] = {
0,
};
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/marker", volinfo->volname);
if (!xl)
@@ -2115,9 +2205,13 @@ brick_graph_add_quota(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
int ret = -1;
xlator_t *xl = NULL;
char *value = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/quota", volinfo->volname);
if (!xl)
@@ -2143,9 +2237,13 @@ brick_graph_add_ro(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
int ret = -1;
xlator_t *xl = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
if (dict_get_str_boolean(set_dict, "features.read-only", 0) &&
(dict_get_str_boolean(set_dict, "features.worm", 0) ||
@@ -2175,9 +2273,13 @@ brick_graph_add_worm(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
int ret = -1;
xlator_t *xl = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
if (dict_get_str_boolean(set_dict, "features.read-only", 0) &&
(dict_get_str_boolean(set_dict, "features.worm", 0) ||
@@ -2204,9 +2306,13 @@ brick_graph_add_cdc(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
int ret = -1;
xlator_t *xl = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
/* Check for compress volume option, and add it to the graph on
* server side */
@@ -2236,8 +2342,10 @@ brick_graph_add_io_stats(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
xlator_t *this = THIS;
glusterd_conf_t *priv = this->private;
- if (!graph || !set_dict || !brickinfo)
+ if (!graph || !set_dict || !brickinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add_as(graph, "debug/io-stats", brickinfo->path);
if (!xl)
@@ -2265,9 +2373,13 @@ brick_graph_add_upcall(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/upcall", volinfo->volname);
if (!xl) {
@@ -2287,9 +2399,13 @@ brick_graph_add_leases(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
{
xlator_t *xl = NULL;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
xl = volgen_graph_add(graph, "features/leases", volinfo->volname);
if (!xl) {
@@ -2319,9 +2435,13 @@ brick_graph_add_server(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
char *volname = NULL;
char *address_family_data = NULL;
int32_t len = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict || !brickinfo)
+ if (!graph || !volinfo || !set_dict || !brickinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
get_vol_transport_type(volinfo, transt);
@@ -2430,13 +2550,20 @@ brick_graph_add_pump(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
char *password = NULL;
char *ptranst = NULL;
char *address_family_data = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
- if (!graph || !volinfo || !set_dict)
+ if (!graph || !volinfo || !set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
ret = dict_get_int32(volinfo->dict, "enable-pump", &pump);
- if (ret == -ENOENT)
+ if (ret == -ENOENT) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=enable-pump", NULL);
ret = pump = 0;
+ }
if (ret)
return -1;
@@ -3234,11 +3361,20 @@ volgen_link_bricks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
j);
j++;
}
+
if (!xl) {
ret = -1;
goto out;
}
+ if (strncmp(xl_type, "performance/readdir-ahead",
+ SLEN("performance/readdir-ahead")) == 0) {
+ ret = xlator_set_fixed_option(xl, "performance.readdir-ahead",
+ "on");
+ if (ret)
+ goto out;
+ }
+
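+ /* readdir-ahead instances created here serve parallel-readdir, so
+ * they are pinned on via a fixed option even when the volume-level
+ * performance.readdir-ahead setting is off (see the matching
+ * volgen_graph_build_readdir_ahead change below). */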
ret = volgen_xlator_link(xl, trav);
if (ret)
goto out;
@@ -3466,13 +3602,13 @@ volgen_graph_build_readdir_ahead(volgen_graph_t *graph,
int32_t clusters = 0;
if (graph->type == GF_QUOTAD || graph->type == GF_SNAPD ||
- !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR) ||
- !glusterd_volinfo_get_boolean(volinfo, VKEY_READDIR_AHEAD))
+ !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR))
goto out;
clusters = volgen_link_bricks_from_list_tail(
graph, volinfo, "performance/readdir-ahead", "%s-readdir-ahead-%d",
child_count, 1);
+
out:
return clusters;
}
@@ -3674,6 +3810,38 @@ out:
}
static int
+set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ int clusters)
+{
+ xlator_t *xlator = NULL;
+ int i = 0;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (conf->op_version < GD_OP_VERSION_9_0)
+ return 0;
+ xlator = first_of(graph);
+
+ for (i = 0; i < clusters; i++) {
+ ret = xlator_set_fixed_option(xlator, "volume-id",
+ uuid_utoa(volinfo->volume_id));
+ if (ret)
+ goto out;
+
+ xlator = xlator->next;
+ }
+
+out:
+ return ret;
+}
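+/* Resulting volfile fragment (illustrative, hypothetical uuid): each of
+ * the `clusters` cluster xlators gains
+ *   option volume-id d5a4c6f2-...-9b01
+ * making the volume uuid available to afr. Skipped entirely when the
+ * cluster op-version is below GD_OP_VERSION_9_0. */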
+
+static int
volgen_graph_build_afr_clusters(volgen_graph_t *graph,
glusterd_volinfo_t *volinfo)
{
@@ -3715,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
clusters = -1;
goto out;
}
+
+ ret = set_volfile_id_option(graph, volinfo, clusters);
+ if (ret) {
+ clusters = -1;
+ goto out;
+ }
+
if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count)
goto out;
@@ -4419,11 +4594,15 @@ nfs_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme,
volinfo = param;
- if (!volinfo || (volinfo->volname[0] == '\0'))
+ if (!volinfo || (volinfo->volname[0] == '\0')) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
return 0;
+ }
- if (!vme || !(vme->option))
+ if (!vme || !(vme->option)) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
return 0;
+ }
xl = first_of(graph);
@@ -4554,8 +4733,11 @@ prepare_shd_volume_options(glusterd_volinfo_t *volinfo, dict_t *mod_dict,
goto out;
ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto out;
+ }
dict_copy(volinfo->dict, set_dict);
if (mod_dict)
@@ -4742,6 +4924,7 @@ build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph,
set_dict = dict_new();
if (!set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -ENOMEM;
goto out;
}
@@ -4880,25 +5063,40 @@ build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict)
ret = dict_set_sizen_str_sizen(set_dict, "performance.stat-prefetch",
"off");
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=performance.stat-prefetch", NULL);
goto out;
+ }
ret = dict_set_sizen_str_sizen(set_dict,
"performance.client-io-threads", "off");
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=performance.client-io-threads", NULL);
goto out;
+ }
ret = dict_set_str_sizen(set_dict, "client-transport-type", nfs_xprt);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=client-transport-type", NULL);
goto out;
+ }
ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto out;
+ }
ret = dict_set_sizen_str_sizen(set_dict, "nfs-volume-file", "yes");
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=nfs-volume-file", NULL);
goto out;
+ }
if (mod_dict && (data = dict_get_sizen(mod_dict, "volume-name"))) {
volname = data->data;
@@ -5140,8 +5338,11 @@ build_quotad_graph(volgen_graph_t *graph, dict_t *mod_dict)
continue;
ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto out;
+ }
dict_copy(voliter->dict, set_dict);
if (mod_dict)
@@ -5337,14 +5538,21 @@ glusterd_generate_client_per_brick_volfile(glusterd_volinfo_t *volinfo)
int ret = -1;
char *ssl_str = NULL;
gf_boolean_t ssl_bool = _gf_false;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_set_uint32(dict, "trusted-client", GF_CLIENT_TRUSTED);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto free_dict;
+ }
if (dict_get_str_sizen(volinfo->dict, "client.ssl", &ssl_str) == 0) {
if (gf_string2boolean(ssl_str, &ssl_bool) == 0) {
@@ -5426,17 +5634,25 @@ generate_dummy_client_volfiles(glusterd_volinfo_t *volinfo)
enumerate_transport_reqs(volinfo->transport_type, types);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
for (i = 0; types[i]; i++) {
ret = dict_set_str(dict, "client-transport-type", types[i]);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=client-transport-type", NULL);
goto out;
+ }
type = transport_str_to_type(types[i]);
ret = dict_set_uint32(dict, "trusted-client", GF_CLIENT_OTHER);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto out;
+ }
ret = glusterd_get_dummy_client_filepath(filepath, volinfo, type);
if (ret) {
@@ -5497,17 +5713,25 @@ generate_client_volfiles(glusterd_volinfo_t *volinfo,
enumerate_transport_reqs(volinfo->transport_type, types);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
for (i = 0; types[i]; i++) {
ret = dict_set_str(dict, "client-transport-type", types[i]);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=client-transport-type", NULL);
goto out;
+ }
type = transport_str_to_type(types[i]);
ret = dict_set_uint32(dict, "trusted-client", client_type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto out;
+ }
if (client_type == GF_CLIENT_TRUSTED) {
ret = glusterd_get_trusted_client_filepath(filepath, volinfo, type);
@@ -5657,10 +5881,15 @@ prepare_bitrot_scrub_volume_options(glusterd_volinfo_t *volinfo,
dict_t *mod_dict, dict_t *set_dict)
{
int ret = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=trusted-client", NULL);
goto out;
+ }
dict_copy(volinfo->dict, set_dict);
if (mod_dict)
@@ -5728,6 +5957,7 @@ build_bitd_volume_graph(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
set_dict = dict_new();
if (!set_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -6134,8 +6364,11 @@ validate_shdopts(glusterd_volinfo_t *volinfo, dict_t *val_dict,
goto out;
}
ret = dict_set_int32_sizen(val_dict, "graph-check", 1);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=graph-check", NULL);
goto out;
+ }
ret = build_shd_graph(volinfo, &graph, val_dict);
if (!ret)
ret = graph_reconf_validateopt(&graph.graph, op_errstr);
@@ -6189,6 +6422,8 @@ validate_nfsopts(glusterd_volinfo_t *volinfo, dict_t *val_dict,
"wrong transport "
"type %s",
tt);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INCOMPATIBLE_VALUE,
+ "Type=%s", tt, NULL);
*op_errstr = gf_strdup(err_str);
ret = -1;
goto out;
@@ -6257,6 +6492,7 @@ validate_brickopts(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo,
graph.errstr = op_errstr;
full_dict = dict_new();
if (!full_dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 1c5a2508d9c..814ab14fb27 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -37,240 +37,6 @@
#define glusterd_op_start_volume_args_get(dict, volname, flags) \
glusterd_op_stop_volume_args_get(dict, volname, flags)
-gf_ai_compare_t
-glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next)
-{
- int ret = -1;
- struct addrinfo *tmp1 = NULL;
- struct addrinfo *tmp2 = NULL;
- char firstip[NI_MAXHOST] = {0.};
- char nextip[NI_MAXHOST] = {
- 0,
- };
-
- for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
- ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST,
- NULL, 0, NI_NUMERICHOST);
- if (ret)
- return GF_AI_COMPARE_ERROR;
- for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
- ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip,
- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (ret)
- return GF_AI_COMPARE_ERROR;
- if (!strcmp(firstip, nextip)) {
- return GF_AI_COMPARE_MATCH;
- }
- }
- }
- return GF_AI_COMPARE_NO_MATCH;
-}
-
-/* Check for non optimal brick order for replicate :
- * Checks if bricks belonging to a replicate volume
- * are present on the same server
- */
-int32_t
-glusterd_check_brick_order(dict_t *dict, char *err_str)
-{
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
- xlator_t *this = NULL;
- addrinfo_list_t *ai_list = NULL;
- addrinfo_list_t *ai_list_tmp1 = NULL;
- addrinfo_list_t *ai_list_tmp2 = NULL;
- char *brick = NULL;
- char *brick_list = NULL;
- char *brick_list_dup = NULL;
- char *brick_list_ptr = NULL;
- char *tmpptr = NULL;
- char *volname = NULL;
- int32_t brick_count = 0;
- int32_t type = GF_CLUSTER_TYPE_NONE;
- int32_t sub_count = 0;
- struct addrinfo *ai_info = NULL;
- char brick_addr[128] = {
- 0,
- };
- int addrlen = 0;
-
- const char failed_string[2048] =
- "Failed to perform brick order "
- "check. Use 'force' at the end of the command"
- " if you want to override this behavior. ";
- const char found_string[2048] =
- "Multiple bricks of a %s "
- "volume are present on the same server. This "
- "setup is not optimal. Bricks should be on "
- "different nodes to have best fault tolerant "
- "configuration. Use 'force' at the end of the "
- "command if you want to override this "
- "behavior. ";
-
- this = THIS;
-
- GF_ASSERT(this);
-
- ai_list = MALLOC(sizeof(addrinfo_list_t));
- ai_list->info = NULL;
- CDS_INIT_LIST_HEAD(&ai_list->list);
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name");
- goto out;
- }
-
- ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
- if (ret) {
- snprintf(err_str, 512, "Unable to get type of volume %s", volname);
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s",
- err_str);
- goto out;
- }
-
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Bricks check : Could not "
- "retrieve bricks list");
- goto out;
- }
-
- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Bricks check : Could not "
- "retrieve brick count");
- goto out;
- }
-
- if (type != GF_CLUSTER_TYPE_DISPERSE) {
- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
- &sub_count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Bricks check : Could"
- " not retrieve replica count");
- goto out;
- }
- gf_msg_debug(this->name, 0,
- "Replicate cluster type "
- "found. Checking brick order.");
- } else {
- ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"),
- &sub_count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Bricks check : Could"
- " not retrieve disperse count");
- goto out;
- }
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND,
- "Disperse cluster type"
- " found. Checking brick order.");
- }
-
- brick_list_dup = brick_list_ptr = gf_strdup(brick_list);
- /* Resolve hostnames and get addrinfo */
- while (i < brick_count) {
- ++i;
- brick = strtok_r(brick_list_dup, " \n", &tmpptr);
- brick_list_dup = tmpptr;
- if (brick == NULL)
- goto check_failed;
- tmpptr = strrchr(brick, ':');
- if (tmpptr == NULL)
- goto check_failed;
- addrlen = strlen(brick) - strlen(tmpptr);
- strncpy(brick_addr, brick, addrlen);
- brick_addr[addrlen] = '\0';
- ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info);
- if (ret != 0) {
- ret = 0;
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
- "unable to resolve host name for addr %s", brick_addr);
- goto out;
- }
- ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t));
- if (ai_list_tmp1 == NULL) {
- ret = 0;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
- "failed to allocate "
- "memory");
- freeaddrinfo(ai_info);
- goto out;
- }
- ai_list_tmp1->info = ai_info;
- cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list);
- ai_list_tmp1 = NULL;
- }
-
- i = 0;
- ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list);
-
- /* Check for bad brick order */
- while (i < brick_count) {
- ++i;
- ai_info = ai_list_tmp1->info;
- ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t,
- list);
- if (0 == i % sub_count) {
- j = 0;
- continue;
- }
- ai_list_tmp2 = ai_list_tmp1;
- k = j;
- while (k < sub_count - 1) {
- ++k;
- ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info);
- if (GF_AI_COMPARE_ERROR == ret)
- goto check_failed;
- if (GF_AI_COMPARE_MATCH == ret)
- goto found_bad_brick_order;
- ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next,
- addrinfo_list_t, list);
- }
- ++j;
- }
- gf_msg_debug(this->name, 0, "Brick order okay");
- ret = 0;
- goto out;
-
-check_failed:
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL,
- "Failed bad brick order check");
- snprintf(err_str, sizeof(failed_string), failed_string);
- ret = -1;
- goto out;
-
-found_bad_brick_order:
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER,
- "Bad brick order found");
- if (type == GF_CLUSTER_TYPE_DISPERSE) {
- snprintf(err_str, sizeof(found_string), found_string, "disperse");
- } else {
- snprintf(err_str, sizeof(found_string), found_string, "replicate");
- }
-
- ret = -1;
-out:
- ai_list_tmp2 = NULL;
- GF_FREE(brick_list_ptr);
- cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list)
- {
- if (ai_list_tmp1->info)
- freeaddrinfo(ai_list_tmp1->info);
- free(ai_list_tmp2);
- ai_list_tmp2 = ai_list_tmp1;
- }
- free(ai_list_tmp2);
- return ret;
-}
-
int
__glusterd_handle_create_volume(rpcsvc_request_t *req)
{
@@ -809,10 +575,14 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict,
int ret = 0;
char *key = NULL;
char *value = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"),
(int32_t *)&heal_op);
if (ret || (heal_op == GF_SHD_OP_INVALID)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=heal-op", NULL);
ret = -1;
goto out;
}
@@ -850,21 +620,33 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict,
} else {
key = "cluster.granular-entry-heal";
ret = dict_set_int8(dict, "is-special-key", 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=is-special-key", NULL);
goto out;
+ }
}
ret = dict_set_strn(dict, "key1", SLEN("key1"), key);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=key1", NULL);
goto out;
+ }
ret = dict_set_strn(dict, "value1", SLEN("value1"), value);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=value1", NULL);
goto out;
+ }
ret = dict_set_int32n(dict, "count", SLEN("count"), 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = glusterd_op_begin_synctask(req, GD_OP_SET_VOLUME, dict);
@@ -888,18 +670,19 @@ __glusterd_handle_cli_heal_volume(rpcsvc_request_t *req)
0,
};
+ this = THIS;
+ GF_ASSERT(this);
+
GF_ASSERT(req);
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
- this = THIS;
- GF_ASSERT(this);
-
if (cli_req.dict.dict_len) {
/* Unserialize the dictionary */
dict = dict_new();
@@ -960,8 +743,11 @@ __glusterd_handle_cli_heal_volume(rpcsvc_request_t *req)
goto out;
ret = dict_set_int32n(dict, "count", SLEN("count"), volinfo->brick_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = glusterd_op_begin_synctask(req, GD_OP_HEAL_VOLUME, dict);
@@ -1013,6 +799,7 @@ __glusterd_handle_cli_statedump_volume(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
@@ -1107,6 +894,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
int32_t local_brick_count = 0;
int32_t i = 0;
int32_t type = 0;
+ int32_t replica_count = 0;
+ int32_t disperse_count = 0;
char *brick = NULL;
char *tmpptr = NULL;
xlator_t *this = NULL;
@@ -1201,16 +990,44 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
}
if (!is_force) {
- if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
- (type == GF_CLUSTER_TYPE_DISPERSE)) {
- ret = glusterd_check_brick_order(dict, msg);
+ if (type == GF_CLUSTER_TYPE_REPLICATE) {
+ ret = dict_get_int32n(dict, "replica-count",
+ SLEN("replica-count"), &replica_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Bricks check : Could"
+ " not retrieve replica count");
+ goto out;
+ }
+ gf_msg_debug(this->name, 0,
+ "Replicate cluster type "
+ "found. Checking brick order.");
+ ret = glusterd_check_brick_order(dict, msg, type, &volname,
+ &bricks, &brick_count,
+ replica_count);
+ } else if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ ret = dict_get_int32n(dict, "disperse-count",
+ SLEN("disperse-count"), &disperse_count);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
- "Not "
- "creating volume because of "
- "bad brick order");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Bricks check : Could"
+ " not retrieve disperse count");
goto out;
}
+ gf_msg_debug(this->name, 0,
+ "Disperse cluster type"
+ " found. Checking brick order.");
+ ret = glusterd_check_brick_order(dict, msg, type, &volname,
+ &bricks, &brick_count,
+ disperse_count);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+ "Not creating the volume because of "
+ "bad brick order. %s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
}
}
}
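
Condensed, the new staging flow above is: fetch the per-type subvolume count first, then hand it to the relocated glusterd_check_brick_order() along with out-pointers for the parsed fields. A sketch against the signature visible in these call sites (the helper itself now lives elsewhere in this patch):

    static int
    stage_brick_order_check(dict_t *dict, char *msg, int32_t type)
    {
        char *volname = NULL;
        char *bricks = NULL;
        int32_t brick_count = 0;
        int32_t sub_count = 0;
        const char *key = (type == GF_CLUSTER_TYPE_DISPERSE)
                              ? "disperse-count"
                              : "replica-count";

        /* the count is mandatory for both replicate and disperse volumes */
        int ret = dict_get_int32n(dict, key, strlen(key), &sub_count);
        if (ret)
            return ret;

        return glusterd_check_brick_order(dict, msg, type, &volname, &bricks,
                                          &brick_count, sub_count);
    }
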
@@ -1325,20 +1142,32 @@ glusterd_op_stop_volume_args_get(dict_t *dict, char **volname, int *flags)
this = THIS;
GF_ASSERT(this);
- if (!dict || !volname || !flags)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ if (!volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
+
+ if (!flags) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
ret = dict_get_strn(dict, "volname", SLEN("volname"), volname);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
}
ret = dict_get_int32n(dict, "flags", SLEN("flags"), flags);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get flags");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=flags", NULL);
goto out;
}
out:
@@ -1351,27 +1180,29 @@ glusterd_op_statedump_volume_args_get(dict_t *dict, char **volname,
{
int ret = -1;
- if (!dict || !volname || !options || !option_cnt)
+ if (!dict || !volname || !options || !option_cnt) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
ret = dict_get_strn(dict, "volname", SLEN("volname"), volname);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get volname");
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
}
ret = dict_get_strn(dict, "options", SLEN("options"), options);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get options");
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=options", NULL);
goto out;
}
ret = dict_get_int32n(dict, "option_cnt", SLEN("option_cnt"), option_cnt);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get option count");
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=option_cnt", NULL);
goto out;
}
@@ -1598,8 +1429,13 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr)
GF_ASSERT(this);
ret = glusterd_op_stop_volume_args_get(dict, &volname, &flags);
- if (ret)
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get details of volume %s",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_STOP_ARGS_GET_FAILED,
+ "Volume name=%s", volname, NULL);
goto out;
+ }
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
@@ -1881,14 +1717,15 @@ glusterd_op_stage_heal_volume(dict_t *dict, char **op_errstr)
if (!glusterd_is_volume_started(volinfo)) {
ret = -1;
snprintf(msg, sizeof(msg), "Volume %s is not started.", volname);
- gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_STARTED, "%s",
- msg);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_STARTED,
+ "Volume=%s", volname, NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
opt_dict = volinfo->dict;
if (!opt_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, NULL);
ret = 0;
goto out;
}
@@ -1944,6 +1781,8 @@ glusterd_op_stage_statedump_volume(dict_t *dict, char **op_errstr)
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ "Volume=%s", volname, NULL);
goto out;
}
@@ -2107,8 +1946,6 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
goto out;
}
- pthread_mutex_init(&volinfo->store_volinfo_lock, NULL);
-
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
@@ -3060,33 +2897,35 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
char *mntpt = NULL;
char **xl_opts = NULL;
glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get volume name");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
}
gf_msg_debug("glusterd", 0, "Performing clearlocks on volume %s", volname);
ret = dict_get_strn(dict, "path", SLEN("path"), &path);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get path");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=path",
+ NULL);
goto out;
}
ret = dict_get_strn(dict, "kind", SLEN("kind"), &kind);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get kind");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=kind",
+ NULL);
goto out;
}
ret = dict_get_strn(dict, "type", SLEN("type"), &type);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get type");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=type",
+ NULL);
goto out;
}
@@ -3094,10 +2933,9 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (ret)
ret = 0;
- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_CLRCLK_VOL_REQ_RCVD,
- "Received clear-locks request for "
- "volume %s with kind %s type %s and options %s",
- volname, kind, type, opts);
+ gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_CLRCLK_VOL_REQ_RCVD,
+ "Volume=%s, Kind=%s, Type=%s, Options=%s", volname, kind, type,
+ opts, NULL);
if (opts)
ret = gf_asprintf(&cmd_str, GF_XATTR_CLRLK_CMD ".t%s.k%s.%s", type,
@@ -3110,22 +2948,25 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
snprintf(msg, sizeof(msg), "Volume %s doesn't exist.", volname);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "Volume=%s",
+ volname, NULL);
goto out;
}
xl_opts = GF_CALLOC(volinfo->brick_count + 1, sizeof(char *),
gf_gld_mt_charptr);
- if (!xl_opts)
+ if (!xl_opts) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
ret = glusterd_clearlocks_get_local_client_ports(volinfo, xl_opts);
if (ret) {
snprintf(msg, sizeof(msg),
"Couldn't get port numbers of "
"local bricks");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRK_PORT_NUM_GET_FAIL, "%s",
- msg);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_PORT_NUM_GET_FAIL,
+ NULL);
goto out;
}
@@ -3134,8 +2975,8 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
snprintf(msg, sizeof(msg),
"Creating mount directory "
"for clear-locks failed.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL, "%s", msg);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL, NULL);
goto out;
}
@@ -3144,16 +2985,15 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
snprintf(msg, sizeof(msg),
"Failed to mount clear-locks "
"maintenance client.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL,
- "%s", msg);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL,
+ NULL);
goto out;
}
ret = glusterd_clearlocks_send_cmd(volinfo, cmd_str, path, result, msg,
sizeof(msg), mntpt);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CLRCLK_SND_CMD_FAIL, "%s",
- msg);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLRCLK_SND_CMD_FAIL, NULL);
goto umount;
}
@@ -3164,16 +3004,16 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
snprintf(msg, sizeof(msg),
"Failed to set clear-locks "
"result");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s", msg);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=lk-summary", NULL);
}
umount:
glusterd_clearlocks_unmount(volinfo, mntpt);
if (glusterd_clearlocks_rmdir_mount(volinfo, mntpt))
- gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL,
- "Couldn't unmount "
- "clear-locks mount point");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL,
+ NULL);
out:
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 04ec9a6e571..398b4d76f52 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -8,9 +8,10 @@ later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
+#include <glusterfs/syscall.h>
#include "glusterd-volgen.h"
#include "glusterd-utils.h"
-#include "sys/stat.h"
+
static int
validate_cache_max_min_size(glusterd_volinfo_t *volinfo, dict_t *dict,
char *key, char *value, char **op_errstr)
@@ -790,7 +791,7 @@ static int
is_directory(const char *path)
{
struct stat statbuf;
- if (stat(path, &statbuf) != 0)
+ if (sys_stat(path, &statbuf) != 0)
return 0;
return S_ISDIR(statbuf.st_mode);
}
@@ -1265,10 +1266,21 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.option = "priority",
.op_version = 1,
.flags = VOLOPT_FLAG_CLIENT_OPT},
- {.key = "performance.cache-size",
+ {.key = "performance.io-cache-size",
.voltype = "performance/io-cache",
- .op_version = 1,
+ .option = "cache-size",
+ .op_version = GD_OP_VERSION_8_0,
.flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "performance.cache-size",
+ .voltype = "performance/io-cache",
+ .op_version = 1,
+ .flags = VOLOPT_FLAG_CLIENT_OPT,
+ .description = "Deprecated option. Use performance.io-cache-size "
+ "to adjust the cache size of the io-cache translator, "
+ "and use performance.quick-read-cache-size to adjust "
+ "the cache size of the quick-read translator.",
+ },
/* IO-threads xlator options */
{.key = "performance.io-thread-count",
@@ -1308,16 +1320,29 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "performance/io-cache",
.option = "pass-through",
.op_version = GD_OP_VERSION_4_1_0},
+ {.key = "performance.quick-read-cache-size",
+ .voltype = "performance/quick-read",
+ .option = "cache-size",
+ .op_version = GD_OP_VERSION_8_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
{.key = "performance.cache-size",
.voltype = "performance/quick-read",
.type = NO_DOC,
.op_version = 1,
.flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.quick-read-cache-timeout",
+ .voltype = "performance/quick-read",
+ .option = "cache-timeout",
+ .op_version = GD_OP_VERSION_8_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
{.key = "performance.qr-cache-timeout",
.voltype = "performance/quick-read",
.option = "cache-timeout",
.op_version = 1,
- .flags = VOLOPT_FLAG_CLIENT_OPT},
+ .flags = VOLOPT_FLAG_CLIENT_OPT,
+ .description =
+ "Deprecated option. Use performance.quick-read-cache-timeout "
+ "instead."},
{.key = "performance.quick-read-cache-invalidation",
.voltype = "performance/quick-read",
.option = "quick-read-cache-invalidation",
@@ -1788,7 +1813,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
{.key = "performance.readdir-ahead",
.voltype = "performance/readdir-ahead",
.option = "!perf",
- .value = "on",
+ .value = "off",
.op_version = 3,
.description = "enable/disable readdir-ahead translator in the volume.",
.flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT},
@@ -2461,7 +2486,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "storage/posix",
.op_version = GD_OP_VERSION_4_1_0,
},
- {.key = "storage.bd-aio", .voltype = "storage/bd", .op_version = 3},
{.key = "config.memory-accounting",
.voltype = "mgmt/glusterd",
.option = "!config",
@@ -3103,4 +3127,20 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "features/cloudsync",
.op_version = GD_OP_VERSION_7_0,
.flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "features.acl",
+ .voltype = "features/access-control",
+ .value = "enable",
+ .option = "!features",
+ .op_version = GD_OP_VERSION_8_0,
+ .description = "(WARNING: for debug purposes only) enable/disable "
+ "access-control xlator in volume",
+ .type = NO_DOC,
+ },
+
+ {.key = "cluster.use-anonymous-inode",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_9_0,
+ .value = "yes",
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
{.key = NULL}};
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index cd2c5da628b..7a86c2997b1 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -67,7 +67,7 @@ extern struct rpcsvc_program gd_svc_cli_trusted_progs;
extern struct rpc_clnt_program gd_brick_prog;
extern struct rpcsvc_program glusterd_mgmt_hndsk_prog;
-extern char snap_mount_dir[PATH_MAX];
+extern char snap_mount_dir[VALID_GLUSTERD_PATHMAX];
rpcsvc_cbk_program_t glusterd_cbk_prog = {
.progname = "Gluster Callback",
@@ -202,8 +202,10 @@ glusterd_options_init(xlator_t *this)
priv = this->private;
priv->opts = dict_new();
- if (!priv->opts)
+ if (!priv->opts) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = glusterd_store_retrieve_options(this);
if (ret == 0) {
@@ -247,6 +249,7 @@ glusterd_client_statedump_submit_req(char *volname, char *target_ip, char *pid)
GF_ASSERT(conf);
if (target_ip == NULL || pid == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = -1;
goto out;
}
@@ -447,14 +450,19 @@ glusterd_rpcsvc_options_build(dict_t *options)
{
int ret = 0;
uint32_t backlog = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = dict_get_uint32(options, "transport.listen-backlog", &backlog);
if (ret) {
backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
ret = dict_set_uint32(options, "transport.listen-backlog", backlog);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=transport.listen-backlog", NULL);
goto out;
+ }
}
gf_msg_debug("glusterd", 0, "listen-backlog value: %d", backlog);
@@ -574,6 +582,7 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf)
len = snprintf(georepdir, PATH_MAX, "%s/" GEOREP, conf->workdir);
if ((len < 0) || (len >= PATH_MAX)) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -585,9 +594,11 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf)
}
ret = dict_get_str(THIS->options, GEOREP "-log-group", &greplg_s);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=log-group", NULL);
ret = 0;
- else {
+ } else {
gr = getgrnam(greplg_s);
if (!gr) {
gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_LOGGROUP_INVALID,
@@ -628,6 +639,7 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf)
}
len = snprintf(logdir, PATH_MAX, "%s/" GEOREP "-slaves", conf->logdir);
if ((len < 0) || (len >= PATH_MAX)) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -654,6 +666,7 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf)
len = snprintf(logdir, PATH_MAX, "%s/" GEOREP "-slaves/mbr", conf->logdir);
if ((len < 0) || (len >= PATH_MAX)) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1045,6 +1058,8 @@ _install_mount_spec(dict_t *opts, char *key, data_t *value, void *data)
int rv = 0;
gf_mount_spec_t *mspec = NULL;
char *user = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
label = strtail(key, "mountbroker.");
@@ -1059,8 +1074,10 @@ _install_mount_spec(dict_t *opts, char *key, data_t *value, void *data)
return 0;
mspec = GF_CALLOC(1, sizeof(*mspec), gf_gld_mt_mount_spec);
- if (!mspec)
+ if (!mspec) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto err;
+ }
mspec->label = label;
if (georep) {
@@ -1116,8 +1133,10 @@ glusterd_init_uds_listener(xlator_t *this)
GF_ASSERT(this);
options = dict_new();
- if (!options)
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
sock_data = dict_get(this->options, "glusterd-sockfile");
(void)snprintf(sockfile, sizeof(sockfile), "%s",
@@ -1404,7 +1423,7 @@ init(xlator_t *this)
char *mountbroker_root = NULL;
int i = 0;
int total_transport = 0;
- gf_boolean_t valgrind = _gf_false;
+ gf_valgrind_tool vgtool;
char *valgrind_str = NULL;
char *transport_type = NULL;
char var_run_dir[PATH_MAX] = {
@@ -1417,6 +1436,14 @@ init(xlator_t *this)
int32_t len = 0;
int op_version = 0;
+#if defined(RUN_WITH_MEMCHECK)
+ vgtool = _gf_memcheck;
+#elif defined(RUN_WITH_DRD)
+ vgtool = _gf_drd;
+#else
+ vgtool = _gf_none;
+#endif
+
#ifndef GF_DARWIN_HOST_OS
{
struct rlimit lim;
@@ -1424,9 +1451,8 @@ init(xlator_t *this)
lim.rlim_max = 65536;
if (setrlimit(RLIMIT_NOFILE, &lim) == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL,
- "Failed to set 'ulimit -n "
- " 65536'");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL,
+ "Failed to set 'ulimit -n 65536'", NULL);
} else {
gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FILE_DESC_LIMIT_SET,
"Maximum allowed open file descriptors "
@@ -1872,6 +1898,9 @@ init(xlator_t *this)
(void)strncpy(conf->logdir, logdir, sizeof(conf->logdir));
synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE);
+ synccond_init(&conf->cond_restart_bricks);
+ synccond_init(&conf->cond_restart_shd);
+ synccond_init(&conf->cond_blockers);
pthread_mutex_init(&conf->xprt_lock, NULL);
INIT_LIST_HEAD(&conf->xprt_list);
pthread_mutex_init(&conf->import_volumes, NULL);
@@ -1904,18 +1933,24 @@ init(xlator_t *this)
}
/* Set option to run bricks on valgrind if enabled in glusterd.vol */
- this->ctx->cmd_args.valgrind = valgrind;
+ this->ctx->cmd_args.vgtool = vgtool;
ret = dict_get_str(this->options, "run-with-valgrind", &valgrind_str);
if (ret < 0) {
gf_msg_debug(this->name, 0, "cannot get run-with-valgrind value");
}
if (valgrind_str) {
- if (gf_string2boolean(valgrind_str, &valgrind)) {
+ gf_boolean_t vg = _gf_false;
+
+ if (!strcmp(valgrind_str, "memcheck"))
+ this->ctx->cmd_args.vgtool = _gf_memcheck;
+ else if (!strcmp(valgrind_str, "drd"))
+ this->ctx->cmd_args.vgtool = _gf_drd;
+ else if (!gf_string2boolean(valgrind_str, &vg))
+ this->ctx->cmd_args.vgtool = (vg ? _gf_memcheck : _gf_none);
+ else
gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
- "run-with-valgrind value not a boolean string");
- } else {
- this->ctx->cmd_args.valgrind = valgrind;
- }
+ "run-with-valgrind is neither boolean"
+ " nor one of 'memcheck' or 'drd'");
}
/* Store ping-timeout in conf */
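
The run-with-valgrind parsing above now accepts a tool name as well as a boolean. Distilled into a standalone mapper, assuming only that gf_valgrind_tool declares _gf_none, _gf_memcheck and _gf_drd (as the #if block earlier in this hunk suggests) and that gf_string2boolean() returns 0 on a successful parse:

    static gf_valgrind_tool
    parse_vgtool(const char *s)
    {
        gf_boolean_t b = _gf_false;

        if (strcmp(s, "memcheck") == 0)
            return _gf_memcheck;
        if (strcmp(s, "drd") == 0)
            return _gf_drd;
        if (gf_string2boolean(s, &b) == 0) /* 0: parsed as a boolean */
            return b ? _gf_memcheck : _gf_none;
        return _gf_none; /* unparsable: leave valgrind disabled */
    }
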
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index d7e4da8425a..cc4f98ecf47 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -209,6 +209,9 @@ typedef struct {
gf_boolean_t restart_done;
dict_t *opts;
synclock_t big_lock;
+ synccond_t cond_restart_bricks;
+ synccond_t cond_restart_shd;
+ synccond_t cond_blockers;
rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */
uint32_t base_port;
uint32_t max_port;
@@ -510,6 +513,10 @@ struct glusterd_volinfo_ {
* volfile generation code, we are
* temporarily appending either "-hot"
* or "-cold" */
+ gf_atomic_t volpeerupdate;
+ /* Flag indicating whether the volume has received
+ updates from a peer
+ */
};
typedef enum gd_snap_status_ {
@@ -1190,6 +1197,8 @@ glusterd_op_set_ganesha(dict_t *dict, char **errstr);
int
ganesha_manage_export(dict_t *dict, char *value,
gf_boolean_t update_cache_invalidation, char **op_errstr);
+int
+gd_ganesha_send_dbus(char *volname, char *value);
gf_boolean_t
glusterd_is_ganesha_cluster();
gf_boolean_t
@@ -1211,6 +1220,9 @@ int
glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr);
int
+glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict);
+
+int
glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict);
int
@@ -1354,4 +1366,10 @@ glusterd_options_init(xlator_t *this);
int32_t
glusterd_recreate_volfiles(glusterd_conf_t *conf);
+void
+glusterd_add_peers_to_auth_list(char *volname);
+
+int
+glusterd_replace_old_auth_allow_list(char *volname);
+
#endif
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 7949241ab0f..0e22fe411ee 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -8,6 +8,8 @@
cases as published by the Free Software Foundation.
*/
+#include <config.h>
+
#include <sys/wait.h>
#include "fuse-bridge.h"
#include <glusterfs/glusterfs.h>
@@ -177,7 +179,7 @@ fusedump_gettime(struct fusedump_timespec *fts)
0,
};
- clock_gettime(CLOCK_REALTIME, &ts);
+ timespec_now_realtime(&ts);
fts->sec = ts.tv_sec;
fts->nsec = ts.tv_nsec;
@@ -225,14 +227,30 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
if (res == -1) {
const char *errdesc = NULL;
gf_loglevel_t loglevel = GF_LOG_ERROR;
+ gf_boolean_t errno_degraded = _gf_false;
+ gf_boolean_t errno_promoted = _gf_false;
+
+#define ACCOUNT_ERRNO(eno) \
+ do { \
+ if (errno_degraded) { \
+ pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \
+ { \
+ if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \
+ errno_promoted = _gf_true; \
+ } \
+ pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \
+ } \
+ } while (0)
/* If caller masked the errno, then it
* does not indicate an error at the application
* level, so we degrade the log severity to DEBUG.
*/
if (errnomask && errno < ERRNOMASK_MAX &&
- GET_ERRNO_MASK(errnomask, errno))
+ GET_ERRNO_MASK(errnomask, errno)) {
loglevel = GF_LOG_DEBUG;
+ errno_degraded = _gf_true;
+ }
switch (errno) {
/* The listed errnos are FUSE status indicators,
@@ -242,33 +260,43 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
*/
case ENOENT:
errdesc = "ENOENT";
+ ACCOUNT_ERRNO(ENOENT);
break;
case ENOTDIR:
errdesc = "ENOTDIR";
+ ACCOUNT_ERRNO(ENOTDIR);
break;
case ENODEV:
errdesc = "ENODEV";
+ ACCOUNT_ERRNO(ENODEV);
break;
case EPERM:
errdesc = "EPERM";
+ ACCOUNT_ERRNO(EPERM);
break;
case ENOMEM:
errdesc = "ENOMEM";
+ ACCOUNT_ERRNO(ENOMEM);
break;
case ENOTCONN:
errdesc = "ENOTCONN";
+ ACCOUNT_ERRNO(ENOTCONN);
break;
case ECONNREFUSED:
errdesc = "ECONNREFUSED";
+ ACCOUNT_ERRNO(ECONNREFUSED);
break;
case EOVERFLOW:
errdesc = "EOVERFLOW";
+ ACCOUNT_ERRNO(EOVERFLOW);
break;
case EBUSY:
errdesc = "EBUSY";
+ ACCOUNT_ERRNO(EBUSY);
break;
case ENOTEMPTY:
errdesc = "ENOTEMPTY";
+ ACCOUNT_ERRNO(ENOTEMPTY);
break;
default:
errdesc = strerror(errno);
@@ -276,7 +304,13 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
gf_log_callingfn("glusterfs-fuse", loglevel,
"writing to fuse device failed: %s", errdesc);
+ if (errno_promoted)
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "writing to fuse device yielded %s %d times", errdesc,
+ UINT8_MAX + 1);
return errno;
+
+#undef ACCOUNT_ERRNO
}
fouh = iov_out[0].iov_base;
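
The trick in ACCOUNT_ERRNO above deserves a note: the per-errno counters are uint8_t, so "!++cnt" is true exactly when the counter wraps from 255 back to 0. One degraded (DEBUG-level) write failure in every 256 is therefore promoted to a WARNING, which is what the "UINT8_MAX + 1" in the promotion message refers to. Minimal standalone form:

    #include <stdbool.h>
    #include <stdint.h>

    /* returns true on every 256th call, when the 8-bit counter wraps */
    static bool
    bump_and_check_promotion(uint8_t *cnt)
    {
        return ++*cnt == 0;
    }
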
@@ -373,7 +407,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
static int32_t
fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
{
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS))
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_entry_out *fnieo = NULL;
fuse_private_t *priv = NULL;
@@ -464,7 +498,7 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
static int32_t
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
{
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS))
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_inode_out *fniio = NULL;
fuse_private_t *priv = NULL;
@@ -799,18 +833,22 @@ fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this,
{
intstat_orig = fir->interrupt_state;
if (fir->interrupt_state == INTERRUPT_NONE) {
- fir->interrupt_state = INTERRUPT_SQUELCHED;
if (sync) {
- while (fir->interrupt_state == INTERRUPT_NONE) {
+ fir->interrupt_state = INTERRUPT_WAITING_HANDLER;
+ while (fir->interrupt_state != INTERRUPT_SQUELCHED) {
pthread_cond_wait(&fir->handler_cond,
&fir->handler_mutex);
}
- }
+ } else
+ fir->interrupt_state = INTERRUPT_SQUELCHED;
}
}
pthread_mutex_unlock(&fir->handler_mutex);
}
+ GF_ASSERT(intstat_orig == INTERRUPT_NONE ||
+ intstat_orig == INTERRUPT_HANDLED ||
+ intstat_orig == INTERRUPT_SQUELCHED);
gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig);
/*
@@ -860,19 +898,29 @@ fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir,
};
fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE;
+ GF_ASSERT(intstat == INTERRUPT_HANDLED || intstat == INTERRUPT_SQUELCHED);
+
pthread_mutex_lock(&fir->handler_mutex);
{
intstat_orig = fir->interrupt_state;
- if (fir->interrupt_state == INTERRUPT_NONE) {
- fir->interrupt_state = intstat;
- if (sync) {
+ switch (intstat_orig) {
+ case INTERRUPT_NONE:
+ fir->interrupt_state = intstat;
+ break;
+ case INTERRUPT_WAITING_HANDLER:
+ fir->interrupt_state = INTERRUPT_SQUELCHED;
pthread_cond_signal(&fir->handler_cond);
- }
+ break;
+ default:
+ break;
}
finh = fir->fuse_in_header;
}
pthread_mutex_unlock(&fir->handler_mutex);
+ GF_ASSERT(intstat_orig == INTERRUPT_NONE ||
+ (sync && intstat_orig == INTERRUPT_WAITING_HANDLER) ||
+ (!sync && intstat_orig == INTERRUPT_SQUELCHED));
gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig);
/*
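
The interrupt rework above replaces a set-then-wait race with an explicit WAITING_HANDLER state. A plain-pthreads sketch of the handshake (names are illustrative, not the fuse-bridge API): the fop side parks until the handler flips the state to SQUELCHED and signals; the handler records HANDLED/SQUELCHED directly only when nobody is waiting.

    #include <pthread.h>

    enum istate { INT_NONE, INT_WAITING_HANDLER, INT_HANDLED, INT_SQUELCHED };

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t c = PTHREAD_COND_INITIALIZER;
    static enum istate state = INT_NONE;

    /* fop completion (sync case): park until the handler squelches us */
    static void
    finish_fop_sync(void)
    {
        pthread_mutex_lock(&m);
        if (state == INT_NONE) {
            state = INT_WAITING_HANDLER;
            while (state != INT_SQUELCHED)
                pthread_cond_wait(&c, &m);
        }
        pthread_mutex_unlock(&m);
    }

    /* interrupt handler: wake a parked fop, or just record the outcome */
    static void
    finish_interrupt(enum istate outcome /* INT_HANDLED or INT_SQUELCHED */)
    {
        pthread_mutex_lock(&m);
        switch (state) {
            case INT_WAITING_HANDLER:
                state = INT_SQUELCHED;
                pthread_cond_signal(&c);
                break;
            case INT_NONE:
                state = outcome;
                break;
            default:
                break;
        }
        pthread_mutex_unlock(&m);
    }
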
@@ -2185,7 +2233,6 @@ fuse_mknod(xlator_t *this, fuse_in_header_t *finh, void *msg,
fuse_state_t *state = NULL;
#if FUSE_KERNEL_MINOR_VERSION >= 12
fuse_private_t *priv = NULL;
- int32_t ret = -1;
priv = this->private;
if (priv->proto_minor < 12)
@@ -2257,7 +2304,6 @@ fuse_mkdir(xlator_t *this, fuse_in_header_t *finh, void *msg,
char *name = (char *)(fmi + 1);
#if FUSE_KERNEL_MINOR_VERSION >= 12
fuse_private_t *priv = NULL;
- int32_t ret = -1;
#endif
fuse_state_t *state;
@@ -2783,7 +2829,6 @@ fuse_create(xlator_t *this, fuse_in_header_t *finh, void *msg,
#if FUSE_KERNEL_MINOR_VERSION >= 12
struct fuse_create_in *fci = msg;
fuse_private_t *priv = NULL;
- int32_t ret = -1;
#else
struct fuse_open_in *fci = msg;
#endif
@@ -3381,6 +3426,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
gf_log("glusterfs-fuse", GF_LOG_TRACE,
"finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
+ fd_close(state->fd);
+
fuse_fd_ctx_destroy(this, state->fd);
fd_unref(fd);
@@ -4690,12 +4737,10 @@ fuse_setlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
fuse_state_t *state = NULL;
int ret = 0;
- ret = fuse_interrupt_finish_fop(frame, this, _gf_false, (void **)&state);
- if (state) {
- GF_FREE(state->name);
- dict_unref(state->xdata);
- GF_FREE(state);
- }
+ ret = fuse_interrupt_finish_fop(frame, this, _gf_true, (void **)&state);
+ GF_FREE(state->name);
+ dict_unref(state->xdata);
+ GF_FREE(state);
if (ret) {
return 0;
}
@@ -4751,28 +4796,12 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie,
int32_t op_errno, dict_t *dict, dict_t *xdata)
{
fuse_interrupt_state_t intstat = INTERRUPT_NONE;
- fuse_interrupt_record_t *fir;
- fuse_state_t *state = NULL;
- int ret = 0;
-
- ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir);
- if (ret < 0) {
- gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found");
-
- goto out;
- }
+ fuse_interrupt_record_t *fir = cookie;
intstat = op_ret >= 0 ? INTERRUPT_HANDLED : INTERRUPT_SQUELCHED;
- fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_false,
- (void **)&state);
- if (state) {
- GF_FREE(state->name);
- dict_unref(state->xdata);
- GF_FREE(state);
- }
+ fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_true, NULL);
-out:
STACK_DESTROY(frame->root);
return 0;
@@ -4810,9 +4839,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir)
frame->op = GF_FOP_GETXATTR;
state->name = xattr_name;
- STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol,
- state->active_subvol->fops->fgetxattr, state->fd, xattr_name,
- state->xdata);
+ STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir,
+ state->active_subvol,
+ state->active_subvol->fops->fgetxattr, state->fd,
+ xattr_name, state->xdata);
return;
@@ -4835,15 +4865,9 @@ fuse_setlk_resume(fuse_state_t *state)
fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler);
state_clone = gf_memdup(state, sizeof(*state));
if (state_clone) {
- /*
- * Calling this allocator with fir casted to (char *) seems like
- * an abuse of this API, but in fact the API is stupid to assume
- * a (char *) argument (in the funcion it's casted to (void *)
- * anyway).
- */
- state_clone->xdata = dict_for_key_value(
- "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true);
+ state_clone->xdata = dict_new();
}
+
if (!fir || !state_clone || !state_clone->xdata) {
if (fir) {
GF_FREE(fir);
@@ -4901,7 +4925,7 @@ fuse_setlk(xlator_t *this, fuse_in_header_t *finh, void *msg,
return;
}
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS)
static void *
notify_kernel_loop(void *data)
{
@@ -5148,6 +5172,7 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg,
priv->timed_response_fuse_thread_started = _gf_true;
/* Used for 'reverse invalidation of inode' */
+#ifdef HAVE_FUSE_NOTIFICATIONS
if (fini->minor >= 12) {
ret = gf_thread_create(&messenger, NULL, notify_kernel_loop, this,
"fusenoti");
@@ -5159,7 +5184,9 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg,
goto out;
}
priv->reverse_fuse_thread_started = _gf_true;
- } else {
+ } else
+#endif
+ {
/*
* FUSE minor < 12 does not implement invalidate notifications.
* This mechanism is required for fopen-keep-cache to operate
@@ -5586,6 +5613,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
char create_in_progress = 0;
fuse_fd_ctx_t *basefd_ctx = NULL;
fd_t *oldfd = NULL;
+ dict_t *xdata = NULL;
basefd_ctx = fuse_fd_ctx_get(this, basefd);
GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out);
@@ -5622,10 +5650,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
}
if (oldfd->inode->table->xl == old_subvol) {
- if (IA_ISDIR(oldfd->inode->ia_type))
+ if (IA_ISDIR(oldfd->inode->ia_type)) {
ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL);
- else
- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL);
+ } else {
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "last-fsync set failed (%s) on fd (%p)"
+ "(basefd:%p basefd-inode.gfid:%s) "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ strerror(ENOMEM), oldfd, basefd,
+ uuid_utoa(basefd->inode->gfid), old_subvol->name,
+ old_subvol->graph->id, new_subvol->name,
+ new_subvol->graph->id);
+ }
+
+ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL);
+ }
if (ret < 0) {
gf_log("glusterfs-fuse", GF_LOG_WARNING,
@@ -5680,6 +5721,9 @@ out:
fd_unref(oldfd);
+ if (xdata)
+ dict_unref(xdata);
+
return ret;
}
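
The fsync change above is the usual xdata-hint idiom: allocate a dict, set a key the lower layers understand ("last-fsync" here), pass it down, and unref it on every exit path. Note the hunk deliberately proceeds with the fsync even when setting the hint fails. A sketch under those assumptions:

    static int
    fsync_with_last_hint(xlator_t *subvol, fd_t *fd)
    {
        int ret = -1;
        dict_t *xdata = dict_new();

        /* best effort: losing the hint only loses an optimization */
        if (!xdata || dict_set_int8(xdata, "last-fsync", 1))
            gf_log("sketch", GF_LOG_WARNING, "could not set last-fsync hint");

        ret = syncop_fsync(subvol, fd, 0, NULL, NULL, xdata, NULL);

        if (xdata)
            dict_unref(xdata);
        return ret;
    }
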
@@ -5855,7 +5899,9 @@ fuse_graph_sync(xlator_t *this)
new_graph_id = priv->next_graph->id;
priv->next_graph = NULL;
need_first_lookup = 1;
- priv->handle_graph_switch = _gf_true;
+ if (old_subvol) {
+ priv->handle_graph_switch = _gf_true;
+ }
while (!priv->event_recvd) {
ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex);
@@ -5891,13 +5937,6 @@ unlock:
if (winds_on_old_subvol == 0) {
xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL);
}
- } else {
- pthread_mutex_lock(&priv->sync_mutex);
- {
- priv->handle_graph_switch = _gf_false;
- pthread_cond_broadcast(&priv->migrate_cond);
- }
- pthread_mutex_unlock(&priv->sync_mutex);
}
return 0;
@@ -6284,14 +6323,12 @@ out:
int
dump_history_fuse(circular_buffer_t *cb, void *data)
{
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_F_HMS);
+ gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_F_HMS);
- snprintf(timestr + strlen(timestr), 256 - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec);
gf_proc_dump_write("TIME", "%s", timestr);
gf_proc_dump_write("message", "%s\n", (char *)cb->data);
@@ -6671,6 +6708,8 @@ init(xlator_t *this_xl)
INIT_LIST_HEAD(&priv->interrupt_list);
pthread_mutex_init(&priv->interrupt_mutex, NULL);
+ pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL);
+
/* get options from option dictionary */
ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, &value_string);
if (ret == -1 || value_string == NULL) {
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index f4c6ad8f2fb..4cb94c23cad 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -78,6 +78,20 @@ typedef struct fuse_in_header fuse_in_header_t;
typedef void(fuse_handler_t)(xlator_t *this, fuse_in_header_t *finh, void *msg,
struct iobuf *iobuf);
+enum fusedev_errno {
+ FUSEDEV_ENOENT,
+ FUSEDEV_ENOTDIR,
+ FUSEDEV_ENODEV,
+ FUSEDEV_EPERM,
+ FUSEDEV_ENOMEM,
+ FUSEDEV_ENOTCONN,
+ FUSEDEV_ECONNREFUSED,
+ FUSEDEV_EOVERFLOW,
+ FUSEDEV_EBUSY,
+ FUSEDEV_ENOTEMPTY,
+ FUSEDEV_EMAXPLUS
+};
+
struct fuse_private {
int fd;
uint32_t proto_minor;
@@ -193,6 +207,10 @@ struct fuse_private {
uint32_t lru_limit;
uint32_t invalidate_limit;
uint32_t fuse_dev_eperm_ratelimit_ns;
+
+ /* counters for fusedev errnos */
+ uint8_t fusedev_errno_cnt[FUSEDEV_EMAXPLUS];
+ pthread_mutex_t fusedev_errno_cnt_mutex;
};
typedef struct fuse_private fuse_private_t;
@@ -226,6 +244,7 @@ enum fuse_interrupt_state {
INTERRUPT_NONE,
INTERRUPT_SQUELCHED,
INTERRUPT_HANDLED,
+ INTERRUPT_WAITING_HANDLER,
};
typedef enum fuse_interrupt_state fuse_interrupt_state_t;
struct fuse_interrupt_record;
@@ -341,30 +360,6 @@ typedef struct fuse_graph_switch_args fuse_graph_switch_args_t;
return; \
} \
state->umask = fci->umask; \
- \
- /* TODO: remove this after 3.4.0 release. keeping it for the \
- sake of backward compatibility with old (3.3.[01]) \
- releases till then. */ \
- ret = dict_set_int16(state->xdata, "umask", fci->umask); \
- if (ret < 0) { \
- gf_log("glusterfs-fuse", GF_LOG_WARNING, \
- "%s Failed adding umask" \
- " to request", \
- op); \
- send_fuse_err(this, finh, ENOMEM); \
- free_fuse_state(state); \
- return; \
- } \
- ret = dict_set_int16(state->xdata, "mode", fci->mode); \
- if (ret < 0) { \
- gf_log("glusterfs-fuse", GF_LOG_WARNING, \
- "%s Failed adding mode " \
- "to request", \
- op); \
- send_fuse_err(this, finh, ENOMEM); \
- free_fuse_state(state); \
- return; \
- } \
} \
} while (0)
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
index fd11f2ba652..a2b0ad11fe4 100644
--- a/xlators/mount/fuse/src/fuse-helpers.c
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh)
return state;
}
-#define FUSE_MAX_AUX_GROUPS \
- 32 /* We can get only up to 32 aux groups from /proc */
void
frame_fill_groups(call_frame_t *frame)
{
@@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame)
char filename[32];
char line[4096];
char *ptr = NULL;
- FILE *fp = NULL;
- int idx = 0;
long int id = 0;
char *saveptr = NULL;
char *endptr = NULL;
@@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame)
call_stack_set_groups(frame->root, ngroups, &mygroups);
} else {
+ FILE *fp = NULL;
+
ret = snprintf(filename, sizeof filename, "/proc/%d/status",
frame->root->pid);
- if (ret >= sizeof filename)
+ if (ret >= sizeof filename) {
+ gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size");
goto out;
+ }
fp = fopen(filename, "r");
- if (!fp)
+ if (!fp) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename,
+ strerror(errno));
goto out;
+ }
- if (call_stack_alloc_groups(frame->root, ngroups) != 0)
- goto out;
+ for (;;) {
+ gf_boolean_t found_groups = _gf_false;
+ int idx = 0;
- while ((ptr = fgets(line, sizeof line, fp))) {
- if (strncmp(ptr, "Groups:", 7) != 0)
- continue;
+ if (call_stack_alloc_groups(frame->root, ngroups) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to allocate gid buffer");
+ goto out;
+ }
+ while ((ptr = fgets(line, sizeof line, fp))) {
+ if (strncmp(ptr, "Groups:", 7) == 0) {
+ found_groups = _gf_true;
+ break;
+ }
+ }
+ if (!found_groups) {
+ gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s",
+ filename);
+ break;
+ }
ptr = line + 8;
for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr;
ptr = strtok_r(NULL, " \t\r\n", &saveptr)) {
errno = 0;
id = strtol(ptr, &endptr, 0);
- if (errno == ERANGE)
- break;
- if (!endptr || *endptr)
+ if (errno == ERANGE || !endptr || *endptr) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to parse %s",
+ filename);
break;
- frame->root->groups[idx++] = id;
- if (idx == FUSE_MAX_AUX_GROUPS)
+ }
+ if (idx < call_stack_groups_capacity(frame->root))
+ frame->root->groups[idx] = id;
+ idx++;
+ if (idx == GF_MAX_AUX_GROUPS)
break;
}
-
- frame->root->ngrps = idx;
- break;
+ if (idx > call_stack_groups_capacity(frame->root)) {
+ ngroups = idx;
+ rewind(fp);
+ } else {
+ frame->root->ngrps = idx;
+ break;
+ }
}
+ out:
+ if (fp)
+ fclose(fp);
}
-
-out:
- if (fp)
- fclose(fp);
#elif defined(GF_SOLARIS_HOST_OS)
char filename[32];
char scratch[128];
@@ -245,7 +268,7 @@ out:
fp = fopen(filename, "r");
if (fp != NULL) {
if (fgets(scratch, sizeof scratch, fp) != NULL) {
- ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS);
+ ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS);
if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
fclose(fp);
return;
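
The Linux branch above gains a grow-and-rewind loop: parse the "Groups:" line of /proc/<pid>/status once while counting entries; if the count exceeds the currently allocated capacity, remember the larger count, rewind the file, and parse again. Reduced to a standalone sketch (buffer management simplified, growth cap omitted):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>

    static int
    parse_proc_groups(FILE *fp, gid_t **out, int *nout)
    {
        int cap = 32; /* first guess, mirroring the old 32-group limit */

        for (;;) {
            gid_t *buf = malloc(cap * sizeof(*buf));
            char line[4096];
            char *saveptr = NULL;
            int idx = 0;
            int found = 0;

            if (!buf)
                return -1;
            while (fgets(line, sizeof(line), fp)) {
                if (strncmp(line, "Groups:", 7) == 0) {
                    found = 1;
                    break;
                }
            }
            if (!found) {
                free(buf);
                return -1;
            }
            for (char *p = strtok_r(line + 8, " \t\r\n", &saveptr); p;
                 p = strtok_r(NULL, " \t\r\n", &saveptr)) {
                if (idx < cap)
                    buf[idx] = (gid_t)strtol(p, NULL, 0);
                idx++; /* keep counting past capacity to learn the real size */
            }
            if (idx <= cap) {
                *out = buf;
                *nout = idx;
                return 0;
            }
            free(buf); /* undersized: grow to the observed count and rescan */
            cap = idx;
            rewind(fp);
        }
    }
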
diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in
index d43fc97d084..3a5feb606d7 100755
--- a/xlators/mount/fuse/utils/mount_glusterfs.in
+++ b/xlators/mount/fuse/utils/mount_glusterfs.in
@@ -469,6 +469,7 @@ parse_options()
main ()
{
+#if !defined(__FreeBSD__)
## `mount` on OSX specifies options as first argument
echo $1|grep -q -- "-o"
if [ $? -eq 0 ]; then
@@ -478,7 +479,7 @@ main ()
volfile_loc=$1
mount_point=$2
fi
-
+#endif /* __FreeBSD__ */
while getopts "Vo:h" opt; do
case "${opt}" in
o)
@@ -499,6 +500,12 @@ main ()
esac
done
+#ifdef __FreeBSD__
+ shift $((OPTIND - 1))
+ volfile_loc="$1"
+ mount_point="$2"
+#endif /* __FreeBSD__ */
+
[ -r "$volfile_loc" ] || {
# '%' included to support ipv6 link local addresses
server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%.\-]*\):.*/\1/p');
diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c
index 50cd82dd422..7e3bbf16086 100644
--- a/xlators/nfs/server/src/acl3.c
+++ b/xlators/nfs/server/src/acl3.c
@@ -571,6 +571,12 @@ acl3_setacl_resume(void *carg)
acl3_check_fh_resolve_status(cs, stat, acl3err);
nfs_request_user_init(&nfu, cs->req);
xattr = dict_new();
+ if (xattr == NULL) {
+ gf_msg(GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL,
+ "dict allocation failed");
+ goto acl3err;
+ }
+
if (cs->aclcount)
ret = dict_set_static_bin(xattr, POSIX_ACL_ACCESS_XATTR, cs->aclxattr,
posix_acl_xattr_size(cs->aclcount));
@@ -724,7 +730,6 @@ acl3svc_init(xlator_t *nfsx)
struct nfs_state *nfs = NULL;
dict_t *options = NULL;
int ret = -1;
- char *portstr = NULL;
static gf_boolean_t acl3_inited = _gf_false;
/* Already inited */
@@ -742,12 +747,13 @@ acl3svc_init(xlator_t *nfsx)
acl3prog.private = ns;
options = dict_new();
-
- ret = gf_asprintf(&portstr, "%d", GF_ACL3_PORT);
- if (ret == -1)
+ if (options == NULL) {
+ gf_msg(GF_ACL, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL,
+ "dict allocation failed");
goto err;
+ }
- ret = dict_set_dynstr(options, "transport.socket.listen-port", portstr);
+ ret = dict_set_str(options, "transport.socket.listen-port", GF_ACL3_PORT);
if (ret == -1)
goto err;
ret = dict_set_str(options, "transport-type", "socket");
@@ -783,7 +789,6 @@ acl3svc_init(xlator_t *nfsx)
if (ret == -1) {
gf_msg(GF_ACL, GF_LOG_ERROR, errno, NFS_MSG_LISTENERS_CREATE_FAIL,
"Unable to create listeners");
- dict_unref(options);
goto err;
}
diff --git a/xlators/nfs/server/src/acl3.h b/xlators/nfs/server/src/acl3.h
index dead04b4273..762fbb04a0f 100644
--- a/xlators/nfs/server/src/acl3.h
+++ b/xlators/nfs/server/src/acl3.h
@@ -18,7 +18,7 @@
#define ACL3_SETACL 2
#define ACL3_PROC_COUNT 3
-#define GF_ACL3_PORT 38469
+#define GF_ACL3_PORT "38469"
#define GF_ACL GF_NFS "-ACL"
/* Flags for the getacl/setacl mode */
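
Making GF_ACL3_PORT a string lets acl3svc_init() drop the gf_asprintf()/dict_set_dynstr() pair. The distinction, stated here as an assumption about the dict API's usual contract: dict_set_dynstr() takes ownership of an allocated value and frees it with the dict, while dict_set_str() stores the pointer as-is, suiting static strings and leaving nothing to clean up on error paths. Sketch (the second key name is illustrative only):

    static int
    set_ports(dict_t *options)
    {
        char *portstr = NULL;
        int ret;

        /* static value: stored as-is, nothing to clean up on error */
        ret = dict_set_str(options, "transport.socket.listen-port",
                           GF_ACL3_PORT);
        if (ret)
            return ret;

        /* allocated value: the dict takes ownership and frees it later */
        ret = gf_asprintf(&portstr, "%d", 38469);
        if (ret == -1)
            return -1;
        return dict_set_dynstr(options, "example-port", portstr);
    }
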
diff --git a/xlators/nfs/server/src/auth-cache.c b/xlators/nfs/server/src/auth-cache.c
index 64768646074..ffbf5b6cad6 100644
--- a/xlators/nfs/server/src/auth-cache.c
+++ b/xlators/nfs/server/src/auth-cache.c
@@ -189,7 +189,7 @@ out:
static int
_auth_cache_expired(struct auth_cache *cache, struct auth_cache_entry *entry)
{
- return ((time(NULL) - entry->timestamp) > cache->ttl_sec);
+ return ((gf_time() - entry->timestamp) > cache->ttl_sec);
}
/**
@@ -474,7 +474,7 @@ cache_nfs_fh(struct auth_cache *cache, struct nfs3_fh *fh,
goto out;
}
- entry->timestamp = time(NULL);
+ entry->timestamp = gf_time();
/* Update entry->item if it is pointing to a different export_item */
if (entry->item && entry->item != export_item) {
GF_REF_PUT(entry->item);
diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
index 88d3002a752..a34d9104c17 100644
--- a/xlators/nfs/server/src/mount3.c
+++ b/xlators/nfs/server/src/mount3.c
@@ -4062,6 +4062,11 @@ mnt3svc_init(xlator_t *nfsx)
mnt3prog.private = mstate;
options = dict_new();
+ if (options == NULL) {
+ gf_msg(GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL,
+ "dict allocation failed");
+ goto err;
+ }
ret = gf_asprintf(&portstr, "%d", GF_MOUNTV3_PORT);
if (ret == -1)
@@ -4097,7 +4102,6 @@ mnt3svc_init(xlator_t *nfsx)
if (ret == -1) {
gf_msg(GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_LISTENERS_CREATE_FAIL,
"Unable to create listeners");
- dict_unref(options);
goto err;
}
@@ -4162,6 +4166,11 @@ mnt1svc_init(xlator_t *nfsx)
mnt1prog.private = mstate;
options = dict_new();
+ if (options == NULL) {
+ gf_msg(GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL,
+ "dict allocation failed");
+ goto err;
+ }
ret = gf_asprintf(&portstr, "%d", GF_MOUNTV1_PORT);
if (ret == -1)
diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c
index 0688779eb65..1a2b0f85453 100644
--- a/xlators/nfs/server/src/mount3udp_svc.c
+++ b/xlators/nfs/server/src/mount3udp_svc.c
@@ -216,7 +216,7 @@ mount3udp_thread(void *argv)
GF_ASSERT(nfsx);
- glusterfs_this_set(nfsx);
+ THIS = nfsx;
transp = svcudp_create(RPC_ANYSOCK);
if (transp == NULL) {
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index cab57bbf3c9..39b73f88ac3 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -1157,7 +1157,7 @@ out:
return ret;
}
-int
+static int
nfs_reconfigure_state(xlator_t *this, dict_t *options)
{
int ret = 0;
@@ -1167,8 +1167,8 @@ nfs_reconfigure_state(xlator_t *this, dict_t *options)
gf_boolean_t optbool;
uint32_t optuint32;
struct nfs_state *nfs = NULL;
- char *blacklist_keys[] = {"nfs.port", "nfs.transport-type",
- "nfs.mem-factor", NULL};
+ static char *options_require_restart[] = {"nfs.port", "nfs.transport-type",
+ "nfs.mem-factor", NULL};
GF_VALIDATE_OR_GOTO(GF_NFS, this, out);
GF_VALIDATE_OR_GOTO(GF_NFS, this->private, out);
@@ -1176,14 +1176,14 @@ nfs_reconfigure_state(xlator_t *this, dict_t *options)
nfs = (struct nfs_state *)this->private;
- /* Black listed options can't be reconfigured, they need
+ /* Some options can't be reconfigured; they need
* NFS to be restarted. There are two cases 1. SET 2. UNSET.
* 1. SET */
- while (blacklist_keys[keyindx]) {
- if (dict_get(options, blacklist_keys[keyindx])) {
+ while (options_require_restart[keyindx]) {
+ if (dict_get(options, options_require_restart[keyindx])) {
gf_msg(GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
"Reconfiguring %s needs NFS restart",
- blacklist_keys[keyindx]);
+ options_require_restart[keyindx]);
goto out;
}
keyindx++;
diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c
index 8a58977b53c..897fb42b071 100644
--- a/xlators/nfs/server/src/nfs3-helpers.c
+++ b/xlators/nfs/server/src/nfs3-helpers.c
@@ -1072,7 +1072,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode)
if (sattr->atime.set_it == SET_TO_SERVER_TIME) {
valid |= GF_SET_ATTR_ATIME;
if (buf)
- buf->ia_atime = time(NULL);
+ buf->ia_atime = gf_time();
}
if (sattr->mtime.set_it == SET_TO_CLIENT_TIME) {
@@ -1084,7 +1084,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode)
if (sattr->mtime.set_it == SET_TO_SERVER_TIME) {
valid |= GF_SET_ATTR_MTIME;
if (buf)
- buf->ia_mtime = time(NULL);
+ buf->ia_mtime = gf_time();
}
return valid;
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
index 7cfd75f9ed1..f9042bc3b3f 100644
--- a/xlators/nfs/server/src/nfs3.c
+++ b/xlators/nfs/server/src/nfs3.c
@@ -5651,7 +5651,7 @@ nfs3_init_state(xlator_t *nfsx)
goto free_localpool;
}
- nfs3->serverstart = (uint64_t)time(NULL);
+ nfs3->serverstart = (uint64_t)gf_time();
INIT_LIST_HEAD(&nfs3->fdlru);
LOCK_INIT(&nfs3->fdlrulock);
nfs3->fdcount = 0;
diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
index ef65d7f55f8..577e8543966 100644
--- a/xlators/nfs/server/src/nlm4.c
+++ b/xlators/nfs/server/src/nlm4.c
@@ -1011,7 +1011,8 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame)
int port = -1;
struct nlm4_notify_args *ncf = NULL;
- glusterfs_this_set(cs->nfsx);
+ GF_ASSERT(cs->nfsx);
+ THIS = cs->nfsx;
rpc_transport_get_peeraddr(cs->trans, NULL, 0, &sock_union.storage,
sizeof(sock_union.storage));
@@ -1054,6 +1055,12 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame)
}
options = dict_new();
+ if (options == NULL) {
+ gf_msg(GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL,
+ "dict allocation failed");
+ goto err;
+ }
+
ret = dict_set_str(options, "transport-type", "socket");
if (ret == -1) {
gf_msg(GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
@@ -2592,6 +2599,11 @@ nlm4svc_init(xlator_t *nfsx)
nlm4prog.private = ns;
options = dict_new();
+ if (options == NULL) {
+ gf_msg(GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL,
+ "dict allocation failed");
+ goto err;
+ }
ret = gf_asprintf(&portstr, "%d", GF_NLM4_PORT);
if (ret == -1)
@@ -2633,7 +2645,6 @@ nlm4svc_init(xlator_t *nfsx)
if (ret == -1) {
gf_msg(GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_LISTENERS_CREATE_FAIL,
"Unable to create listeners");
- dict_unref(options);
goto err;
}
INIT_LIST_HEAD(&nlm_client_list);
@@ -2704,7 +2715,7 @@ nlm4svc_init(xlator_t *nfsx)
goto err;
}
- (void)gf_thread_create(&thr, NULL, nsm_thread, (void *)NULL, "nfsnsm");
+ (void)gf_thread_create(&thr, NULL, nsm_thread, nfsx, "nfsnsm");
timeout.tv_sec = nlm_grace_period;
timeout.tv_nsec = 0;
diff --git a/xlators/nfs/server/src/nlmcbk_svc.c b/xlators/nfs/server/src/nlmcbk_svc.c
index d18b86ce8db..eaa7b916190 100644
--- a/xlators/nfs/server/src/nlmcbk_svc.c
+++ b/xlators/nfs/server/src/nlmcbk_svc.c
@@ -84,9 +84,14 @@ nlmcbk_program_0(struct svc_req *rqstp, register SVCXPRT *transp)
void *
nsm_thread(void *argv)
{
+ xlator_t *nfsx = argv;
register SVCXPRT *transp;
int ret = 0;
+ GF_ASSERT(nfsx);
+
+ THIS = nfsx;
+
ret = pmap_unset(NLMCBK_PROGRAM, NLMCBK_V1);
if (ret == 0) {
gf_msg(GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_PMAP_UNSET_FAIL,
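
nsm_thread() now receives the nfs xlator and sets THIS, the codebase's thread-local "current xlator", before doing any work; previously the thread ran without that context. The same pattern applies to any thread an xlator spawns (sketch; the worker/tid names are illustrative):

    static void *
    worker(void *argv)
    {
        xlator_t *xl = argv;

        GF_ASSERT(xl);
        THIS = xl; /* attribute logging/allocations to this xlator */
        /* ... thread body ... */
        return NULL;
    }

    /* spawn side: gf_thread_create(&tid, NULL, worker, nfsx, "nfsnsm"); */
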
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index c007e0a355d..9375d29c17f 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -133,23 +133,17 @@ ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,
return 0;
}
-int32_t
+static gf_boolean_t
ioc_inode_need_revalidate(ioc_inode_t *ioc_inode)
{
- int8_t need_revalidate = 0;
- struct timeval tv = {
- 0,
- };
ioc_table_t *table = NULL;
+ GF_ASSERT(ioc_inode);
table = ioc_inode->table;
+ GF_ASSERT(table);
- gettimeofday(&tv, NULL);
-
- if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
- need_revalidate = 1;
-
- return need_revalidate;
+ return (gf_time() - ioc_inode->cache.last_revalidate >=
+ table->cache_timeout);
}
/*
@@ -411,9 +405,6 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ioc_inode_t *ioc_inode = NULL;
size_t destroy_size = 0;
struct iatt *local_stbuf = NULL;
- struct timeval tv = {
- 0,
- };
local = frame->local;
ioc_inode = local->inode;
@@ -451,10 +442,9 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret < 0)
local_stbuf = NULL;
- gettimeofday(&tv, NULL);
ioc_inode_lock(ioc_inode);
{
- memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
+ ioc_inode->cache.last_revalidate = gf_time();
}
ioc_inode_unlock(ioc_inode);
@@ -1405,9 +1395,6 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
{
ioc_inode_t *ioc_inode = NULL;
uint64_t tmp_inode = 0;
- struct timeval tv = {
- 0,
- };
inode_ctx_get(fd->inode, this, &tmp_inode);
ioc_inode = (ioc_inode_t *)(long)tmp_inode;
@@ -1418,10 +1405,9 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
return 0;
}
- gettimeofday(&tv, NULL);
ioc_inode_lock(ioc_inode);
{
- memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
+ ioc_inode->cache.last_revalidate = gf_time();
}
ioc_inode_unlock(ioc_inode);
@@ -1945,7 +1931,7 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)
char key[GF_DUMP_MAX_BUF_LEN] = {
0,
};
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -1955,11 +1941,9 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)
table = ioc_inode->table;
- if (ioc_inode->cache.tv.tv_sec) {
- gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.tv.tv_sec,
+ if (ioc_inode->cache.last_revalidate) {
+ gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate,
gf_timefmt_FT);
- snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec);
gf_proc_dump_write("last-cache-validation-time", "%s", timestr);
}
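With the stamp reduced from a struct timeval to a plain time_t, the revalidation test collapses to one integer comparison at whole-second granularity, which is all the removed time_elapsed() helper ever delivered anyway (it discarded the sub-second component). The new predicate, as a standalone sketch:

    /* The cache must be revalidated once cache_timeout full seconds
     * have elapsed since the last revalidation stamp. */
    static gf_boolean_t
    cache_is_stale(time_t last_revalidate, time_t cache_timeout)
    {
        return (gf_time() - last_revalidate) >= cache_timeout;
    }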
diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h
index 4303c2fae13..14923c75edc 100644
--- a/xlators/performance/io-cache/src/io-cache.h
+++ b/xlators/performance/io-cache/src/io-cache.h
@@ -117,15 +117,13 @@ struct ioc_page {
struct ioc_cache {
rbthash_table_t *page_table;
struct list_head page_lru;
- time_t mtime; /*
- * seconds component of file mtime
- */
- time_t mtime_nsec; /*
- * nanosecond component of file mtime
- */
- struct timeval tv; /*
- * time-stamp at last re-validate
- */
+ time_t mtime; /*
+ * seconds component of file mtime
+ */
+ time_t mtime_nsec; /*
+ * nanosecond component of file mtime
+ */
+ time_t last_revalidate; /* timestamp at last re-validate */
};
struct ioc_inode {
@@ -270,17 +268,6 @@ ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size,
pthread_mutex_unlock(&page->page_lock); \
} while (0)
-static inline uint64_t
-time_elapsed(struct timeval *now, struct timeval *then)
-{
- uint64_t sec = now->tv_sec - then->tv_sec;
-
- if (sec)
- return sec;
-
- return 0;
-}
-
ioc_inode_t *
ioc_inode_search(ioc_table_t *table, inode_t *inode);
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
index a8edbde23f2..84b1ae6cb20 100644
--- a/xlators/performance/io-cache/src/page.c
+++ b/xlators/performance/io-cache/src/page.c
@@ -413,9 +413,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ioc_waitq_t *waitq = NULL;
size_t iobref_page_size = 0;
char zero_filled = 0;
- struct timeval tv = {
- 0,
- };
GF_ASSERT(frame);
@@ -431,7 +428,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0));
- gettimeofday(&tv, NULL);
ioc_inode_lock(ioc_inode);
{
if (op_ret == -1 ||
@@ -448,7 +444,7 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
}
- memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
+ ioc_inode->cache.last_revalidate = gf_time();
if (op_ret < 0) {
/* error, readv returned -1 */
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 6fa4d88389c..3d24cc97f4b 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -1016,16 +1016,13 @@ static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */
static void
iot_apply_event(xlator_t *this, threshold_t *thresh)
{
- struct timespec now;
- time_t delta;
+ time_t delta, now = gf_time();
/* Refresh for manual testing/debugging. It's cheap. */
THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1);
- timespec_now(&now);
-
if (thresh->value && thresh->update_time) {
- delta = now.tv_sec - thresh->update_time;
+ delta = now - thresh->update_time;
/* Be careful about underflow. */
if (thresh->value <= delta) {
thresh->value = 0;
@@ -1046,7 +1043,7 @@ iot_apply_event(xlator_t *this, threshold_t *thresh)
kill(getpid(), SIGTRAP);
}
- thresh->update_time = now.tv_sec;
+ thresh->update_time = now;
}
static void *
@@ -1311,7 +1308,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
/* Wait for draining stub from queue before notify PARENT_DOWN */
stub_cnt = GF_ATOMIC_GET(conf->stub_cnt);
if (stub_cnt) {
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
pthread_mutex_lock(&conf->mutex);
{
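iot_apply_event() is a leaky-bucket rate detector: elapsed wall time drains thresh->value one unit per second, each event refills it, and reaching THRESH_LIMIT (THRESH_SECONDS * (THRESH_EVENTS - 1)) means events averaged faster than one per THRESH_SECONDS. Switching to a plain time_t merely removes the struct timespec detour. A sketch of the decay step shown above (the per-event refill amount lies outside the visible hunks and is assumed here):

    if (thresh->value && thresh->update_time) {
        time_t delta = now - thresh->update_time;
        /* Be careful about underflow. */
        if (thresh->value <= delta)
            thresh->value = 0;       /* bucket fully drained */
        else
            thresh->value -= delta;  /* drain one unit per second */
    }
    /* ... refill on this event; SIGTRAP if THRESH_LIMIT is reached ... */
    thresh->update_time = now;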
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 4c76f3089d5..a405be51f02 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/timespec.h>
#include <glusterfs/glusterfs.h>
#include <glusterfs/defaults.h>
#include <glusterfs/logging.h>
@@ -33,8 +32,7 @@
struct mdc_statfs_cache {
pthread_mutex_t lock;
- gf_boolean_t initialized;
- struct timespec last_refreshed;
+ time_t last_refreshed; /* (time_t)-1 if not yet initialized. */
struct statvfs buf;
};
@@ -61,7 +59,7 @@ struct mdc_statistics {
};
struct mdc_conf {
- int timeout;
+ uint32_t timeout;
gf_boolean_t cache_posix_acl;
gf_boolean_t cache_glusterfs_acl;
gf_boolean_t cache_selinux;
@@ -132,6 +130,7 @@ struct mdc_local {
char *key;
dict_t *xattr;
uint64_t incident_time;
+ bool update_cache;
};
int
@@ -375,10 +374,9 @@ unlock:
static gf_boolean_t
__is_cache_valid(xlator_t *this, time_t mdc_time)
{
- time_t now = 0;
gf_boolean_t ret = _gf_true;
struct mdc_conf *conf = NULL;
- int timeout = 0;
+ uint32_t timeout = 0;
time_t last_child_down = 0;
conf = this->private;
@@ -392,15 +390,13 @@ __is_cache_valid(xlator_t *this, time_t mdc_time)
last_child_down = conf->last_child_down;
timeout = conf->timeout;
- time(&now);
-
if ((mdc_time == 0) ||
((last_child_down != 0) && (mdc_time < last_child_down))) {
ret = _gf_false;
goto out;
}
- if (now >= (mdc_time + timeout)) {
+ if (gf_time() >= (mdc_time + timeout)) {
ret = _gf_false;
}
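The validity rule now reads as two independent conditions on second-granularity stamps. An equivalent standalone form (locking and conf lookup stripped, same semantics as the patched function):

    static gf_boolean_t
    entry_is_valid(time_t mdc_time, time_t last_child_down, uint32_t timeout)
    {
        /* Never stamped, or stamped before the last child-down event. */
        if ((mdc_time == 0) ||
            ((last_child_down != 0) && (mdc_time < last_child_down)))
            return _gf_false;

        /* Otherwise valid until the entry outlives the timeout. */
        return gf_time() < (time_t)(mdc_time + timeout);
    }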
@@ -580,10 +576,9 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
mdc_from_iatt(mdc, iatt);
mdc->valid = _gf_true;
if (update_time) {
- time(&mdc->ia_time);
-
+ mdc->ia_time = gf_time();
if (mdc->xa_time && update_xa_time)
- time(&mdc->xa_time);
+ mdc->xa_time = mdc->ia_time;
}
gf_msg_callingfn(
@@ -784,7 +779,7 @@ mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict)
if (newdict)
mdc->xattr = newdict;
- time(&mdc->xa_time);
+ mdc->xa_time = gf_time();
gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld",
uuid_utoa(inode->gfid), (long long)mdc->xa_time);
}
@@ -985,7 +980,7 @@ out:
return ret;
}
-void
+static bool
mdc_load_reqs(xlator_t *this, dict_t *dict)
{
struct mdc_conf *conf = this->private;
@@ -994,6 +989,7 @@ mdc_load_reqs(xlator_t *this, dict_t *dict)
char *tmp = NULL;
char *tmp1 = NULL;
int ret = 0;
+ bool loaded = false;
tmp1 = conf->mdc_xattr_str;
if (!tmp1)
@@ -1011,13 +1007,17 @@ mdc_load_reqs(xlator_t *this, dict_t *dict)
conf->mdc_xattr_str = NULL;
gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
"Disabled cache for xattrs, dict_set failed");
+ goto out;
}
pattern = strtok_r(NULL, ",", &tmp);
}
- GF_FREE(mdc_xattr_str);
+ loaded = true;
+
out:
- return;
+ GF_FREE(mdc_xattr_str);
+
+ return loaded;
}
struct checkpair {
@@ -1057,8 +1057,7 @@ mdc_cache_statfs(xlator_t *this, struct statvfs *buf)
pthread_mutex_lock(&conf->statfs_cache.lock);
{
memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs));
- clock_gettime(CLOCK_MONOTONIC, &conf->statfs_cache.last_refreshed);
- conf->statfs_cache.initialized = _gf_true;
+ conf->statfs_cache.last_refreshed = gf_time();
}
pthread_mutex_unlock(&conf->statfs_cache.lock);
}
@@ -1067,8 +1066,7 @@ int
mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf)
{
struct mdc_conf *conf = this->private;
- struct timespec now;
- double cache_age = 0.0;
+ uint32_t cache_age = 0;
int ret = 0;
if (!buf || !conf) {
@@ -1077,23 +1075,23 @@ mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf)
}
*buf = NULL;
- timespec_now(&now);
pthread_mutex_lock(&conf->statfs_cache.lock);
{
- /* Skip if the cache is not initialized */
- if (!conf->statfs_cache.initialized) {
+ /* Skip if the cache is not initialized. */
+ if (conf->statfs_cache.last_refreshed == (time_t)-1) {
ret = -1;
goto unlock;
}
- cache_age = (now.tv_sec - conf->statfs_cache.last_refreshed.tv_sec);
+ cache_age = (gf_time() - conf->statfs_cache.last_refreshed);
- gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %lf", cache_age);
+ gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs",
+ cache_age);
if (cache_age > conf->timeout) {
- /* Expire the cache */
+ /* Expire the cache. */
gf_log(this->name, GF_LOG_DEBUG,
- "Cache age %lf exceeded timeout %d", cache_age,
+ "Cache age %u secs exceeded timeout %u secs", cache_age,
conf->timeout);
ret = -1;
goto unlock;
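The separate 'initialized' boolean is folded into the timestamp itself: (time_t)-1 is never a legitimate refresh time, so it doubles as the "nothing cached yet" sentinel (mdc_init() seeds it below). The read side then needs only two checks ('cache' is a hypothetical pointer to the mdc_statfs_cache):

    if (cache->last_refreshed == (time_t)-1)
        return -1;                                   /* never populated */
    if ((uint32_t)(gf_time() - cache->last_refreshed) > timeout)
        return -1;                                   /* populated, expired */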
@@ -1107,6 +1105,31 @@ err:
return ret;
}
+static dict_t *
+mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata)
+{
+ if (xdata != NULL) {
+ dict_ref(xdata);
+ }
+
+ if (local == NULL) {
+ return xdata;
+ }
+
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ local->update_cache = false;
+
+ return NULL;
+ }
+ }
+
+ local->update_cache = mdc_load_reqs(this, xdata);
+
+ return xdata;
+}
+
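mdc_prepare_request() replaces the copy-pasted "allocate xdata if absent, then mdc_load_reqs()" sequence in every uncached path, and it fixes ownership at the same time: the caller always gets back a dict it owns (a caller-supplied xdata gains a ref, otherwise a fresh dict is allocated), so a single unconditional unref after STACK_WIND is always correct. It also records in local->update_cache whether the cacheable keys were actually requested, which the callbacks consult before touching the xattr cache. The caller-side shape, as used by mdc_stat() below:

    xdata = mdc_prepare_request(this, local, xdata);
    STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this),
               FIRST_CHILD(this)->fops->stat, loc, xdata);
    if (xdata != NULL)
        dict_unref(xdata); /* drop the reference we own */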
int
mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct statvfs *buf,
@@ -1189,6 +1212,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
+ if (!local)
+ goto out;
+
if (op_ret != 0) {
if (op_errno == ENOENT)
GF_ATOMIC_INC(conf->mdc_counter.negative_lookup);
@@ -1206,9 +1232,6 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- if (!local)
- goto out;
-
if (local->loc.parent) {
mdc_inode_iatt_set(this, local->loc.parent, postparent,
local->incident_time);
@@ -1216,7 +1239,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->loc.inode) {
mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time);
- mdc_inode_xatt_set(this, local->loc.inode, dict);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, dict);
+ }
}
out:
MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict,
@@ -1235,7 +1260,6 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
0,
};
dict_t *xattr_rsp = NULL;
- dict_t *xattr_alloc = NULL;
mdc_local_t *local = NULL;
struct mdc_conf *conf = this->private;
@@ -1286,18 +1310,18 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
return 0;
uncached:
- if (!xdata)
- xdata = xattr_alloc = dict_new();
- if (xdata)
- mdc_load_reqs(this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xdata);
if (xattr_rsp)
dict_unref(xattr_rsp);
- if (xattr_alloc)
- dict_unref(xattr_alloc);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
return 0;
}
@@ -1320,7 +1344,9 @@ mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
}
mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
- mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ }
out:
MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
@@ -1334,7 +1360,6 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
int ret;
struct iatt stbuf;
mdc_local_t *local = NULL;
- dict_t *xattr_alloc = NULL;
struct mdc_conf *conf = this->private;
local = mdc_local_get(frame, loc->inode);
@@ -1358,17 +1383,16 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
return 0;
uncached:
- if (!xdata)
- xdata = xattr_alloc = dict_new();
- if (xdata)
- mdc_load_reqs(this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat, loc, xdata);
- if (xattr_alloc)
- dict_unref(xattr_alloc);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
return 0;
}
@@ -1391,7 +1415,9 @@ mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
}
mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time);
- mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ }
out:
MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
@@ -1405,14 +1431,13 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
int ret;
struct iatt stbuf;
mdc_local_t *local = NULL;
- dict_t *xattr_alloc = NULL;
struct mdc_conf *conf = this->private;
local = mdc_local_get(frame, fd->inode);
if (!local)
goto uncached;
- local->fd = fd_ref(fd);
+ local->fd = __fd_ref(fd);
ret = mdc_inode_iatt_get(this, fd->inode, &stbuf);
if (ret != 0)
@@ -1424,17 +1449,16 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
return 0;
uncached:
- if (!xdata)
- xdata = xattr_alloc = dict_new();
- if (xdata)
- mdc_load_reqs(this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat, fd, xdata);
- if (xattr_alloc)
- dict_unref(xattr_alloc);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
return 0;
}
@@ -1473,8 +1497,9 @@ mdc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- local->loc.inode = inode_ref(loc->inode);
+ if (local != NULL) {
+ local->loc.inode = inode_ref(loc->inode);
+ }
STACK_WIND(frame, mdc_truncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
@@ -1517,8 +1542,9 @@ mdc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, fd->inode);
-
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
STACK_WIND(frame, mdc_ftruncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
@@ -1566,9 +1592,10 @@ mdc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- loc_copy(&local->loc, loc);
- local->xattr = dict_ref(xdata);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
STACK_WIND(frame, mdc_mknod_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
@@ -1616,9 +1643,10 @@ mdc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- loc_copy(&local->loc, loc);
- local->xattr = dict_ref(xdata);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
STACK_WIND(frame, mdc_mkdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
@@ -1675,8 +1703,9 @@ mdc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- loc_copy(&local->loc, loc);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
STACK_WIND(frame, mdc_unlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
@@ -1729,8 +1758,9 @@ mdc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- loc_copy(&local->loc, loc);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
STACK_WIND(frame, mdc_rmdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir, loc, flag, xdata);
@@ -1777,13 +1807,22 @@ mdc_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
loc_t *loc, mode_t umask, dict_t *xdata)
{
mdc_local_t *local = NULL;
+ char *name;
+ name = gf_strdup(linkname);
+ if (name == NULL) {
+ goto wind;
+ }
local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ GF_FREE(name);
+ goto wind;
+ }
loc_copy(&local->loc, loc);
+ local->linkname = name;
- local->linkname = gf_strdup(linkname);
-
+wind:
STACK_WIND(frame, mdc_symlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
return 0;
@@ -1841,9 +1880,10 @@ mdc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, oldloc->inode);
-
- loc_copy(&local->loc, oldloc);
- loc_copy(&local->loc2, newloc);
+ if (local != NULL) {
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ }
STACK_WIND(frame, mdc_rename_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
@@ -1892,9 +1932,10 @@ mdc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, oldloc->inode);
-
- loc_copy(&local->loc, oldloc);
- loc_copy(&local->loc2, newloc);
+ if (local != NULL) {
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ }
STACK_WIND(frame, mdc_link_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
@@ -1943,9 +1984,10 @@ mdc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- loc_copy(&local->loc, loc);
- local->xattr = dict_ref(xdata);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
STACK_WIND(frame, mdc_create_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
@@ -1992,8 +2034,9 @@ mdc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
}
local = mdc_local_get(frame, loc->inode);
-
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
out:
STACK_WIND(frame, mdc_open_cbk, FIRST_CHILD(this),
@@ -2034,8 +2077,9 @@ mdc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, fd->inode);
-
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
STACK_WIND(frame, mdc_readv_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
@@ -2076,8 +2120,9 @@ mdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, fd->inode);
-
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
STACK_WIND(frame, mdc_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
@@ -2093,15 +2138,14 @@ mdc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
mdc_local_t *local = NULL;
local = frame->local;
+ if (!local)
+ goto out;
if (op_ret != 0) {
mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time);
goto out;
}
- if (!local)
- goto out;
-
mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf,
_gf_true, local->incident_time);
mdc_inode_xatt_update(this, local->loc.inode, xdata);
@@ -2122,6 +2166,9 @@ mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
struct mdc_conf *conf = this->private;
local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ goto wind;
+ }
loc_copy(&local->loc, loc);
@@ -2149,6 +2196,7 @@ mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
}
}
+wind:
STACK_WIND(frame, mdc_setattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
@@ -2194,8 +2242,11 @@ mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
struct mdc_conf *conf = this->private;
local = mdc_local_get(frame, fd->inode);
+ if (local == NULL) {
+ goto wind;
+ }
- local->fd = fd_ref(fd);
+ local->fd = __fd_ref(fd);
if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) {
if (!xdata)
@@ -2221,6 +2272,7 @@ mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
}
}
+wind:
STACK_WIND(frame, mdc_fsetattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
@@ -2262,8 +2314,9 @@ mdc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, fd->inode);
-
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
STACK_WIND(frame, mdc_fsync_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
@@ -2318,9 +2371,10 @@ mdc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- loc_copy(&local->loc, loc);
- local->xattr = dict_ref(xattr);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xattr);
+ }
STACK_WIND(frame, mdc_setxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
@@ -2376,9 +2430,10 @@ mdc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
mdc_local_t *local = NULL;
local = mdc_local_get(frame, fd->inode);
-
- local->fd = fd_ref(fd);
- local->xattr = dict_ref(xattr);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ local->xattr = dict_ref(xattr);
+ }
STACK_WIND(frame, mdc_fsetxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr, fd, xattr, flags, xdata);
@@ -2408,7 +2463,9 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ }
out:
MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
@@ -2425,19 +2482,19 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
mdc_local_t *local = NULL;
dict_t *xattr = NULL;
struct mdc_conf *conf = this->private;
- dict_t *xattr_alloc = NULL;
- gf_boolean_t key_satisfied = _gf_true;
+ gf_boolean_t key_satisfied = _gf_false;
local = mdc_local_get(frame, loc->inode);
- if (!local)
+ if (!local) {
goto uncached;
+ }
loc_copy(&local->loc, loc);
if (!is_mdc_key_satisfied(this, key)) {
- key_satisfied = _gf_false;
goto uncached;
}
+ key_satisfied = _gf_true;
ret = mdc_inode_xatt_get(this, loc->inode, &xattr);
if (ret != 0)
@@ -2458,18 +2515,17 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
uncached:
if (key_satisfied) {
- if (!xdata)
- xdata = xattr_alloc = dict_new();
- if (xdata)
- mdc_load_reqs(this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
}
GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
- if (xattr_alloc)
- dict_unref(xattr_alloc);
+ if (key_satisfied && (xdata != NULL)) {
+ dict_unref(xdata);
+ }
+
return 0;
}
@@ -2496,7 +2552,9 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ }
out:
MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
@@ -2513,14 +2571,13 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
dict_t *xattr = NULL;
int op_errno = ENODATA;
struct mdc_conf *conf = this->private;
- dict_t *xattr_alloc = NULL;
gf_boolean_t key_satisfied = _gf_true;
local = mdc_local_get(frame, fd->inode);
if (!local)
goto uncached;
- local->fd = fd_ref(fd);
+ local->fd = __fd_ref(fd);
if (!is_mdc_key_satisfied(this, key)) {
key_satisfied = _gf_false;
@@ -2546,18 +2603,17 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
uncached:
if (key_satisfied) {
- if (!xdata)
- xdata = xattr_alloc = dict_new();
- if (xdata)
- mdc_load_reqs(this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
}
GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
- if (xattr_alloc)
- dict_unref(xattr_alloc);
+ if (key_satisfied && (xdata != NULL)) {
+ dict_unref(xdata);
+ }
+
return 0;
}
@@ -2613,12 +2669,21 @@ mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int ret = 0;
dict_t *xattr = NULL;
struct mdc_conf *conf = this->private;
+ char *name2;
+
+ name2 = gf_strdup(name);
+ if (name2 == NULL) {
+ goto uncached;
+ }
local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ GF_FREE(name2);
+ goto uncached;
+ }
loc_copy(&local->loc, loc);
-
- local->key = gf_strdup(name);
+ local->key = name2;
if (!is_mdc_key_satisfied(this, name))
goto uncached;
@@ -2704,12 +2769,21 @@ mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = 0;
dict_t *xattr = NULL;
struct mdc_conf *conf = this->private;
+ char *name2;
- local = mdc_local_get(frame, fd->inode);
+ name2 = gf_strdup(name);
+ if (name2 == NULL) {
+ goto uncached;
+ }
- local->fd = fd_ref(fd);
+ local = mdc_local_get(frame, fd->inode);
+ if (local == NULL) {
+ GF_FREE(name2);
+ goto uncached;
+ }
- local->key = gf_strdup(name);
+ local->fd = __fd_ref(fd);
+ local->key = name2;
if (!is_mdc_key_satisfied(this, name))
goto uncached;
@@ -2767,27 +2841,23 @@ int
mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xdata)
{
- dict_t *xattr_alloc = NULL;
mdc_local_t *local = NULL;
local = mdc_local_get(frame, loc->inode);
-
- loc_copy(&local->loc, loc);
-
- if (!xdata)
- xdata = xattr_alloc = dict_new();
-
- if (xdata) {
- /* Tell readdir-ahead to include these keys in xdata when it
- * internally issues readdirp() in it's opendir_cbk */
- mdc_load_reqs(this, xdata);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
}
+ /* Tell readdir-ahead to include these keys in xdata when it
+ * internally issues readdirp() in its opendir_cbk */
+ xdata = mdc_prepare_request(this, local, xdata);
+
STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- if (xattr_alloc)
- dict_unref(xattr_alloc);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
return 0;
}
@@ -2815,7 +2885,9 @@ mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
continue;
mdc_inode_iatt_set(this, entry->inode, &entry->d_stat,
local->incident_time);
- mdc_inode_xatt_set(this, entry->inode, entry->dict);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, entry->inode, entry->dict);
+ }
}
unwind:
@@ -2827,24 +2899,23 @@ int
mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, dict_t *xdata)
{
- dict_t *xattr_alloc = NULL;
mdc_local_t *local = NULL;
local = mdc_local_get(frame, fd->inode);
if (!local)
goto out;
- local->fd = fd_ref(fd);
+ local->fd = __fd_ref(fd);
- if (!xdata)
- xdata = xattr_alloc = dict_new();
- if (xdata)
- mdc_load_reqs(this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- if (xattr_alloc)
- dict_unref(xattr_alloc);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
return 0;
out:
MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
@@ -2875,7 +2946,6 @@ int
mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, dict_t *xdata)
{
- int need_unref = 0;
mdc_local_t *local = NULL;
struct mdc_conf *conf = this->private;
@@ -2883,7 +2953,7 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
if (!local)
goto unwind;
- local->fd = fd_ref(fd);
+ local->fd = __fd_ref(fd);
if (!conf->force_readdirp) {
STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this),
@@ -2891,19 +2961,14 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
return 0;
}
- if (!xdata) {
- xdata = dict_new();
- need_unref = 1;
- }
-
- if (xdata)
- mdc_load_reqs(this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- if (need_unref && xdata)
+ if (xdata != NULL) {
dict_unref(xdata);
+ }
return 0;
unwind:
@@ -2945,7 +3010,9 @@ mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
mdc_local_t *local;
local = mdc_local_get(frame, fd->inode);
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
@@ -2987,7 +3054,9 @@ mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
mdc_local_t *local;
local = mdc_local_get(frame, fd->inode);
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
@@ -3028,7 +3097,9 @@ mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
mdc_local_t *local;
local = mdc_local_get(frame, fd->inode);
- local->fd = fd_ref(fd);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
@@ -3110,7 +3181,7 @@ mdc_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
if (!local)
goto unwind;
- local->fd = fd_ref(fd);
+ local->fd = __fd_ref(fd);
STACK_WIND(frame, mdc_fsyncdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
@@ -3483,7 +3554,12 @@ mdc_register_xattr_inval(xlator_t *this)
goto out;
}
- mdc_load_reqs(this, xattr);
+ if (!mdc_load_reqs(this, xattr)) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to populate cache entries");
+ ret = -1;
+ goto out;
+ }
frame = create_frame(this, this->ctx->pool);
if (!frame) {
@@ -3532,7 +3608,7 @@ int
mdc_reconfigure(xlator_t *this, dict_t *options)
{
struct mdc_conf *conf = NULL;
- int timeout = 0;
+ int timeout = 0, ret = 0;
char *tmp_str = NULL;
conf = this->private;
@@ -3572,7 +3648,10 @@ mdc_reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out);
GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out);
- mdc_xattr_list_populate(conf, tmp_str);
+
+ ret = mdc_xattr_list_populate(conf, tmp_str);
+ if (ret < 0)
+ goto out;
/* If timeout is greater than 60s (default before the patch that added
* cache invalidation support was added) then, cache invalidation
@@ -3585,25 +3664,22 @@ mdc_reconfigure(xlator_t *this, dict_t *options)
}
conf->timeout = timeout;
- (void)mdc_register_xattr_inval(this);
+ ret = mdc_register_xattr_inval(this);
out:
- return 0;
+ return ret;
}
int32_t
mdc_mem_acct_init(xlator_t *this)
{
- int ret = -1;
-
- ret = xlator_mem_acct_init(this, gf_mdc_mt_end + 1);
- return ret;
+ return xlator_mem_acct_init(this, gf_mdc_mt_end + 1);
}
int
mdc_init(xlator_t *this)
{
struct mdc_conf *conf = NULL;
- int timeout = 0;
+ uint32_t timeout = 0;
char *tmp_str = NULL;
conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t);
@@ -3615,7 +3691,7 @@ mdc_init(xlator_t *this)
LOCK_INIT(&conf->lock);
- GF_OPTION_INIT("md-cache-timeout", timeout, int32, out);
+ GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out);
GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out);
@@ -3649,7 +3725,9 @@ mdc_init(xlator_t *this)
GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out);
mdc_xattr_list_populate(conf, tmp_str);
- time(&conf->last_child_down);
+ conf->last_child_down = gf_time();
+ conf->statfs_cache.last_refreshed = (time_t)-1;
+
/* initialize gf_atomic_t counters */
GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0);
GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0);
@@ -3680,7 +3758,7 @@ out:
}
void
-mdc_update_child_down_time(xlator_t *this, time_t *now)
+mdc_update_child_down_time(xlator_t *this, time_t now)
{
struct mdc_conf *conf = NULL;
@@ -3688,7 +3766,7 @@ mdc_update_child_down_time(xlator_t *this, time_t *now)
LOCK(&conf->lock);
{
- conf->last_child_down = *now;
+ conf->last_child_down = now;
}
UNLOCK(&conf->lock);
}
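mdc_update_child_down_time() (and its nl-cache twin further down) switches the timestamp parameter from time_t * to plain time_t: the value is a scalar, so pass-by-pointer bought nothing and forced every caller to materialize a local just to take its address. Call sites shrink accordingly:

    /* before: time(&now); mdc_update_child_down_time(this, &now);
     * after:  mdc_update_child_down_time(this, gf_time());        */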
@@ -3698,14 +3776,12 @@ mdc_notify(xlator_t *this, int event, void *data, ...)
{
int ret = 0;
struct mdc_conf *conf = NULL;
- time_t now = 0;
conf = this->private;
switch (event) {
case GF_EVENT_CHILD_DOWN:
case GF_EVENT_SOME_DESCENDENT_DOWN:
- time(&now);
- mdc_update_child_down_time(this, &now);
+ mdc_update_child_down_time(this, gf_time());
break;
case GF_EVENT_UPCALL:
if (conf->mdc_invalidation)
diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c
index 03dedf8ea08..29b99b5b8ea 100644
--- a/xlators/performance/nl-cache/src/nl-cache-helper.c
+++ b/xlators/performance/nl-cache/src/nl-cache-helper.c
@@ -113,7 +113,7 @@ out:
}
void
-nlc_update_child_down_time(xlator_t *this, time_t *now)
+nlc_update_child_down_time(xlator_t *this, time_t now)
{
nlc_conf_t *conf = NULL;
@@ -121,7 +121,7 @@ nlc_update_child_down_time(xlator_t *this, time_t *now)
LOCK(&conf->lock);
{
- conf->last_child_down = *now;
+ conf->last_child_down = now;
}
UNLOCK(&conf->lock);
@@ -262,7 +262,7 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
if (nlc_ctx->timer) {
gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer,
conf->cache_timeout);
- time(&nlc_ctx->cache_time);
+ nlc_ctx->cache_time = gf_time();
goto unlock;
}
@@ -496,7 +496,7 @@ __nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
nlc_ctx->timer_data = tmp;
gf_tw_add_timer(conf->timer_wheel, timer);
- time(&nlc_ctx->cache_time);
+ nlc_ctx->cache_time = gf_time();
gf_msg_trace(this->name, 0,
"Registering timer:%p, inode:%p, "
"gfid:%s",
diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c
index cd0e1d195fd..33a7c471663 100644
--- a/xlators/performance/nl-cache/src/nl-cache.c
+++ b/xlators/performance/nl-cache/src/nl-cache.c
@@ -520,15 +520,13 @@ int
nlc_notify(xlator_t *this, int event, void *data, ...)
{
int ret = 0;
- time_t now = 0;
switch (event) {
case GF_EVENT_CHILD_DOWN:
case GF_EVENT_SOME_DESCENDENT_DOWN:
case GF_EVENT_CHILD_UP:
case GF_EVENT_SOME_DESCENDENT_UP:
- time(&now);
- nlc_update_child_down_time(this, &now);
+ nlc_update_child_down_time(this, gf_time());
/* TODO: nlc_clear_all_cache (this); else
lru prune will lazily clear it*/
break;
@@ -731,7 +729,7 @@ nlc_init(xlator_t *this)
GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0);
INIT_LIST_HEAD(&conf->lru);
- time(&conf->last_child_down);
+ conf->last_child_down = gf_time();
conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
if (!conf->timer_wheel) {
diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h
index 8b09972bb09..85fcc176342 100644
--- a/xlators/performance/nl-cache/src/nl-cache.h
+++ b/xlators/performance/nl-cache/src/nl-cache.h
@@ -155,7 +155,7 @@ nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
loc_t *loc, loc_t *loc2);
void
-nlc_update_child_down_time(xlator_t *this, time_t *now);
+nlc_update_child_down_time(xlator_t *this, time_t now);
void
nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason);
diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
index f25082433f8..0e789177684 100644
--- a/xlators/performance/open-behind/src/open-behind-messages.h
+++ b/xlators/performance/open-behind/src/open-behind-messages.h
@@ -23,6 +23,10 @@
*/
GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
- OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY);
+ OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY,
+ OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE);
+
+#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop"
+#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state"
#endif /* _OPEN_BEHIND_MESSAGES_H_ */
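The two new IDs follow the structured-logging convention: each message ID gains a companion *_STR macro carrying the human-readable text, and call sites emit key=value fields through gf_smsg(), as the macros in open-behind.c below do:

    gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, OPEN_BEHIND_MSG_FAILED,
            "fop=%s", "open", NULL);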
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
index cbe89ec82e8..600c3b62ffe 100644
--- a/xlators/performance/open-behind/src/open-behind.c
+++ b/xlators/performance/open-behind/src/open-behind.c
@@ -16,6 +16,18 @@
#include "open-behind-messages.h"
#include <glusterfs/glusterfs-acl.h>
+/* Note: The initial design of open-behind was made to cover the simple case
+ * of open, read, close for small files. This pattern combined with
+ * quick-read can do the whole operation without a single request to the
+ * bricks (except the initial lookup).
+ *
+ * The way to do this has been improved, but the logic remains the same.
+ * Basically, this means that any operation sent to the fd or the inode
+ * that is not a read causes the open request to be sent to the
+ * bricks, and all future operations will be executed synchronously,
+ * including opens (it's reset once all fds are closed).
+ */
+
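The access pattern that note describes, seen from the application side (a sketch; the per-step behaviour follows the state machine defined next):

    /* fd = open(path, O_RDONLY);  -> answered locally, open deferred
     * read(fd, buf, size);        -> served through an anonymous fd
     * close(fd);                  -> the deferred open never hits the bricks
     *
     * Any other fop on the fd or inode (write, a second open, ...) flushes
     * the deferred open to the bricks first, and the inode stays
     * synchronous until all of its fds are closed. */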
typedef struct ob_conf {
gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
e.g - fstat() readv()
@@ -32,1096 +44,811 @@ typedef struct ob_conf {
*/
} ob_conf_t;
-typedef struct ob_inode {
- inode_t *inode;
- struct list_head resume_fops;
- struct list_head ob_fds;
- int count;
- int op_ret;
- int op_errno;
- gf_boolean_t open_in_progress;
- int unlinked;
-} ob_inode_t;
+/* A negative state represents an errno value negated. In this case the
+ * current operation cannot be processed. */
+typedef enum _ob_state {
+ /* There are no opens on the inode or the first open is already
+ * completed. The current operation can be sent directly. */
+ OB_STATE_READY = 0,
-typedef struct ob_fd {
- call_frame_t *open_frame;
- loc_t loc;
- dict_t *xdata;
- int flags;
- int op_errno;
- ob_inode_t *ob_inode;
- fd_t *fd;
- gf_boolean_t opened;
- gf_boolean_t ob_inode_fops_waiting;
- struct list_head list;
- struct list_head ob_fds_on_inode;
-} ob_fd_t;
+ /* There's an open pending and it has been triggered. The current
+ * operation should be "stubbified" and processed with
+ * ob_stub_dispatch(). */
+ OB_STATE_OPEN_TRIGGERED,
-ob_inode_t *
-ob_inode_alloc(inode_t *inode)
-{
- ob_inode_t *ob_inode = NULL;
+ /* There's an open pending but it has not been triggered. The current
+ * operation can be processed directly but using an anonymous fd. */
+ OB_STATE_OPEN_PENDING,
- ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
- if (ob_inode == NULL)
- goto out;
+ /* The current operation is the first open on the inode. */
+ OB_STATE_FIRST_OPEN
+} ob_state_t;
- ob_inode->inode = inode;
- INIT_LIST_HEAD(&ob_inode->resume_fops);
- INIT_LIST_HEAD(&ob_inode->ob_fds);
-out:
- return ob_inode;
-}
-
-void
-ob_inode_free(ob_inode_t *ob_inode)
-{
- if (ob_inode == NULL)
- goto out;
+typedef struct ob_inode {
+ /* List of stubs pending on the first open. Once the first open is
+ * complete, all these stubs will be resubmitted, and dependencies
+ * will be checked again. */
+ struct list_head resume_fops;
- list_del_init(&ob_inode->resume_fops);
- list_del_init(&ob_inode->ob_fds);
+ /* The inode this object references. */
+ inode_t *inode;
- GF_FREE(ob_inode);
-out:
- return;
-}
+ /* The fd from the first open sent to this inode. It will be set
+ * from the moment the open is processed until the open is fully
+ * executed or the fd is closed before being opened. It's NULL in all
+ * other cases. */
+ fd_t *first_fd;
+
+ /* The stub from the first open operation. When open fop starts
+ * being processed, it's assigned the OB_OPEN_PREPARING value
+ * until the actual stub is created. This is necessary to avoid
+ * creating the stub inside a locked region. Once the stub is
+ * successfully created, it's assigned here. This value is set
+ * to NULL once the stub is resumed. */
+ call_stub_t *first_open;
+
+ /* The total number of currently open fd's on this inode. */
+ int32_t open_count;
+
+ /* This flag is set as soon as we know that the open will be
+ * sent to the bricks, even before the stub is ready. */
+ bool triggered;
+} ob_inode_t;
-ob_inode_t *
-ob_inode_get(xlator_t *this, inode_t *inode)
+/* Dummy pointer used temporarily while the actual open stub is being created */
+#define OB_OPEN_PREPARING ((call_stub_t *)-1)
+
+#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \
+ case OB_STATE_FIRST_OPEN: \
+ gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \
+ "fop=%s", #_fop, "state=%d", __ob_state, NULL); \
+ default_##_fop##_failure_cbk(_frame, EINVAL); \
+ break; \
+ case OB_STATE_READY: \
+ default_##_fop(_frame, _xl, ##_args); \
+ break; \
+ case OB_STATE_OPEN_TRIGGERED: { \
+ call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \
+ ##_args); \
+ if (__ob_stub != NULL) { \
+ ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \
+ break; \
+ } \
+ __ob_state = -ENOMEM; \
+ } \
+ default: \
+ gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \
+ OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \
+ default_##_fop##_failure_cbk(_frame, -__ob_state)
+
+#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_fd( \
+ _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ if (!(_trigger)) { \
+ fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \
+ (_fd)->flags); \
+ if (__ob_fd != NULL) { \
+ default_##_fop(_frame, _xl, ##_args); \
+ fd_unref(__ob_fd); \
+ break; \
+ } \
+ __ob_state = -ENOMEM; \
+ } \
+ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_fd( \
+ _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \
+ break; \
+ OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_inode( \
+ _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
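A reading aid for the macro family above: OB_POST_COMMON expands to the tail of a switch statement whose head each wrapper opens, so a fully expanded OB_POST_INODE behaves roughly like this (wrapper-specific handling elided):

    switch (state) {
    case OB_STATE_OPEN_PENDING:
        /* wrapper-specific case, may fall through */
    case OB_STATE_FIRST_OPEN:
        /* impossible for plain fops: log BAD_STATE, fail with EINVAL */
        break;
    case OB_STATE_READY:
        /* wind the fop directly */
        break;
    case OB_STATE_OPEN_TRIGGERED:
        /* stubify the fop and queue it via ob_stub_dispatch();
         * on stub allocation failure fall through */
    default:
        /* negative states carry a negated errno: fail the fop */
        break;
    }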
+static ob_inode_t *
+ob_inode_get_locked(xlator_t *this, inode_t *inode)
{
ob_inode_t *ob_inode = NULL;
uint64_t value = 0;
- int ret = 0;
- if (!inode)
- goto out;
+ if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) {
+ return (ob_inode_t *)(uintptr_t)value;
+ }
- LOCK(&inode->lock);
- {
- __inode_ctx_get(inode, this, &value);
- if (value == 0) {
- ob_inode = ob_inode_alloc(inode);
- if (ob_inode == NULL)
- goto unlock;
-
- value = (uint64_t)(uintptr_t)ob_inode;
- ret = __inode_ctx_set(inode, this, &value);
- if (ret < 0) {
- ob_inode_free(ob_inode);
- ob_inode = NULL;
- }
- } else {
- ob_inode = (ob_inode_t *)(uintptr_t)value;
+ ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
+ if (ob_inode != NULL) {
+ ob_inode->inode = inode;
+ INIT_LIST_HEAD(&ob_inode->resume_fops);
+
+ value = (uint64_t)(uintptr_t)ob_inode;
+ if (__inode_ctx_set(inode, this, &value) < 0) {
+ GF_FREE(ob_inode);
+ ob_inode = NULL;
}
}
-unlock:
- UNLOCK(&inode->lock);
-out:
return ob_inode;
}
-ob_fd_t *
-__ob_fd_ctx_get(xlator_t *this, fd_t *fd)
+static ob_state_t
+ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd,
+ int32_t open_count, bool synchronous, bool trigger,
+ ob_inode_t **pob_inode, fd_t **pfd)
{
- uint64_t value = 0;
- int ret = -1;
- ob_fd_t *ob_fd = NULL;
+ ob_conf_t *conf;
+ ob_inode_t *ob_inode;
+ call_stub_t *open_stub;
- ret = __fd_ctx_get(fd, this, &value);
- if (ret)
- return NULL;
+ if (inode == NULL) {
+ return OB_STATE_READY;
+ }
- ob_fd = (void *)((long)value);
+ conf = xl->private;
- return ob_fd;
-}
+ *pfd = NULL;
-ob_fd_t *
-ob_fd_ctx_get(xlator_t *this, fd_t *fd)
-{
- ob_fd_t *ob_fd = NULL;
-
- LOCK(&fd->lock);
+ LOCK(&inode->lock);
{
- ob_fd = __ob_fd_ctx_get(this, fd);
- }
- UNLOCK(&fd->lock);
-
- return ob_fd;
-}
+ ob_inode = ob_inode_get_locked(xl, inode);
+ if (ob_inode == NULL) {
+ UNLOCK(&inode->lock);
-int
-__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
-{
- uint64_t value = 0;
- int ret = -1;
+ return -ENOMEM;
+ }
+ *pob_inode = ob_inode;
+
+ ob_inode->open_count += open_count;
+
+ /* If first_fd is not NULL, it means that there's a previous open not
+ * yet completed. */
+ if (ob_inode->first_fd != NULL) {
+ *pfd = ob_inode->first_fd;
+ /* If the current request doesn't trigger the open and it hasn't
+ * been triggered yet, we can continue without issuing the open
+ * only if the current request belongs to the same fd as the
+ * first one. */
+ if (!trigger && !ob_inode->triggered &&
+ (ob_inode->first_fd == fd)) {
+ UNLOCK(&inode->lock);
+
+ return OB_STATE_OPEN_PENDING;
+ }
- value = (long)((void *)ob_fd);
+ /* We need to issue the open. It could have already been triggered
+ * before. In this case open_stub will be NULL. Or the initial open
+ * may not be completely ready yet. In this case open_stub will be
+ * OB_OPEN_PREPARING. */
+ open_stub = ob_inode->first_open;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = true;
- ret = __fd_ctx_set(fd, this, value);
+ UNLOCK(&inode->lock);
- return ret;
-}
+ if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) {
+ call_resume(open_stub);
+ }
-int
-ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
-{
- int ret = -1;
+ return OB_STATE_OPEN_TRIGGERED;
+ }
- LOCK(&fd->lock);
- {
- ret = __ob_fd_ctx_set(this, fd, ob_fd);
- }
- UNLOCK(&fd->lock);
+ /* There's no pending open. Only opens can be non-synchronous, so all
+ * regular fops will be processed directly. For non-synchronous opens,
+ * we'll still process them normally (i.e. synchronously) if there are
+ * more file descriptors open. */
+ if (synchronous || (ob_inode->open_count > open_count)) {
+ UNLOCK(&inode->lock);
- return ret;
-}
+ return OB_STATE_READY;
+ }
-ob_fd_t *
-ob_fd_new(void)
-{
- ob_fd_t *ob_fd = NULL;
+ *pfd = fd;
- ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t);
+ /* This is the first open. We keep a reference on the fd and set
+ * first_open stub to OB_OPEN_PREPARING until the actual stub can
+ * be assigned (we don't create the stub here to avoid doing memory
+ * allocations inside the mutex). */
+ ob_inode->first_fd = __fd_ref(fd);
+ ob_inode->first_open = OB_OPEN_PREPARING;
- INIT_LIST_HEAD(&ob_fd->list);
- INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode);
+ /* If lazy_open is not set, we'll need to immediately send the open,
+ * so we set triggered right now. */
+ ob_inode->triggered = !conf->lazy_open;
+ }
+ UNLOCK(&inode->lock);
- return ob_fd;
+ return OB_STATE_FIRST_OPEN;
}
-void
-ob_fd_free(ob_fd_t *ob_fd)
+static ob_state_t
+ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count,
+ bool synchronous, bool trigger, ob_inode_t **pob_inode,
+ fd_t **pfd)
{
- LOCK(&ob_fd->fd->inode->lock);
- {
- list_del_init(&ob_fd->ob_fds_on_inode);
- }
- UNLOCK(&ob_fd->fd->inode->lock);
-
- loc_wipe(&ob_fd->loc);
-
- if (ob_fd->xdata)
- dict_unref(ob_fd->xdata);
+ uint64_t err;
- if (ob_fd->open_frame) {
- /* If we sill have a frame it means that background open has never
- * been triggered. We need to release the pending reference. */
- fd_unref(ob_fd->fd);
-
- STACK_DESTROY(ob_fd->open_frame->root);
+ if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) {
+ return (ob_state_t)-err;
}
- GF_FREE(ob_fd);
+ return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous,
+ trigger, pob_inode, pfd);
}
-int
-ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd_ret, dict_t *xdata)
+static ob_state_t
+ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode,
+ fd_t **pfd)
{
- fd_t *fd = NULL;
- int count = 0;
- int ob_inode_op_ret = 0;
- int ob_inode_op_errno = 0;
- ob_fd_t *ob_fd = NULL;
- call_stub_t *stub = NULL, *tmp = NULL;
- ob_inode_t *ob_inode = NULL;
- gf_boolean_t ob_inode_fops_waiting = _gf_false;
- struct list_head fops_waiting_on_fd, fops_waiting_on_inode;
+ bool synchronous;
- fd = frame->local;
- frame->local = NULL;
-
- INIT_LIST_HEAD(&fops_waiting_on_fd);
- INIT_LIST_HEAD(&fops_waiting_on_inode);
+ /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't
+ * we also execute this open synchronously? */
+ synchronous = (flags & O_TRUNC) != 0;
- ob_inode = ob_inode_get(this, fd->inode);
+ return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd);
+}
- LOCK(&fd->lock);
+static int32_t
+ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ call_stub_t *stub)
+{
+ LOCK(&ob_inode->inode->lock);
{
- ob_fd = __ob_fd_ctx_get(this, fd);
- ob_fd->opened = _gf_true;
-
- ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting;
-
- list_splice_init(&ob_fd->list, &fops_waiting_on_fd);
-
- if (op_ret < 0) {
- /* mark fd BAD for ever */
- ob_fd->op_errno = op_errno;
- ob_fd = NULL; /*shouldn't be freed*/
- } else {
- __fd_ctx_del(fd, this, NULL);
- }
- }
- UNLOCK(&fd->lock);
-
- if (ob_inode_fops_waiting) {
- LOCK(&fd->inode->lock);
- {
- count = --ob_inode->count;
- if (op_ret < 0) {
- /* TODO: when to reset the error? */
- ob_inode->op_ret = -1;
- ob_inode->op_errno = op_errno;
- }
-
- if (count == 0) {
- ob_inode->open_in_progress = _gf_false;
- ob_inode_op_ret = ob_inode->op_ret;
- ob_inode_op_errno = ob_inode->op_errno;
- list_splice_init(&ob_inode->resume_fops,
- &fops_waiting_on_inode);
- }
+ /* We only queue a stub if the open has not been completed or
+ * cancelled. */
+ if (ob_inode->first_fd == fd) {
+ list_add_tail(&stub->list, &ob_inode->resume_fops);
+ stub = NULL;
}
- UNLOCK(&fd->inode->lock);
- }
-
- if (ob_fd)
- ob_fd_free(ob_fd);
-
- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list)
- {
- list_del_init(&stub->list);
-
- if (op_ret < 0)
- call_unwind_error(stub, -1, op_errno);
- else
- call_resume(stub);
}
+ UNLOCK(&ob_inode->inode->lock);
- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list)
- {
- list_del_init(&stub->list);
-
- if (ob_inode_op_ret < 0)
- call_unwind_error(stub, -1, ob_inode_op_errno);
- else
- call_resume(stub);
+ if (stub != NULL) {
+ call_resume(stub);
}
- /* The background open is completed. We can release the 'fd' reference. */
- fd_unref(fd);
-
- STACK_DESTROY(frame->root);
-
return 0;
}
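ob_stub_dispatch() shows the locking discipline used throughout the rewrite: the stub is queued under the inode lock only while the first open is still outstanding (first_fd == fd); otherwise it survives the critical section and is resumed outside the lock, so fops never run with the lock held and no wakeup is lost between the check and the queueing. The general shape (still_pending and queue() are placeholders):

    LOCK(&lock);
    if (still_pending) {
        queue(stub);
        stub = NULL;
    }
    UNLOCK(&lock);
    if (stub != NULL)
        call_resume(stub); /* run outside the lock */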
-int
-ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+static void
+ob_open_destroy(call_stub_t *stub, fd_t *fd)
{
- call_frame_t *frame = NULL;
+ stub->frame->local = NULL;
+ STACK_DESTROY(stub->frame->root);
+ call_stub_destroy(stub);
+ fd_unref(fd);
+}
- if (ob_fd == NULL) {
- LOCK(&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get(this, fd);
- if (!ob_fd)
- goto unlock;
+static int32_t
+ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ call_stub_t *stub)
+{
+ bool closed;
- frame = ob_fd->open_frame;
- ob_fd->open_frame = NULL;
- }
- unlock:
- UNLOCK(&fd->lock);
- } else {
- LOCK(&fd->lock);
- {
- frame = ob_fd->open_frame;
- ob_fd->open_frame = NULL;
+ LOCK(&ob_inode->inode->lock);
+ {
+ closed = ob_inode->first_fd != fd;
+ if (!closed) {
+ if (ob_inode->triggered) {
+ ob_inode->first_open = NULL;
+ } else {
+ ob_inode->first_open = stub;
+ stub = NULL;
+ }
}
- UNLOCK(&fd->lock);
}
+ UNLOCK(&ob_inode->inode->lock);
- if (frame) {
- /* We don't need to take a reference here. We already have a reference
- * while the open is pending. */
- frame->local = fd;
-
- STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
- ob_fd->xdata);
+ if (stub != NULL) {
+ if (closed) {
+ ob_open_destroy(stub, fd);
+ } else {
+ call_resume(stub);
+ }
}
return 0;
}
-void
-ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
+static void
+ob_resume_pending(struct list_head *list)
{
- ob_fd_t *ob_fd = NULL, *tmp = NULL;
+ call_stub_t *stub;
- if (!list_empty(ob_fds)) {
- list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
- {
- ob_fd_wake(this, ob_fd->fd, ob_fd);
- ob_fd_free(ob_fd);
- }
- }
-}
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
-/* called holding inode->lock and fd->lock */
-void
-ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
-{
- if (!src || !dst)
- goto out;
-
- dst->fd = src->fd;
- dst->loc.inode = inode_ref(src->loc.inode);
- gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
- dst->flags = src->flags;
- dst->xdata = dict_ref(src->xdata);
- dst->ob_inode = src->ob_inode;
-out:
- return;
+ call_resume(stub);
+ }
}
-int
-open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode,
- call_stub_t *stub)
+static void
+ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret,
+ int32_t op_errno)
{
- ob_inode_t *ob_inode = NULL;
- ob_fd_t *ob_fd = NULL, *tmp = NULL;
- gf_boolean_t was_open_in_progress = _gf_false;
- gf_boolean_t wait_for_open = _gf_false;
- struct list_head ob_fds;
+ struct list_head list;
- ob_inode = ob_inode_get(this, inode);
- if (ob_inode == NULL)
- goto out;
+ INIT_LIST_HEAD(&list);
- INIT_LIST_HEAD(&ob_fds);
+ if (op_ret < 0) {
+ fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno);
+ }
- LOCK(&inode->lock);
+ LOCK(&ob_inode->inode->lock);
{
- was_open_in_progress = ob_inode->open_in_progress;
- ob_inode->unlinked = 1;
-
- if (was_open_in_progress) {
- list_add_tail(&stub->list, &ob_inode->resume_fops);
- goto inode_unlock;
- }
-
- list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode)
- {
- LOCK(&ob_fd->fd->lock);
- {
- if (ob_fd->opened)
- goto fd_unlock;
-
- ob_inode->count++;
- ob_fd->ob_inode_fops_waiting = _gf_true;
-
- if (ob_fd->open_frame == NULL) {
- /* open in progress no need of wake */
- } else {
- tmp = ob_fd_new();
- tmp->open_frame = ob_fd->open_frame;
- ob_fd->open_frame = NULL;
-
- ob_fd_copy(ob_fd, tmp);
- list_add_tail(&tmp->ob_fds_on_inode, &ob_fds);
- }
- }
- fd_unlock:
- UNLOCK(&ob_fd->fd->lock);
- }
-
- if (ob_inode->count) {
- wait_for_open = ob_inode->open_in_progress = _gf_true;
- list_add_tail(&stub->list, &ob_inode->resume_fops);
+ /* Only update the fields if the file has not been closed before
+ * getting here. */
+ if (ob_inode->first_fd == fd) {
+ list_splice_init(&ob_inode->resume_fops, &list);
+ ob_inode->first_fd = NULL;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = false;
}
}
-inode_unlock:
- UNLOCK(&inode->lock);
+ UNLOCK(&ob_inode->inode->lock);
-out:
- if (!was_open_in_progress) {
- if (!wait_for_open) {
- call_resume(stub);
- } else {
- ob_inode_wake(this, &ob_fds);
- }
- }
+ ob_resume_pending(&list);
- return 0;
+ fd_unref(fd);
}
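Note how failures become sticky: ob_open_completed() stamps the errno into the fd context (EIO if the brick returned none), and ob_open_and_resume_fd() above converts that stamp back into a negative ob_state_t, so every later fop on the poisoned fd fails immediately with the original error instead of re-attempting the open. In miniature:

    /* on a failed background open */
    fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno);

    /* on every subsequent fop through this fd */
    if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0))
        return (ob_state_t)-err; /* negative state == negated errno */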
-int
-open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub)
+static int32_t
+ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ob_fd_t *ob_fd = NULL;
- int op_errno = 0;
-
- if (!fd)
- goto nofd;
-
- LOCK(&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get(this, fd);
- if (!ob_fd)
- goto unlock;
+ ob_inode_t *ob_inode;
- if (ob_fd->op_errno) {
- op_errno = ob_fd->op_errno;
- goto unlock;
- }
+ ob_inode = frame->local;
+ frame->local = NULL;
- list_add_tail(&stub->list, &ob_fd->list);
- }
-unlock:
- UNLOCK(&fd->lock);
+ ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno);
-nofd:
- if (op_errno)
- call_unwind_error(stub, -1, op_errno);
- else if (ob_fd)
- ob_fd_wake(this, fd, NULL);
- else
- call_resume(stub);
+ STACK_DESTROY(frame->root);
return 0;
}
-int
-ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+static int32_t
+ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
fd_t *fd, dict_t *xdata)
{
- ob_fd_t *ob_fd = NULL;
- int ret = -1;
- ob_conf_t *conf = NULL;
- ob_inode_t *ob_inode = NULL;
- gf_boolean_t open_in_progress = _gf_false;
- int unlinked = 0;
+ STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- conf = this->private;
+ return 0;
+}
- if (flags & O_TRUNC) {
- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
+static int32_t
+ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
+{
+ ob_inode_t *ob_inode;
+ call_frame_t *open_frame;
+ call_stub_t *stub;
+ fd_t *first_fd;
+ ob_state_t state;
+
+ state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd);
+ if (state == OB_STATE_READY) {
+        /* There's no pending open, but either other file descriptors are
+         * already open or the current flags require a synchronous open. */
+ return default_open(frame, this, loc, flags, fd, xdata);
}
- ob_inode = ob_inode_get(this, fd->inode);
-
- ob_fd = ob_fd_new();
- if (!ob_fd)
- goto enomem;
-
- ob_fd->ob_inode = ob_inode;
+ if (state == OB_STATE_OPEN_TRIGGERED) {
+ /* The first open is in progress (either because it was already issued
+ * or because this request triggered it). We try to create a new stub
+ * to retry the operation once the initial open completes. */
+ stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata);
+ if (stub != NULL) {
+ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
+ }
- ob_fd->fd = fd;
+ state = -ENOMEM;
+ }
- ob_fd->open_frame = copy_frame(frame);
- if (!ob_fd->open_frame)
- goto enomem;
- ret = loc_copy(&ob_fd->loc, loc);
- if (ret)
- goto enomem;
+ if (state == OB_STATE_FIRST_OPEN) {
+ /* We try to create a stub for the new open. A new frame needs to be
+ * used because the current one may be destroyed soon after sending
+ * the open's reply. */
+ open_frame = copy_frame(frame);
+ if (open_frame != NULL) {
+ stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd,
+ xdata);
+ if (stub != NULL) {
+ open_frame->local = ob_inode;
- ob_fd->flags = flags;
- if (xdata)
- ob_fd->xdata = dict_ref(xdata);
+                /* TODO: The previous version passed xdata back to the
+                 * caller, but this probably doesn't make sense since it
+                 * won't contain any of the requested data. It would be
+                 * better to pass NULL for xdata. */
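+                /* This is the heart of open-behind: reply to the caller
+                 * right away and dispatch the real open in the background
+                 * using the copied frame. */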
+ default_open_cbk(frame, NULL, this, 0, 0, fd, xdata);
- LOCK(&fd->inode->lock);
- {
- open_in_progress = ob_inode->open_in_progress;
- unlinked = ob_inode->unlinked;
- if (!open_in_progress && !unlinked) {
- ret = ob_fd_ctx_set(this, fd, ob_fd);
- if (ret) {
- UNLOCK(&fd->inode->lock);
- goto enomem;
+ return ob_open_dispatch(this, ob_inode, first_fd, stub);
}
- list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds);
+ STACK_DESTROY(open_frame->root);
}
- }
- UNLOCK(&fd->inode->lock);
- /* We take a reference while the background open is pending or being
- * processed. If we finally wind the request in the foreground, then
- * ob_fd_free() will take care of this additional reference. */
- fd_ref(fd);
+ /* In case of error, simulate a regular completion but with an error
+ * code. */
+ ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM);
- if (!open_in_progress && !unlinked) {
- STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
-
- if (!conf->lazy_open)
- ob_fd_wake(this, fd, NULL);
- } else {
- ob_fd_free(ob_fd);
- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ state = -ENOMEM;
}
- return 0;
-enomem:
- if (ob_fd) {
- if (ob_fd->open_frame)
- STACK_DESTROY(ob_fd->open_frame->root);
-
- loc_wipe(&ob_fd->loc);
- if (ob_fd->xdata)
- dict_unref(ob_fd->xdata);
+ /* In case of failure we need to decrement the number of open files because
+ * ob_fdclose() won't be called. */
- GF_FREE(ob_fd);
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode->open_count--;
}
+ UNLOCK(&fd->inode->lock);
- return -1;
+ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
+ "open", "path=%s", loc->path, NULL);
+
+ return default_open_failure_cbk(frame, -state);
}
-int
-ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
- dict_t *xdata)
+static int32_t
+ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- fd_t *old_fd = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
- call_stub_t *stub = NULL;
-
- old_fd = fd_lookup(fd->inode, 0);
- if (old_fd) {
- /* open-behind only when this is the first FD */
- stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata);
- if (!stub) {
- fd_unref(old_fd);
- goto err;
- }
-
- open_and_resume(this, old_fd, stub);
-
- fd_unref(old_fd);
-
- return 0;
- }
-
- ret = ob_open_behind(frame, this, loc, flags, fd, xdata);
- if (ret) {
- goto err;
+ ob_inode_t *ob_inode;
+ call_stub_t *stub;
+ fd_t *first_fd;
+ ob_state_t state;
+
+ /* Create requests are never delayed. We always send them synchronously. */
+ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode,
+ &first_fd);
+ if (state == OB_STATE_READY) {
+        /* There's no pending open, but other file descriptors are already
+         * open, so we simply forward the request synchronously. */
+ return default_create(frame, this, loc, flags, mode, umask, fd, xdata);
}
- return 0;
-err:
- gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s",
- loc->path);
-
- STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0);
-
- return 0;
-}
+ if (state == OB_STATE_OPEN_TRIGGERED) {
+ /* The first open is in progress (either because it was already issued
+ * or because this request triggered it). We try to create a new stub
+ * to retry the operation once the initial open completes. */
+ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd,
+ xdata);
+ if (stub != NULL) {
+ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
+ }
-fd_t *
-ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag)
-{
- fd_t *wind_fd = NULL;
- ob_fd_t *ob_fd = NULL;
- ob_conf_t *conf = NULL;
+ state = -ENOMEM;
+ }
- conf = this->private;
+ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never
+ * be returned by ob_open_and_resume_fd(). If we are here it can only be
+ * because there has been a problem. */
- ob_fd = ob_fd_ctx_get(this, fd);
+ /* In case of failure we need to decrement the number of open files because
+ * ob_fdclose() won't be called. */
- if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) {
- wind_fd = fd_anonymous(fd->inode);
- if ((ob_fd->flags & O_DIRECT) && (flag))
- *flag = *flag | O_DIRECT;
- } else {
- wind_fd = fd_ref(fd);
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode->open_count--;
}
+ UNLOCK(&fd->inode->lock);
+
+ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
+ "create", "path=%s", loc->path, NULL);
- return wind_fd;
+ return default_create_failure_cbk(frame, -state);
}
-int
+static int32_t
ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *wind_fd = NULL;
- ob_conf_t *conf = NULL;
-
- conf = this->private;
+ ob_conf_t *conf = this->private;
+ bool trigger = conf->read_after_open || !conf->use_anonymous_fd;
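+    /* Only trigger the delayed open when the read cannot be served through
+     * an anonymous fd, or when read-after-open is configured. */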
- if (!conf->read_after_open)
- wind_fd = ob_get_wind_fd(this, fd, &flags);
- else
- wind_fd = fd_ref(fd);
-
- stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset,
- flags, xdata);
- fd_unref(wind_fd);
-
- if (!stub)
- goto err;
-
- open_and_resume(this, wind_fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
+ OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata);
return 0;
}
-int
+static int32_t
ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
int count, off_t offset, uint32_t flags, struct iobref *iobref,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset,
- flags, iobref, xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0);
+ OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags,
+ iobref, xdata);
return 0;
}
-int
+static int32_t
ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *wind_fd = NULL;
-
- wind_fd = ob_get_wind_fd(this, fd, NULL);
-
- stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata);
-
- fd_unref(wind_fd);
-
- if (!stub)
- goto err;
-
- open_and_resume(this, wind_fd, stub);
+ ob_conf_t *conf = this->private;
+ bool trigger = !conf->use_anonymous_fd;
- return 0;
-err:
- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
+ OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata);
return 0;
}
-int
+static int32_t
ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
gf_seek_what_t what, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *wind_fd = NULL;
-
- wind_fd = ob_get_wind_fd(this, fd, NULL);
+ ob_conf_t *conf = this->private;
+ bool trigger = !conf->use_anonymous_fd;
- stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what,
- xdata);
-
- fd_unref(wind_fd);
-
- if (!stub)
- goto err;
-
- open_and_resume(this, wind_fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
+ OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata);
return 0;
}
-int
+static int32_t
ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- ob_fd_t *ob_fd = NULL;
- gf_boolean_t unwind = _gf_false;
-
- LOCK(&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get(this, fd);
- if (ob_fd && ob_fd->open_frame)
- /* if open() was never wound to backend,
- no need to wind flush() either.
- */
- unwind = _gf_true;
- }
- UNLOCK(&fd->lock);
-
- if (unwind)
- goto unwind;
-
- stub = fop_flush_stub(frame, default_flush_resume, fd, xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0);
-
- return 0;
-
-unwind:
- STACK_UNWIND_STRICT(flush, frame, 0, 0, 0);
+ OB_POST_FLUSH(this, frame, fd, fd, xdata);
return 0;
}
-int
+static int32_t
ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0);
+ OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata);
return 0;
}
-int
+static int32_t
ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
struct gf_flock *flock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0);
+ OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata);
return 0;
}
-int
+static int32_t
ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset,
- xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0);
+ OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata);
return 0;
}
-int
+static int32_t
ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
int flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags,
- xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0);
+ OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata);
return 0;
}
-int
+static int32_t
ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0);
+ OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata);
return 0;
}
-int
+static int32_t
ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name,
- xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0);
+ OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata);
return 0;
}
-int
+static int32_t
ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
int cmd, struct gf_flock *flock, dict_t *xdata)
{
- call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume,
- volume, fd, cmd, flock, xdata);
- if (stub)
- open_and_resume(this, fd, stub);
- else
- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0);
+ OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata);
return 0;
}
-int
+static int32_t
ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- call_stub_t *stub = fop_fentrylk_stub(
- frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata);
- if (stub)
- open_and_resume(this, fd, stub);
- else
- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0);
+ OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type,
+ xdata);
return 0;
}
-int
+static int32_t
ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd,
- optype, xattr, xdata);
- if (stub)
- open_and_resume(this, fd, stub);
- else
- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0);
+ OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata);
return 0;
}
-int
+static int32_t
ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt,
int valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid,
- xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0);
+ OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata);
return 0;
}
-int
+static int32_t
ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
off_t offset, size_t len, dict_t *xdata)
{
- call_stub_t *stub;
+ OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata);
- stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset,
- len, xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
-int
+static int32_t
ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
- call_stub_t *stub;
-
- stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len,
- xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
+ OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata);
return 0;
-err:
- STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
}
-int
+static int32_t
ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
- call_stub_t *stub;
-
- stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len,
- xdata);
- if (!stub)
- goto err;
+ OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata);
- open_and_resume(this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
-int
+static int32_t
ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata);
- if (!stub)
- goto err;
-
- open_all_pending_fds_and_resume(this, loc->inode, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0);
+ OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata);
return 0;
}
-int
+static int32_t
ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata);
- if (!stub)
- goto err;
-
- open_all_pending_fds_and_resume(this, dst->inode, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
+ OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata);
return 0;
}
-int32_t
+static int32_t
ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid,
- xdata);
- if (!stub)
- goto err;
+ OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid,
+ xdata);
- open_all_pending_fds_and_resume(this, loc->inode, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
-int32_t
+static int32_t
ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- gf_boolean_t access_xattr = _gf_false;
-
if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) ||
dict_get(dict, POSIX_ACL_ACCESS_XATTR) ||
- dict_get(dict, GF_SELINUX_XATTR_KEY))
- access_xattr = _gf_true;
-
- if (!access_xattr)
+ dict_get(dict, GF_SELINUX_XATTR_KEY)) {
return default_setxattr(frame, this, loc, dict, flags, xdata);
+ }
- stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags,
- xdata);
- if (!stub)
- goto err;
-
- open_all_pending_fds_and_resume(this, loc->inode, stub);
+ OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags,
+ xdata);
return 0;
-err:
- STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL);
- return 0;
}
-int
-ob_release(xlator_t *this, fd_t *fd)
+static void
+ob_fdclose(xlator_t *this, fd_t *fd)
{
- ob_fd_t *ob_fd = NULL;
+ struct list_head list;
+ ob_inode_t *ob_inode;
+ call_stub_t *stub;
+
+ INIT_LIST_HEAD(&list);
+ stub = NULL;
- ob_fd = ob_fd_ctx_get(this, fd);
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode = ob_inode_get_locked(this, fd->inode);
+ if (ob_inode != NULL) {
+ ob_inode->open_count--;
+
+ /* If this fd is the same as ob_inode->first_fd, it means that
+ * the initial open has not fully completed. We'll try to cancel
+ * it. */
+ if (ob_inode->first_fd == fd) {
+ if (ob_inode->first_open == OB_OPEN_PREPARING) {
+ /* In this case ob_open_dispatch() has not been called yet.
+ * We clear first_fd and first_open to allow that function
+ * to know that the open is not really needed. This also
+ * allows other requests to work as expected if they
+ * arrive before the dispatch function is called. If there
+ * are pending fops, we can directly process them here.
+ * (note that there shouldn't be any fd related fops, but
+ * if there are, it's fine if they fail). */
+ ob_inode->first_fd = NULL;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = false;
+ list_splice_init(&ob_inode->resume_fops, &list);
+ } else if (!ob_inode->triggered) {
+ /* If the open has already been dispatched, we can only
+ * cancel it if it has not been triggered. Otherwise we
+ * simply wait until it completes. While it's not triggered,
+ * first_open must be a valid stub and there can't be any
+ * pending fops. */
+ GF_ASSERT((ob_inode->first_open != NULL) &&
+ list_empty(&ob_inode->resume_fops));
+
+ ob_inode->first_fd = NULL;
+ stub = ob_inode->first_open;
+ ob_inode->first_open = NULL;
+ }
+ }
+ }
+ }
+ UNLOCK(&fd->inode->lock);
- ob_fd_free(ob_fd);
+ if (stub != NULL) {
+ ob_open_destroy(stub, fd);
+ }
- return 0;
+ ob_resume_pending(&list);
}
int
ob_forget(xlator_t *this, inode_t *inode)
{
- ob_inode_t *ob_inode = NULL;
+ ob_inode_t *ob_inode;
uint64_t value = 0;
- inode_ctx_del(inode, this, &value);
-
- if (value) {
+ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
ob_inode = (ob_inode_t *)(uintptr_t)value;
- ob_inode_free(ob_inode);
+ GF_FREE(ob_inode);
}
return 0;
@@ -1153,20 +880,18 @@ ob_priv_dump(xlator_t *this)
int
ob_fdctx_dump(xlator_t *this, fd_t *fd)
{
- ob_fd_t *ob_fd = NULL;
char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
0,
};
- int ret = 0;
+ uint64_t value = 0;
+ int ret = 0, error = 0;
ret = TRY_LOCK(&fd->lock);
if (ret)
return 0;
- ob_fd = __ob_fd_ctx_get(this, fd);
- if (!ob_fd) {
- UNLOCK(&fd->lock);
- return 0;
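+    /* The fd context now only stores the errno of a failed background open
+     * (set in ob_open_completed()), not an ob_fd_t. */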
+ if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) {
+ error = (int32_t)value;
}
gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
@@ -1175,17 +900,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd)
gf_proc_dump_write("fd", "%p", fd);
- gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame);
-
- if (ob_fd->open_frame)
- gf_proc_dump_write("open_frame.root.unique", "%" PRIu64,
- ob_fd->open_frame->root->unique);
-
- gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path);
-
- gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid));
-
- gf_proc_dump_write("flags", "%d", ob_fd->flags);
+ gf_proc_dump_write("error", "%d", error);
UNLOCK(&fd->lock);
@@ -1282,6 +997,7 @@ fini(xlator_t *this)
struct xlator_fops fops = {
.open = ob_open,
+ .create = ob_create,
.readv = ob_readv,
.writev = ob_writev,
.flush = ob_flush,
@@ -1307,7 +1023,7 @@ struct xlator_fops fops = {
};
struct xlator_cbks cbks = {
- .release = ob_release,
+ .fdclose = ob_fdclose,
.forget = ob_forget,
};
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
index 4f16d148262..7fe4b3c3a4b 100644
--- a/xlators/performance/quick-read/src/quick-read.c
+++ b/xlators/performance/quick-read/src/quick-read.c
@@ -421,9 +421,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
qr_private_t *priv = NULL;
qr_inode_table_t *table = NULL;
uint32_t rollover = 0;
- struct timeval tv = {
- 0,
- };
rollover = gen >> 32;
gen = gen & 0xffffffff;
@@ -431,7 +428,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
priv = this->private;
table = &priv->table;
- gettimeofday(&tv, NULL);
LOCK(&table->lock);
{
if ((rollover != qr_inode->gen_rollover) ||
@@ -453,8 +449,7 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec;
qr_inode->buf = *buf;
-
- memcpy(&qr_inode->last_refresh, &tv, sizeof(struct timeval));
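+        /* One-second resolution is enough here: freshness is compared
+         * against cache-timeout, which is expressed in seconds. */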
+ qr_inode->last_refresh = gf_time();
__qr_inode_register(this, table, qr_inode);
}
@@ -524,9 +519,7 @@ __qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf,
if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) {
qr_inode->buf = *buf;
-
- gettimeofday(&qr_inode->last_refresh, NULL);
-
+ qr_inode->last_refresh = gf_time();
__qr_inode_register(this, table, qr_inode);
} else {
__qr_inode_prune(this, table, qr_inode, gen);
@@ -558,20 +551,14 @@ __qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode)
{
qr_conf_t *conf = NULL;
qr_private_t *priv = NULL;
- struct timeval now;
- struct timeval diff;
priv = this->private;
conf = &priv->conf;
- gettimeofday(&now, NULL);
-
- timersub(&now, &qr_inode->last_refresh, &diff);
-
- if (qr_inode->last_refresh.tv_sec < priv->last_child_down)
+ if (qr_inode->last_refresh < priv->last_child_down)
return _gf_false;
- if (diff.tv_sec >= conf->cache_timeout)
+ if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout)
return _gf_false;
return _gf_true;
@@ -1034,7 +1021,7 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode)
char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
0,
};
- char buf[256] = {
+ char buf[GF_TIMESTR_SIZE] = {
0,
};
@@ -1049,12 +1036,8 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode)
gf_proc_dump_write("entire-file-cached", "%s",
qr_inode->data ? "yes" : "no");
- if (qr_inode->last_refresh.tv_sec) {
- gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh.tv_sec,
- gf_timefmt_FT);
- snprintf(buf + strlen(buf), sizeof buf - strlen(buf),
- ".%" GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec);
-
+ if (qr_inode->last_refresh) {
+ gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT);
gf_proc_dump_write("last-cache-validation-time", "%s", buf);
}
@@ -1407,7 +1390,7 @@ qr_init(xlator_t *this)
ret = 0;
- time(&priv->last_child_down);
+ priv->last_child_down = gf_time();
GF_ATOMIC_INIT(priv->generation, 0);
this->private = priv;
out:
@@ -1457,7 +1440,7 @@ qr_conf_destroy(qr_conf_t *conf)
}
void
-qr_update_child_down_time(xlator_t *this, time_t *now)
+qr_update_child_down_time(xlator_t *this, time_t now)
{
qr_private_t *priv = NULL;
@@ -1465,7 +1448,7 @@ qr_update_child_down_time(xlator_t *this, time_t *now)
LOCK(&priv->lock);
{
- priv->last_child_down = *now;
+ priv->last_child_down = now;
}
UNLOCK(&priv->lock);
}
@@ -1511,7 +1494,6 @@ qr_notify(xlator_t *this, int event, void *data, ...)
{
int ret = 0;
qr_private_t *priv = NULL;
- time_t now = 0;
qr_conf_t *conf = NULL;
priv = this->private;
@@ -1520,8 +1502,7 @@ qr_notify(xlator_t *this, int event, void *data, ...)
switch (event) {
case GF_EVENT_CHILD_DOWN:
case GF_EVENT_SOME_DESCENDENT_DOWN:
- time(&now);
- qr_update_child_down_time(this, &now);
+ qr_update_child_down_time(this, gf_time());
break;
case GF_EVENT_UPCALL:
if (conf->qr_invalidation)
diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h
index 67850821b8e..20fcc70b3a7 100644
--- a/xlators/performance/quick-read/src/quick-read.h
+++ b/xlators/performance/quick-read/src/quick-read.h
@@ -39,7 +39,7 @@ struct qr_inode {
uint32_t ia_ctime_nsec;
uint32_t gen_rollover;
struct iatt buf;
- struct timeval last_refresh;
+ time_t last_refresh;
struct list_head lru;
uint64_t gen;
uint64_t invalidation_time;
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index e1d0e9aaf00..00cfca016e6 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -2489,7 +2489,7 @@ wb_mark_readdirp_start(xlator_t *this, inode_t *directory)
wb_directory_inode = wb_inode_create(this, directory);
- if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
+ if (!wb_directory_inode)
return;
LOCK(&wb_directory_inode->lock);
@@ -2509,7 +2509,7 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory)
wb_directory_inode = wb_inode_ctx_get(this, directory);
- if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
+ if (!wb_directory_inode)
return;
LOCK(&wb_directory_inode->lock);
diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am
index 01edbd35d9c..5e875c8df0b 100644
--- a/xlators/protocol/server/src/Makefile.am
+++ b/xlators/protocol/server/src/Makefile.am
@@ -4,11 +4,11 @@ endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol
-server_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+server_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(top_builddir)/rpc/xdr/src/libgfxdr.la
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la $(LIB_DL)
server_la_SOURCES = server.c server-resolve.c server-helpers.c \
server-rpc-fops.c server-handshake.c authenticate.c \
diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c
index 4bb84042a9e..cd79cf4d930 100644
--- a/xlators/protocol/server/src/server-common.c
+++ b/xlators/protocol/server/src/server-common.c
@@ -828,7 +828,7 @@ server4_post_lease(gfx_lease_rsp *rsp, struct gf_lease *lease)
void
server4_post_link(server_state_t *state, gfx_common_3iatt_rsp *rsp,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ struct iatt *postparent)
{
inode_t *link_inode = NULL;
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 5b76b49cc82..721968004a0 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -267,6 +267,8 @@ server_priv(xlator_t *this)
gf_proc_dump_build_key(key, "server", "total-bytes-write");
gf_proc_dump_write(key, "%" PRIu64, total_write);
+ rpcsvc_statedump(conf->rpc);
+
ret = 0;
out:
if (ret)
@@ -407,6 +409,7 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name)
if (!arg->victim_name) {
gf_smsg(this->name, GF_LOG_CRITICAL, ENOMEM, LG_MSG_NO_MEMORY,
"Memory allocation is failed");
+ free(arg);
return;
}
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index da9c653218f..f10722ec3fb 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -140,6 +140,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
struct timespec sleep_till = {
0,
};
+ glusterfs_ctx_t *ctx = this->ctx;
switch (event) {
case GF_EVENT_PARENT_UP: {
@@ -150,8 +151,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_PARENT_DOWN: {
if (!victim->cleanup_starting)
break;
- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
- victim->name);
if (priv->janitor) {
pthread_mutex_lock(&priv->janitor_mutex);
@@ -160,7 +159,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
priv->janitor);
if (!ret) {
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait to set janitor_task flag to _gf_false by
* janitor_task_done */
@@ -168,7 +167,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
(void)pthread_cond_timedwait(&priv->janitor_cond,
&priv->janitor_mutex,
&sleep_till);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
}
}
@@ -177,6 +176,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
GF_FREE(priv->janitor);
}
priv->janitor = NULL;
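+            /* Wait until the shared janitor thread has closed every fd that
+             * this brick queued before announcing CHILD_DOWN. */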
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ while (priv->rel_fdcount > 0) {
+ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+ victim->name);
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
} break;
default:
@@ -1084,7 +1093,13 @@ posix_init(xlator_t *this)
pthread_cond_init(&_private->fsync_cond, NULL);
pthread_mutex_init(&_private->janitor_mutex, NULL);
pthread_cond_init(&_private->janitor_cond, NULL);
+ pthread_cond_init(&_private->fd_cond, NULL);
INIT_LIST_HEAD(&_private->fsyncs);
+ _private->rel_fdcount = 0;
+ ret = posix_spawn_ctx_janitor_thread(this);
+ if (ret)
+ goto out;
+
ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this,
"posixfsy");
if (ret) {
@@ -1197,6 +1212,8 @@ posix_fini(xlator_t *this)
{
struct posix_private *priv = this->private;
gf_boolean_t health_check = _gf_false;
+ glusterfs_ctx_t *ctx = this->ctx;
+ uint32_t count;
int ret = 0;
int i = 0;
@@ -1243,6 +1260,19 @@ posix_fini(xlator_t *this)
priv->janitor = NULL;
}
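+    /* Detach from the shared janitor thread: the last posix instance to go
+     * away signals it to exit and joins it. */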
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ count = --ctx->pxl_count;
+ if (count == 0) {
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ if (count == 0) {
+ pthread_join(ctx->janitor, NULL);
+ }
+
if (priv->fsyncer) {
(void)gf_thread_cleanup_xint(priv->fsyncer);
priv->fsyncer = 0;
@@ -1436,24 +1466,21 @@ struct volume_options posix_options[] = {
.min = 0000,
.max = 0777,
.default_value = "0000",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Mode bit permission that will always be set on a file."},
{.key = {"force-directory-mode"},
.type = GF_OPTION_TYPE_INT,
.min = 0000,
.max = 0777,
.default_value = "0000",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Mode bit permission that will be always set on directory"},
{.key = {"create-mask"},
.type = GF_OPTION_TYPE_INT,
.min = 0000,
.max = 0777,
.default_value = "0777",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Any bit not set here will be removed from the"
"modes set on a file when it is created"},
{.key = {"create-directory-mask"},
@@ -1461,8 +1488,7 @@ struct volume_options posix_options[] = {
.min = 0000,
.max = 0777,
.default_value = "0777",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Any bit not set here will be removed from the"
"modes set on a directory when it is created"},
{.key = {"max-hardlinks"},
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
index c3a20f4c85c..8cc3ccf8c00 100644
--- a/xlators/storage/posix/src/posix-entry-ops.c
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -198,6 +198,19 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
goto out;
}
+#ifdef __NetBSD__
+ /* Same for NetBSD's .attribute directory */
+ if (__is_root_gfid(loc->pargfid) && loc->name &&
+ (strcmp(loc->name, ".attribute") == 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED,
+ "Lookup issued on .attribute,"
+ " which is not permitted");
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+#endif /* __NetBSD__ */
+
op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless);
op_ret = -1;
if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) {
@@ -650,6 +663,19 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto out;
}
+#ifdef __NetBSD__
+ /* Same for NetBSD's .attribute directory */
+ if (__is_root_gfid(loc->pargfid) &&
+ (strcmp(loc->name, ".attribute") == 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED,
+               "mkdir issued on .attribute, which "
+               "is not permitted");
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+#endif
+
priv = this->private;
VALIDATE_OR_GOTO(priv, out);
GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno,
@@ -1416,6 +1442,19 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
goto out;
}
+#ifdef __NetBSD__
+ /* Same for NetBSD's .attribute directory */
+ if (__is_root_gfid(loc->pargfid) &&
+ (strcmp(loc->name, ".attribute") == 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED,
+               "rmdir issued on .attribute, which "
+               "is not permitted");
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+#endif
+
priv = this->private;
MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf);
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 4c1da0ffa75..67db3324083 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1070,7 +1070,7 @@ verify_handle:
ret = posix_handle_soft(this, path, loc, uuid_curr, &stat);
out:
- if (!(*op_errno))
+ if (ret && !(*op_errno))
*op_errno = errno;
return ret;
}
@@ -1505,7 +1505,7 @@ posix_janitor_task(void *data)
if (!priv)
goto out;
- time(&now);
+ now = gf_time();
if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
if (priv->disable_landfill_purge) {
gf_msg_debug(this->name, 0,
@@ -1592,16 +1592,108 @@ unlock:
return;
}
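+/* Must be called with ctx->fd_lock held. Blocks until an fd is queued for
+ * cleanup, or returns NULL once no posix xlator is left (pxl_count == 0). */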
+static struct posix_fd *
+janitor_get_next_fd(glusterfs_ctx_t *ctx)
+{
+ struct posix_fd *pfd = NULL;
+
+ while (list_empty(&ctx->janitor_fds)) {
+ if (ctx->pxl_count == 0) {
+ return NULL;
+ }
+
+ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock);
+ }
+
+ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list);
+ list_del_init(&pfd->list);
+
+ return pfd;
+}
+
+static void
+posix_close_pfd(xlator_t *xl, struct posix_fd *pfd)
+{
+ THIS = xl;
+
+ if (pfd->dir == NULL) {
+ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd);
+ sys_close(pfd->fd);
+ } else {
+ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+ sys_closedir(pfd->dir);
+ }
+
+ GF_FREE(pfd);
+}
+
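+/* A single janitor thread is shared by all posix xlators in the process. It
+ * closes released fds/dirs asynchronously and signals per-xlator waiters
+ * once their rel_fdcount drops to zero. */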
+static void *
+posix_ctx_janitor_thread_proc(void *data)
+{
+ xlator_t *xl;
+ struct posix_fd *pfd;
+ glusterfs_ctx_t *ctx = NULL;
+ struct posix_private *priv_fd;
+
+ ctx = data;
+
+ pthread_mutex_lock(&ctx->fd_lock);
+
+ while ((pfd = janitor_get_next_fd(ctx)) != NULL) {
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ xl = pfd->xl;
+ posix_close_pfd(xl, pfd);
+
+ pthread_mutex_lock(&ctx->fd_lock);
+
+ priv_fd = xl->private;
+ priv_fd->rel_fdcount--;
+ if (!priv_fd->rel_fdcount)
+ pthread_cond_signal(&priv_fd->fd_cond);
+ }
+
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ return NULL;
+}
+
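+/* Called from posix_init(). Only the first posix instance in the process
+ * actually spawns the thread; pxl_count tracks how many instances share it. */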
+int
+posix_spawn_ctx_janitor_thread(xlator_t *this)
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ if (ctx->pxl_count++ == 0) {
+ ret = gf_thread_create(&ctx->janitor, NULL,
+ posix_ctx_janitor_thread_proc, ctx,
+ "posixctxjan");
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
+ "spawning janitor thread failed");
+ ctx->pxl_count--;
+ }
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ return ret;
+}
+
static int
-is_fresh_file(int64_t sec, int64_t ns)
+is_fresh_file(struct timespec *ts)
{
- struct timeval tv;
+ struct timespec now;
int64_t elapsed;
- gettimeofday(&tv, NULL);
+ timespec_now_realtime(&now);
+ elapsed = (int64_t)gf_tsdiff(ts, &now);
- elapsed = (tv.tv_sec - sec) * 1000000L;
- elapsed += tv.tv_usec - (ns / 1000L);
if (elapsed < 0) {
/* The file has been modified in the future !!!
* Is it fresh ? previous implementation considered this as a
@@ -1610,11 +1702,7 @@ is_fresh_file(int64_t sec, int64_t ns)
}
/* If the file is newer than a second, we consider it fresh. */
- if (elapsed < 1000000) {
- return 1;
- }
-
- return 0;
+ return elapsed < 1000000;
}
int
@@ -1677,7 +1765,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (ret != 16) {
/* TODO: This is a very hacky way of doing this, and very prone to
* errors and unexpected behavior. This should be changed. */
- if (is_fresh_file(stbuf.ia_ctime, stbuf.ia_ctime_nsec)) {
+ struct timespec ts = {.tv_sec = stbuf.ia_ctime,
+ .tv_nsec = stbuf.ia_ctime_nsec};
+ if (is_fresh_file(&ts)) {
gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
"Fresh file: %s", path);
return -ENOENT;
@@ -1691,7 +1781,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (ret != 16) {
/* TODO: This is a very hacky way of doing this, and very prone to
* errors and unexpected behavior. This should be changed. */
- if (is_fresh_file(stat.st_ctim.tv_sec, stat.st_ctim.tv_nsec)) {
+ if (is_fresh_file(&stat.st_ctim)) {
gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
"Fresh file: %s", path);
return -ENOENT;
@@ -1924,7 +2014,7 @@ posix_fs_health_check(xlator_t *this, char *file_path)
{
struct posix_private *priv = NULL;
int ret = -1;
- char timestamp[256] = {
+ char timestamp[GF_TIMESTR_SIZE] = {
0,
};
int fd = -1;
@@ -1939,9 +2029,7 @@ posix_fs_health_check(xlator_t *this, char *file_path)
int timeout = 0;
struct aiocb aiocb;
- GF_VALIDATE_OR_GOTO(this->name, this, out);
priv = this->private;
- GF_VALIDATE_OR_GOTO("posix-helpers", priv, out);
timeout = priv->health_check_timeout;
@@ -1952,7 +2040,7 @@ posix_fs_health_check(xlator_t *this, char *file_path)
goto out;
}
- time_sec = time(NULL);
+ time_sec = gf_time();
gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT);
timelen = strlen(timestamp);
@@ -2224,7 +2312,7 @@ posix_disk_space_check(xlator_t *this)
double totsz = 0;
double freesz = 0;
- GF_VALIDATE_OR_GOTO(this->name, this, out);
+ GF_VALIDATE_OR_GOTO("posix-helpers", this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, out);
@@ -2317,7 +2405,7 @@ posix_spawn_disk_space_check_thread(xlator_t *xl)
ret = gf_thread_create(&priv->disk_space_check, NULL,
posix_disk_space_check_thread_proc, xl,
- "posix_reserve");
+ "posixrsv");
if (ret) {
priv->disk_space_check_active = _gf_false;
gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED,
@@ -2397,23 +2485,8 @@ posix_fsyncer_syncfs(xlator_t *this, struct list_head *head)
stub = list_entry(head->prev, call_stub_t, list);
ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, NULL);
- if (ret)
- return;
-
-#ifdef GF_LINUX_HOST_OS
- /* syncfs() is not "declared" in RHEL's glibc even though
- the kernel has support.
- */
-#include <sys/syscall.h>
-#include <unistd.h>
-#ifdef SYS_syncfs
- syscall(SYS_syncfs, pfd->fd);
-#else
- sync();
-#endif
-#else
- sync();
-#endif
+ if (!ret)
+ (void)gf_syncfs(pfd->fd);
}
void *
@@ -3505,6 +3578,7 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
gf_boolean_t have_val = _gf_false;
data_t *arg_data = NULL;
char *xattr_name = NULL;
+ size_t xattr_len = 0;
gf_boolean_t is_stale = _gf_false;
op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
@@ -3513,7 +3587,8 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
return is_stale;
}
- arg_data = dict_get(xdata, xattr_name);
+ xattr_len = strlen(xattr_name);
+ arg_data = dict_getn(xdata, xattr_name, xattr_len);
if (!arg_data) {
op_ret = 0;
dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
@@ -3561,7 +3636,7 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
}
out:
- dict_del_sizen(xdata, xattr_name);
+ dict_deln(xdata, xattr_name, xattr_len);
dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
if (op_ret == -1) {
@@ -3570,3 +3645,22 @@ out:
return is_stale;
}
+
+/* Delete user xattr from the file at the file-path specified by data and from
+ * dict */
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
+{
+ int ret;
+ char *real_path = data;
+
+ ret = sys_lremovexattr(real_path, k);
+ if (ret) {
+        gf_msg("posix-helpers", GF_LOG_ERROR, errno, P_MSG_XATTR_NOT_REMOVED,
+               "removexattr failed. key %s path %s", k, real_path);
+ }
+
+ dict_del(dict, k);
+
+ return ret;
+}
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 439a4362fc6..6d54d37e5aa 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -54,6 +54,7 @@
#include <glusterfs/events.h>
#include "posix-gfid-path.h"
#include <glusterfs/compat-uuid.h>
+#include <glusterfs/common-utils.h>
extern char *marker_xattrs[];
#define ALIGN_SIZE 4096
@@ -1360,6 +1361,22 @@ out:
return 0;
}
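+/* Hand the fd over to the shared janitor thread instead of closing it in the
+ * release path; rel_fdcount lets posix_notify() wait for pending closes. */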
+static void
+posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
+{
+ glusterfs_ctx_t *ctx = this->ctx;
+ struct posix_private *priv = this->private;
+
+ pfd->xl = this;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ list_add_tail(&pfd->list, &ctx->janitor_fds);
+ priv->rel_fdcount++;
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+}
+
int32_t
posix_releasedir(xlator_t *this, fd_t *fd)
{
@@ -1382,11 +1399,7 @@ posix_releasedir(xlator_t *this, fd_t *fd)
"pfd->dir is NULL for fd=%p", fd);
goto out;
}
-
- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
-
- sys_closedir(pfd->dir);
- GF_FREE(pfd);
+ posix_add_fd_to_cleanup(this, pfd);
out:
return 0;
@@ -2294,8 +2307,7 @@ posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
flags);
if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ op_errno = errno;
gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
"copy_file_range failed: fd_in: %p (gfid: %s) ,"
" fd_out %p (gfid:%s)",
@@ -2510,7 +2522,6 @@ out:
int32_t
posix_release(xlator_t *this, fd_t *fd)
{
- struct posix_private *priv = NULL;
struct posix_fd *pfd = NULL;
int ret = -1;
uint64_t tmp_pfd = 0;
@@ -2518,8 +2529,6 @@ posix_release(xlator_t *this, fd_t *fd)
VALIDATE_OR_GOTO(this, out);
VALIDATE_OR_GOTO(fd, out);
- priv = this->private;
-
ret = fd_ctx_del(fd, this, &tmp_pfd);
if (ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
@@ -2533,13 +2542,7 @@ posix_release(xlator_t *this, fd_t *fd)
"pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
}
- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
-
- sys_close(pfd->fd);
- GF_FREE(pfd);
-
- if (!priv)
- goto out;
+ posix_add_fd_to_cleanup(this, pfd);
out:
return 0;
@@ -2709,6 +2712,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t ret = 0;
ssize_t acl_size = 0;
dict_t *xattr = NULL;
+ dict_t *subvol_xattrs = NULL;
posix_xattr_filler_t filler = {
0,
};
@@ -2724,6 +2728,10 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
struct mdata_iatt mdata_iatt = {
0,
};
+ int8_t sync_backend_xattrs = _gf_false;
+ data_pair_t *custom_xattrs;
+ data_t *keyval = NULL;
+ char **xattrs_to_heal = get_xattrs_to_heal();
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID(frame->root->uid, frame->root->gid);
@@ -2906,6 +2914,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
goto out;
}
+ ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs");
+ }
+
+ if (sync_backend_xattrs) {
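+        /* Make the backend xattrs match exactly what the caller provided:
+         * first remove the existing user and healable xattrs, then set the
+         * ones supplied in 'dict'. */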
+ /* List all custom xattrs */
+        ret = dict_set_int32_sizen(xdata, "list-xattr", 1);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+                   "Unable to set list-xattr in dict");
+            goto out;
+        }
+
+        subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+                                         NULL);
+        if (!subvol_xattrs)
+            goto out;
+
+ /* Remove all user xattrs from the file */
+ dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr,
+ real_path);
+
+ /* Remove all custom xattrs from the file */
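+        /* The loop starts at index 1 on the assumption that entry 0 of
+         * xattrs_to_heal is the "user." prefix, already covered by the
+         * fnmatch pass above. */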
+ for (i = 1; xattrs_to_heal[i]; i++) {
+ keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]);
+ if (keyval) {
+ ret = sys_lremovexattr(real_path, xattrs_to_heal[i]);
+ if (ret) {
+                    gf_msg(this->name, GF_LOG_ERROR, errno,
+                           P_MSG_XATTR_NOT_REMOVED,
+                           "removexattr failed. key %s path %s",
+                           xattrs_to_heal[i], loc->path);
+ goto out;
+ }
+
+ dict_del(subvol_xattrs, xattrs_to_heal[i]);
+ keyval = NULL;
+ }
+ }
+
+ /* Set custom xattrs based on info provided by DHT */
+ custom_xattrs = dict->members_list;
+
+ while (custom_xattrs != NULL) {
+ ret = sys_lsetxattr(real_path, custom_xattrs->key,
+ custom_xattrs->value->data,
+ custom_xattrs->value->len, flags);
+ if (ret) {
+ op_errno = errno;
+ gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d",
+ custom_xattrs->key, ret);
+ goto out;
+ }
+
+ custom_xattrs = custom_xattrs->next;
+ }
+ }
+
xattr = dict_new();
if (!xattr)
goto out;
@@ -3013,6 +3081,9 @@ out:
if (xattr)
dict_unref(xattr);
+ if (subvol_xattrs)
+ dict_unref(subvol_xattrs);
+
return 0;
}
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 662b5c69f8a..b8db146eef2 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -125,7 +125,7 @@ struct posix_fd {
off_t dir_eof; /* offset at dir EOF */
struct list_head list; /* to add to the janitor list */
int odirect;
-
+ xlator_t *xl;
char _pad[4]; /* manual padding */
};
@@ -137,10 +137,6 @@ struct posix_private {
gf_lock_t lock;
char *hostname;
- /* Statistics, provides activity of the server */
-
- struct timeval prev_fetch_time;
- struct timeval init_time;
time_t last_landfill_check;
@@ -170,6 +166,7 @@ struct posix_private {
pthread_cond_t fsync_cond;
pthread_mutex_t janitor_mutex;
pthread_cond_t janitor_cond;
+ pthread_cond_t fd_cond;
int fsync_queue_count;
int32_t janitor_sleep_duration;
@@ -254,8 +251,7 @@ struct posix_private {
gf_boolean_t aio_configured;
gf_boolean_t aio_init_done;
gf_boolean_t aio_capable;
-
- char _pad[4]; /* manual padding */
+ uint32_t rel_fdcount;
};
typedef struct {
@@ -662,10 +658,16 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
int
posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
+int
+posix_spawn_ctx_janitor_thread(xlator_t *this);
+
void
posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
gf_boolean_t
posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this);
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
+
#endif /* _POSIX_H */
diff --git a/xlators/system/posix-acl/src/posix-acl.c b/xlators/system/posix-acl/src/posix-acl.c
index 77c8df5a54f..fc227364b31 100644
--- a/xlators/system/posix-acl/src/posix-acl.c
+++ b/xlators/system/posix-acl/src/posix-acl.c
@@ -50,8 +50,8 @@ r00t()
return conf->super_uid;
}
-int
-whitelisted_xattr(const char *key)
+static int
+allowed_xattr(const char *key)
{
if (!key)
return 0;
@@ -2016,7 +2016,7 @@ int
posix_acl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- if (whitelisted_xattr(name))
+ if (allowed_xattr(name))
goto green;
if (acl_permits(frame, loc->inode, POSIX_ACL_READ))
@@ -2039,7 +2039,7 @@ int
posix_acl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
const char *name, dict_t *xdata)
{
- if (whitelisted_xattr(name))
+ if (allowed_xattr(name))
goto green;
if (acl_permits(frame, fd->inode, POSIX_ACL_READ))
@@ -2072,7 +2072,7 @@ posix_acl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto red;
}
- if (whitelisted_xattr(name)) {
+ if (allowed_xattr(name)) {
if (!frame_is_user(frame, ctx->uid)) {
op_errno = EPERM;
goto red;